From 2f5f5febf3e4fa9bc80e8a8f63a99d3e6813c499 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Sun, 26 Jul 2020 15:43:48 -0400 Subject: [PATCH] AMDGPU/GlobalISel: Select llvm.amdgcn.groupstaticsize Previously, it would successfully select and assert if not HSA or PAL when expanding the pseudoinstruction. We don't need the pseudoinstruction anymore since we know the total size after legalization. --- .../Target/AMDGPU/AMDGPUInstructionSelector.cpp | 29 ++++++++++++++ llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h | 1 + .../inst-select-amdgcn.groupstaticsize.mir | 46 ++++++++++++++++++++++ .../CodeGen/AMDGPU/llvm.amdgcn.groupstaticsize.ll | 4 ++ 4 files changed, 80 insertions(+) create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.groupstaticsize.mir diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index f2ecc50..c9f9eb6 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -930,6 +930,8 @@ bool AMDGPUInstructionSelector::selectG_INTRINSIC(MachineInstr &I) const { return selectBallot(I); case Intrinsic::amdgcn_reloc_constant: return selectRelocConstant(I); + case Intrinsic::amdgcn_groupstaticsize: + return selectGroupStaticSize(I); case Intrinsic::returnaddress: return selectReturnAddress(I); default: @@ -1137,6 +1139,33 @@ bool AMDGPUInstructionSelector::selectRelocConstant(MachineInstr &I) const { return true; } +bool AMDGPUInstructionSelector::selectGroupStaticSize(MachineInstr &I) const { + Triple::OSType OS = MF->getTarget().getTargetTriple().getOS(); + + Register DstReg = I.getOperand(0).getReg(); + const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI); + unsigned Mov = DstRB->getID() == AMDGPU::SGPRRegBankID ? 
+ AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32; + + MachineBasicBlock *MBB = I.getParent(); + const DebugLoc &DL = I.getDebugLoc(); + + auto MIB = BuildMI(*MBB, &I, DL, TII.get(Mov), DstReg); + + if (OS == Triple::AMDHSA || OS == Triple::AMDPAL) { + const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>(); + MIB.addImm(MFI->getLDSSize()); + } else { + Module *M = MF->getFunction().getParent(); + const GlobalValue *GV + = Intrinsic::getDeclaration(M, Intrinsic::amdgcn_groupstaticsize); + MIB.addGlobalAddress(GV, 0, SIInstrInfo::MO_ABS32_LO); + } + + I.eraseFromParent(); + return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI); +} + bool AMDGPUInstructionSelector::selectReturnAddress(MachineInstr &I) const { MachineBasicBlock *MBB = I.getParent(); MachineFunction &MF = *MBB->getParent(); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h index b188672..969ef59 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h @@ -110,6 +110,7 @@ private: bool selectIntrinsicIcmp(MachineInstr &MI) const; bool selectBallot(MachineInstr &I) const; bool selectRelocConstant(MachineInstr &I) const; + bool selectGroupStaticSize(MachineInstr &I) const; bool selectReturnAddress(MachineInstr &I) const; bool selectG_INTRINSIC(MachineInstr &I) const; diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.groupstaticsize.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.groupstaticsize.mir new file mode 100644 index 0000000..4e45fe6 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.groupstaticsize.mir @@ -0,0 +1,46 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=HSAPAL %s +# RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -run-pass=instruction-select 
-verify-machineinstrs %s -o - | FileCheck -check-prefix=HSAPAL %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=MESA %s + +--- +name: groupstaticsize_v +legalized: true +regBankSelected: true +tracksRegLiveness: true +machineFunctionInfo: + ldsSize: 4096 + +body: | + bb.0: + + ; HSAPAL-LABEL: name: groupstaticsize_v + ; HSAPAL: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec + ; HSAPAL: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]] + ; MESA-LABEL: name: groupstaticsize_v + ; MESA: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 target-flags(amdgpu-abs32-lo) @llvm.amdgcn.groupstaticsize, implicit $exec + ; MESA: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]] + %0:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.groupstaticsize) + S_ENDPGM 0, implicit %0 +... + +--- +name: groupstaticsize_s +legalized: true +regBankSelected: true +tracksRegLiveness: true +machineFunctionInfo: + ldsSize: 1024 + +body: | + bb.0: + + ; HSAPAL-LABEL: name: groupstaticsize_s + ; HSAPAL: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024 + ; HSAPAL: S_ENDPGM 0, implicit [[S_MOV_B32_]] + ; MESA-LABEL: name: groupstaticsize_s + ; MESA: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @llvm.amdgcn.groupstaticsize + ; MESA: S_ENDPGM 0, implicit [[S_MOV_B32_]] + %0:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.groupstaticsize) + S_ENDPGM 0, implicit %0 +... 
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.groupstaticsize.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.groupstaticsize.ll index 3224d8a..db4032e 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.groupstaticsize.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.groupstaticsize.ll @@ -2,6 +2,10 @@ ; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefixes=CHECK,HSA %s ; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=CHECK,HSA %s +; RUN: llc -global-isel -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefixes=CHECK,NOHSA %s +; RUN: llc -global-isel -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefixes=CHECK,HSA %s +; RUN: llc -global-isel -mtriple=amdgcn--amdhsa -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=CHECK,HSA %s + @lds0 = addrspace(3) global [512 x float] undef, align 4 @lds1 = addrspace(3) global [256 x float] undef, align 4 -- 2.7.4