return selectBallot(I);
case Intrinsic::amdgcn_reloc_constant:
return selectRelocConstant(I);
+ case Intrinsic::amdgcn_groupstaticsize:
+ return selectGroupStaticSize(I);
case Intrinsic::returnaddress:
return selectReturnAddress(I);
default:
return true;
}
+/// Selects the llvm.amdgcn.groupstaticsize intrinsic by replacing \p I with a
+/// single 32-bit move into its result register.
+///
+/// The move opcode follows the result's register bank: S_MOV_B32 for the SGPR
+/// bank, V_MOV_B32_e32 otherwise. On AMDHSA and AMDPAL the source operand is
+/// the immediate returned by SIMachineFunctionInfo::getLDSSize(); on any other
+/// OS it is a global-address operand naming the intrinsic's declaration,
+/// tagged MO_ABS32_LO.
+///
+/// \returns the result of constraining the new move's register operands.
+bool AMDGPUInstructionSelector::selectGroupStaticSize(MachineInstr &I) const {
+  const Triple::OSType TargetOS = MF->getTarget().getTargetTriple().getOS();
+  const bool UseImmediateSize =
+      TargetOS == Triple::AMDHSA || TargetOS == Triple::AMDPAL;
+
+  Register ResultReg = I.getOperand(0).getReg();
+  const RegisterBank *ResultBank = RBI.getRegBank(ResultReg, *MRI, TRI);
+
+  // Default to the vector move; switch to the scalar move for SGPR results.
+  unsigned MovOpc = AMDGPU::V_MOV_B32_e32;
+  if (ResultBank->getID() == AMDGPU::SGPRRegBankID)
+    MovOpc = AMDGPU::S_MOV_B32;
+
+  // Build the move directly in front of the intrinsic; its source operand is
+  // appended below.
+  MachineBasicBlock &ParentBB = *I.getParent();
+  auto Mov =
+      BuildMI(ParentBB, &I, I.getDebugLoc(), TII.get(MovOpc), ResultReg);
+
+  if (!UseImmediateSize) {
+    Module *ParentModule = MF->getFunction().getParent();
+    const GlobalValue *SizeSymbol = Intrinsic::getDeclaration(
+        ParentModule, Intrinsic::amdgcn_groupstaticsize);
+    Mov.addGlobalAddress(SizeSymbol, 0, SIInstrInfo::MO_ABS32_LO);
+  } else {
+    const SIMachineFunctionInfo *FuncInfo =
+        MF->getInfo<SIMachineFunctionInfo>();
+    Mov.addImm(FuncInfo->getLDSSize());
+  }
+
+  // Drop the intrinsic now that the move has taken its place.
+  I.eraseFromParent();
+  return constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
+}
+
bool AMDGPUInstructionSelector::selectReturnAddress(MachineInstr &I) const {
MachineBasicBlock *MBB = I.getParent();
MachineFunction &MF = *MBB->getParent();
bool selectIntrinsicIcmp(MachineInstr &MI) const;
bool selectBallot(MachineInstr &I) const;
bool selectRelocConstant(MachineInstr &I) const; // Dispatched for Intrinsic::amdgcn_reloc_constant.
+ bool selectGroupStaticSize(MachineInstr &I) const; // Dispatched for Intrinsic::amdgcn_groupstaticsize; emits one S_MOV_B32 / V_MOV_B32_e32.
bool selectReturnAddress(MachineInstr &I) const; // Dispatched for Intrinsic::returnaddress.
bool selectG_INTRINSIC(MachineInstr &I) const;
--- /dev/null
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=HSAPAL %s
+# RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=HSAPAL %s
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=MESA %s
+
+---
+name: groupstaticsize_v # Result on the VGPR bank: both check prefixes expect V_MOV_B32_e32.
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo:
+ ldsSize: 4096 # Expected as the move's immediate under the HSAPAL prefix.
+
+body: |
+ bb.0:
+
+ ; HSAPAL-LABEL: name: groupstaticsize_v
+ ; HSAPAL: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec
+ ; HSAPAL: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]]
+ ; MESA-LABEL: name: groupstaticsize_v
+ ; MESA: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 target-flags(amdgpu-abs32-lo) @llvm.amdgcn.groupstaticsize, implicit $exec
+ ; MESA: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]]
+ %0:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.groupstaticsize)
+ S_ENDPGM 0, implicit %0
+...
+
+---
+name: groupstaticsize_s # Result on the SGPR bank: both check prefixes expect S_MOV_B32.
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo:
+ ldsSize: 1024 # Expected as the move's immediate under the HSAPAL prefix.
+
+body: |
+ bb.0:
+
+ ; HSAPAL-LABEL: name: groupstaticsize_s
+ ; HSAPAL: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024
+ ; HSAPAL: S_ENDPGM 0, implicit [[S_MOV_B32_]]
+ ; MESA-LABEL: name: groupstaticsize_s
+ ; MESA: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @llvm.amdgcn.groupstaticsize
+ ; MESA: S_ENDPGM 0, implicit [[S_MOV_B32_]]
+ %0:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.groupstaticsize)
+ S_ENDPGM 0, implicit %0
+...
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefixes=CHECK,HSA %s
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=CHECK,HSA %s
+; RUN: llc -global-isel -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefixes=CHECK,NOHSA %s
+; RUN: llc -global-isel -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefixes=CHECK,HSA %s
+; RUN: llc -global-isel -mtriple=amdgcn--amdhsa -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=CHECK,HSA %s
+
@lds0 = addrspace(3) global [512 x float] undef, align 4 ; 512 x 4-byte float = 2048 bytes
@lds1 = addrspace(3) global [256 x float] undef, align 4 ; 256 x 4-byte float = 1024 bytes