From 89011fc3c965811bbc3b6a0770c9d94bb264ad3f Mon Sep 17 00:00:00 2001
From: Matt Arsenault
Date: Sun, 19 Jul 2020 21:26:02 -0400
Subject: [PATCH] AMDGPU/GlobalISel: Select llvm.returnaddress

---
 .../Target/AMDGPU/AMDGPUInstructionSelector.cpp    |  50 +++++++++
 llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h |   1 +
 llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp  |   3 +-
 .../GlobalISel/inst-select-returnaddress.mir       | 122 +++++++++++++++++++++
 llvm/test/CodeGen/AMDGPU/returnaddress.ll          |  24 +++-
 5 files changed, 196 insertions(+), 4 deletions(-)
 create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-returnaddress.mir

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 43f5e53..0beff5c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -872,6 +872,8 @@ bool AMDGPUInstructionSelector::selectG_INTRINSIC(MachineInstr &I) const {
     return selectBallot(I);
   case Intrinsic::amdgcn_reloc_constant:
     return selectRelocConstant(I);
+  case Intrinsic::returnaddress:
+    return selectReturnAddress(I);
   default:
     return selectImpl(I, *CoverageInfo);
   }
@@ -1077,6 +1079,54 @@ bool AMDGPUInstructionSelector::selectRelocConstant(MachineInstr &I) const {
   return true;
 }
 
+bool AMDGPUInstructionSelector::selectReturnAddress(MachineInstr &I) const {
+  MachineBasicBlock *MBB = I.getParent();
+  MachineFunction &MF = *MBB->getParent();
+  const DebugLoc &DL = I.getDebugLoc();
+
+  MachineOperand &Dst = I.getOperand(0);
+  Register DstReg = Dst.getReg();
+  unsigned Depth = I.getOperand(2).getImm();
+
+  const TargetRegisterClass *RC
+    = TRI.getConstrainedRegClassForOperand(Dst, *MRI);
+  if (!RC->hasSubClassEq(&AMDGPU::SGPR_64RegClass) ||
+      !RBI.constrainGenericRegister(DstReg, *RC, *MRI))
+    return false;
+
+  MachineBasicBlock &EntryMBB = MF.front();
+
+  // Check for kernel and shader functions
+  if (Depth != 0 ||
+      MF.getInfo<SIMachineFunctionInfo>()->isEntryFunction()) {
+    BuildMI(*MBB, &I, DL, TII.get(AMDGPU::S_MOV_B64), DstReg)
+      .addImm(0);
+    I.eraseFromParent();
+    return true;
+  }
+
+  Register ReturnAddrReg = TRI.getReturnAddressReg(MF);
+
+  MachineFrameInfo &MFI = MF.getFrameInfo();
+  // There is a call to @llvm.returnaddress in this function
+  MFI.setReturnAddressIsTaken(true);
+
+  // Get the return address reg and mark it as an implicit live-in
+  Register LiveIn = MRI->getLiveInVirtReg(ReturnAddrReg);
+  if (!LiveIn) {
+    LiveIn = MF.addLiveIn(ReturnAddrReg, RC);
+    BuildMI(EntryMBB, EntryMBB.begin(), DL, TII.get(AMDGPU::COPY), LiveIn)
+      .addReg(ReturnAddrReg);
+    if (!EntryMBB.isLiveIn(ReturnAddrReg))
+      EntryMBB.addLiveIn(ReturnAddrReg);
+  }
+
+  BuildMI(*MBB, &I, DL, TII.get(AMDGPU::COPY), DstReg)
+    .addReg(LiveIn);
+  I.eraseFromParent();
+  return true;
+}
+
 bool AMDGPUInstructionSelector::selectEndCfIntrinsic(MachineInstr &MI) const {
   // FIXME: Manually selecting to avoid dealiing with the SReg_1 trick
   // SelectionDAG uses for wave32 vs wave64.
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
index 1a0a9ce..c86f5f7 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
@@ -109,6 +109,7 @@ private:
   bool selectIntrinsicIcmp(MachineInstr &MI) const;
   bool selectBallot(MachineInstr &I) const;
   bool selectRelocConstant(MachineInstr &I) const;
+  bool selectReturnAddress(MachineInstr &I) const;
   bool selectG_INTRINSIC(MachineInstr &I) const;
 
   bool selectEndCfIntrinsic(MachineInstr &MI) const;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
index 9674474..f71e1be 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -4023,7 +4023,8 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
     case Intrinsic::amdgcn_kernarg_segment_ptr:
     case Intrinsic::amdgcn_s_getpc:
     case Intrinsic::amdgcn_groupstaticsize:
-    case Intrinsic::amdgcn_reloc_constant: {
+    case Intrinsic::amdgcn_reloc_constant:
+    case Intrinsic::returnaddress: {
      unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
      OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
      break;
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-returnaddress.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-returnaddress.mir
new file mode 100644
index 0000000..72927e7
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-returnaddress.mir
@@ -0,0 +1,122 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s
+
+---
+name: return_address_already_live_in_copy
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+liveins:
+  - { reg: '$sgpr30_sgpr31', virtual-reg: '%0' }
+
+body: |
+  bb.0:
+    liveins: $sgpr30_sgpr31
+    ; CHECK-LABEL: name: return_address_already_live_in_copy
+    ; CHECK: liveins: $sgpr30_sgpr31
+    ; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr30_sgpr31
+    ; CHECK: S_ENDPGM 0, implicit [[COPY]], implicit [[COPY]]
+    %0:sgpr(p0) = COPY $sgpr30_sgpr31
+    %1:sgpr(p0) = G_INTRINSIC intrinsic(@llvm.returnaddress), 0
+    S_ENDPGM 0, implicit %0, implicit %1
+...
+
+---
+name: return_address_already_block_live_in_copy_not_mf_life_in
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins: $sgpr30_sgpr31
+    ; CHECK-LABEL: name: return_address_already_block_live_in_copy_not_mf_life_in
+    ; CHECK: liveins: $sgpr30_sgpr31
+    ; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr30_sgpr31
+    ; CHECK: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr30_sgpr31
+    ; CHECK: S_ENDPGM 0, implicit [[COPY1]], implicit [[COPY]]
+    %0:sgpr(p0) = COPY $sgpr30_sgpr31
+    %1:sgpr(p0) = G_INTRINSIC intrinsic(@llvm.returnaddress), 0
+    S_ENDPGM 0, implicit %0, implicit %1
+...
+
+---
+name: return_address_no_live_in
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+
+    ; CHECK-LABEL: name: return_address_no_live_in
+    ; CHECK: liveins: $sgpr30_sgpr31
+    ; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr30_sgpr31
+    ; CHECK: S_ENDPGM 0, implicit [[COPY]]
+    %0:sgpr(p0) = G_INTRINSIC intrinsic(@llvm.returnaddress), 0
+    S_ENDPGM 0, implicit %0
+...
+
+---
+name: return_address_no_live_in_non_entry_block
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  ; CHECK-LABEL: name: return_address_no_live_in_non_entry_block
+  ; CHECK: bb.0:
+  ; CHECK: successors: %bb.1(0x80000000)
+  ; CHECK: liveins: $sgpr30_sgpr31
+  ; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr30_sgpr31
+  ; CHECK: S_BRANCH %bb.1
+  ; CHECK: bb.1:
+  ; CHECK: S_ENDPGM 0, implicit [[COPY]]
+  bb.0:
+    G_BR %bb.1
+
+  bb.1:
+    %0:sgpr(p0) = G_INTRINSIC intrinsic(@llvm.returnaddress), 0
+    S_ENDPGM 0, implicit %0
+...
+
+---
+name: return_address_multi_use
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  ; CHECK-LABEL: name: return_address_multi_use
+  ; CHECK: bb.0:
+  ; CHECK: successors: %bb.1(0x80000000)
+  ; CHECK: liveins: $sgpr30_sgpr31
+  ; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr30_sgpr31
+  ; CHECK: S_BRANCH %bb.1
+  ; CHECK: bb.1:
+  ; CHECK: S_ENDPGM 0, implicit [[COPY]], implicit [[COPY]]
+  bb.0:
+    %0:sgpr(p0) = G_INTRINSIC intrinsic(@llvm.returnaddress), 0
+    G_BR %bb.1
+
+  bb.1:
+    %1:sgpr(p0) = G_INTRINSIC intrinsic(@llvm.returnaddress), 0
+    S_ENDPGM 0, implicit %0, implicit %1
+...
+
+---
+name: return_address_kernel_is_null
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo:
+  isEntryFunction: true
+
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: return_address_kernel_is_null
+    ; CHECK: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
+    ; CHECK: S_ENDPGM 0, implicit [[S_MOV_B64_]]
+    %0:sgpr(p0) = G_INTRINSIC intrinsic(@llvm.returnaddress), 0
+    S_ENDPGM 0, implicit %0
+...
diff --git a/llvm/test/CodeGen/AMDGPU/returnaddress.ll b/llvm/test/CodeGen/AMDGPU/returnaddress.ll
index 1db6e3e..7937ba6 100644
--- a/llvm/test/CodeGen/AMDGPU/returnaddress.ll
+++ b/llvm/test/CodeGen/AMDGPU/returnaddress.ll
@@ -1,4 +1,5 @@
-; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck --check-prefix=GCN %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck --check-prefix=GCN %s
+; RUN: llc -global-isel -amdgpu-fixed-function-abi -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck --check-prefix=GCN %s
 
 ; Test with zero frame
 ; GCN-LABEL: {{^}}func1
@@ -25,7 +26,7 @@ entry:
 ; Test with amdgpu_kernel
 ; GCN-LABEL: {{^}}func3
 ; GCN: v_mov_b32_e32 v0, 0
-; GCN: v_mov_b32_e32 v1, v0
+; GCN: v_mov_b32_e32 v1, {{v0|0}}
 define amdgpu_kernel void @func3(i8** %out) nounwind {
 entry:
   %tmp = tail call i8* @llvm.returnaddress(i32 0)
@@ -36,7 +37,7 @@ entry:
 ; Test with use outside the entry-block
 ; GCN-LABEL: {{^}}func4
 ; GCN: v_mov_b32_e32 v0, 0
-; GCN: v_mov_b32_e32 v1, v0
+; GCN: v_mov_b32_e32 v1, {{v0|0}}
 define amdgpu_kernel void @func4(i8** %out, i32 %val) nounwind {
 entry:
   %cmp = icmp ne i32 %val, 0
@@ -61,5 +62,22 @@ entry:
   unreachable
 }
 
+declare void @callee()
+
+; GCN-LABEL: {{^}}multi_use:
+; GCN-DAG: v_mov_b32_e32 v[[LO:4[0-9]+]], s30
+; GCN-DAG: v_mov_b32_e32 v[[HI:4[0-9]+]], s31
+; GCN: global_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
+; GCN: s_swappc_b64
+; GCN: global_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
+define void @multi_use() nounwind {
+entry:
+  %ret0 = tail call i8* @llvm.returnaddress(i32 0)
+  store volatile i8* %ret0, i8* addrspace(1)* undef
+  call void @callee()
+  %ret1 = tail call i8* @llvm.returnaddress(i32 0)
+  store volatile i8* %ret1, i8* addrspace(1)* undef
+  ret void
+}
 declare i8* @llvm.returnaddress(i32) nounwind readnone
-- 
2.7.4
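
Editorial note (not part of the patch): the LLVM IR sketch below summarizes the behavior the new selectReturnAddress path implements, mirroring the tests above; the function names are invented for illustration. In a callable function, @llvm.returnaddress at depth 0 is selected to a COPY of the live-in return-address register pair ($sgpr30_sgpr31); in an entry function such as an amdgpu_kernel, or for any non-zero depth, it is selected to S_MOV_B64 0, i.e. a null return address.

; Illustrative sketch only -- not part of the patch; function names are made up.
declare i8* @llvm.returnaddress(i32) nounwind readnone

; Callable function, depth 0: lowered to a COPY of the live-in $sgpr30_sgpr31.
define i8* @ra_callable() nounwind {
entry:
  %ra = tail call i8* @llvm.returnaddress(i32 0)
  ret i8* %ra
}

; Kernel (entry function): always selected to S_MOV_B64 0, i.e. null.
; Any non-zero depth, e.g. @llvm.returnaddress(i32 1), also yields null.
define amdgpu_kernel void @ra_kernel(i8* addrspace(1)* %out) nounwind {
entry:
  %ra = tail call i8* @llvm.returnaddress(i32 0)
  store i8* %ra, i8* addrspace(1)* %out
  ret void
}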