return selectBallot(I);
case Intrinsic::amdgcn_reloc_constant:
return selectRelocConstant(I);
+ case Intrinsic::returnaddress:
+ return selectReturnAddress(I);
default:
return selectImpl(I, *CoverageInfo);
}
return true;
}
+// Select @llvm.returnaddress for GlobalISel: materialize the function's
+// return address into the 64-bit SGPR destination.
+//  - For nonzero depth, or for entry functions (kernels/shaders) which have
+//    no return address, a null pointer (0) is produced instead.
+//  - Otherwise the incoming return-address register is registered as a
+//    function live-in and copied from the entry block.
+bool AMDGPUInstructionSelector::selectReturnAddress(MachineInstr &I) const {
+  MachineBasicBlock *MBB = I.getParent();
+  MachineFunction &MF = *MBB->getParent();
+  const DebugLoc &DL = I.getDebugLoc();
+
+  MachineOperand &Dst = I.getOperand(0);
+  Register DstReg = Dst.getReg();
+  // Operand 1 is the intrinsic ID; operand 2 is the depth argument.
+  unsigned Depth = I.getOperand(2).getImm();
+
+  const TargetRegisterClass *RC
+    = TRI.getConstrainedRegClassForOperand(Dst, *MRI);
+  // The result must land in a 64-bit SGPR pair; give up (and let selection
+  // fail) if the destination cannot be constrained to that class.
+  if (!RC->hasSubClassEq(&AMDGPU::SGPR_64RegClass) ||
+      !RBI.constrainGenericRegister(DstReg, *RC, *MRI))
+    return false;
+
+  MachineBasicBlock &EntryMBB = MF.front();
+
+  // Check for kernel and shader functions
+  if (Depth != 0 ||
+      MF.getInfo<SIMachineFunctionInfo>()->isEntryFunction()) {
+    // No return address exists in these cases: produce a null pointer.
+    BuildMI(*MBB, &I, DL, TII.get(AMDGPU::S_MOV_B64), DstReg)
+      .addImm(0);
+    I.eraseFromParent();
+    return true;
+  }
+
+  Register ReturnAddrReg = TRI.getReturnAddressReg(MF);
+
+  MachineFrameInfo &MFI = MF.getFrameInfo();
+  // There is a call to @llvm.returnaddress in this function
+  MFI.setReturnAddressIsTaken(true);
+
+  // Get the return address reg and mark it as an implicit live-in
+  Register LiveIn = MRI->getLiveInVirtReg(ReturnAddrReg);
+  if (!LiveIn) {
+    // Not yet a live-in: add it, and copy it into a vreg at the top of the
+    // entry block so uses in any later block are dominated by the copy.
+    LiveIn = MF.addLiveIn(ReturnAddrReg, RC);
+    BuildMI(EntryMBB, EntryMBB.begin(), DL, TII.get(AMDGPU::COPY), LiveIn)
+      .addReg(ReturnAddrReg);
+    if (!EntryMBB.isLiveIn(ReturnAddrReg))
+      EntryMBB.addLiveIn(ReturnAddrReg);
+  }
+
+  // Replace the intrinsic with a copy from the (possibly pre-existing)
+  // live-in vreg.
+  BuildMI(*MBB, &I, DL, TII.get(AMDGPU::COPY), DstReg)
+    .addReg(LiveIn);
+  I.eraseFromParent();
+  return true;
+}
+
bool AMDGPUInstructionSelector::selectEndCfIntrinsic(MachineInstr &MI) const {
  // FIXME: Manually selecting to avoid dealing with the SReg_1 trick
// SelectionDAG uses for wave32 vs wave64.
bool selectIntrinsicIcmp(MachineInstr &MI) const;
bool selectBallot(MachineInstr &I) const;
bool selectRelocConstant(MachineInstr &I) const;
+ bool selectReturnAddress(MachineInstr &I) const;
bool selectG_INTRINSIC(MachineInstr &I) const;
bool selectEndCfIntrinsic(MachineInstr &MI) const;
case Intrinsic::amdgcn_kernarg_segment_ptr:
case Intrinsic::amdgcn_s_getpc:
case Intrinsic::amdgcn_groupstaticsize:
- case Intrinsic::amdgcn_reloc_constant: {
+ case Intrinsic::amdgcn_reloc_constant:
+ case Intrinsic::returnaddress: {
unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
break;
--- /dev/null
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s
+
+---
+name: return_address_already_live_in_copy
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+liveins:
+ - { reg: '$sgpr30_sgpr31', virtual-reg: '%0' }
+
+body: |
+ bb.0:
+ liveins: $sgpr30_sgpr31
+ ; CHECK-LABEL: name: return_address_already_live_in_copy
+ ; CHECK: liveins: $sgpr30_sgpr31
+ ; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr30_sgpr31
+ ; CHECK: S_ENDPGM 0, implicit [[COPY]], implicit [[COPY]]
+ %0:sgpr(p0) = COPY $sgpr30_sgpr31
+ %1:sgpr(p0) = G_INTRINSIC intrinsic(@llvm.returnaddress), 0
+ S_ENDPGM 0, implicit %0, implicit %1
+...
+
+---
+name: return_address_already_block_live_in_copy_not_mf_live_in
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins: $sgpr30_sgpr31
+    ; CHECK-LABEL: name: return_address_already_block_live_in_copy_not_mf_live_in
+ ; CHECK: liveins: $sgpr30_sgpr31
+ ; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr30_sgpr31
+ ; CHECK: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr30_sgpr31
+ ; CHECK: S_ENDPGM 0, implicit [[COPY1]], implicit [[COPY]]
+ %0:sgpr(p0) = COPY $sgpr30_sgpr31
+ %1:sgpr(p0) = G_INTRINSIC intrinsic(@llvm.returnaddress), 0
+ S_ENDPGM 0, implicit %0, implicit %1
+...
+
+---
+name: return_address_no_live_in
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+
+ ; CHECK-LABEL: name: return_address_no_live_in
+ ; CHECK: liveins: $sgpr30_sgpr31
+ ; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr30_sgpr31
+ ; CHECK: S_ENDPGM 0, implicit [[COPY]]
+ %0:sgpr(p0) = G_INTRINSIC intrinsic(@llvm.returnaddress), 0
+ S_ENDPGM 0, implicit %0
+...
+
+---
+name: return_address_no_live_in_non_entry_block
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+ ; CHECK-LABEL: name: return_address_no_live_in_non_entry_block
+ ; CHECK: bb.0:
+ ; CHECK: successors: %bb.1(0x80000000)
+ ; CHECK: liveins: $sgpr30_sgpr31
+ ; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr30_sgpr31
+ ; CHECK: S_BRANCH %bb.1
+ ; CHECK: bb.1:
+ ; CHECK: S_ENDPGM 0, implicit [[COPY]]
+ bb.0:
+ G_BR %bb.1
+
+ bb.1:
+ %0:sgpr(p0) = G_INTRINSIC intrinsic(@llvm.returnaddress), 0
+ S_ENDPGM 0, implicit %0
+...
+
+---
+name: return_address_multi_use
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+ ; CHECK-LABEL: name: return_address_multi_use
+ ; CHECK: bb.0:
+ ; CHECK: successors: %bb.1(0x80000000)
+ ; CHECK: liveins: $sgpr30_sgpr31
+ ; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr30_sgpr31
+ ; CHECK: S_BRANCH %bb.1
+ ; CHECK: bb.1:
+ ; CHECK: S_ENDPGM 0, implicit [[COPY]], implicit [[COPY]]
+ bb.0:
+ %0:sgpr(p0) = G_INTRINSIC intrinsic(@llvm.returnaddress), 0
+ G_BR %bb.1
+
+ bb.1:
+ %1:sgpr(p0) = G_INTRINSIC intrinsic(@llvm.returnaddress), 0
+ S_ENDPGM 0, implicit %0, implicit %1
+...
+
+---
+name: return_address_kernel_is_null
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo:
+ isEntryFunction: true
+
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: return_address_kernel_is_null
+ ; CHECK: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
+ ; CHECK: S_ENDPGM 0, implicit [[S_MOV_B64_]]
+ %0:sgpr(p0) = G_INTRINSIC intrinsic(@llvm.returnaddress), 0
+ S_ENDPGM 0, implicit %0
+...
-; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck --check-prefix=GCN %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck --check-prefix=GCN %s
+; RUN: llc -global-isel -amdgpu-fixed-function-abi -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck --check-prefix=GCN %s
; Test with zero frame
; GCN-LABEL: {{^}}func1
; Test with amdgpu_kernel
; GCN-LABEL: {{^}}func3
; GCN: v_mov_b32_e32 v0, 0
-; GCN: v_mov_b32_e32 v1, v0
+; GCN: v_mov_b32_e32 v1, {{v0|0}}
define amdgpu_kernel void @func3(i8** %out) nounwind {
entry:
%tmp = tail call i8* @llvm.returnaddress(i32 0)
; Test with use outside the entry-block
; GCN-LABEL: {{^}}func4
; GCN: v_mov_b32_e32 v0, 0
-; GCN: v_mov_b32_e32 v1, v0
+; GCN: v_mov_b32_e32 v1, {{v0|0}}
define amdgpu_kernel void @func4(i8** %out, i32 %val) nounwind {
entry:
%cmp = icmp ne i32 %val, 0
unreachable
}
+declare void @callee()
+
+; GCN-LABEL: {{^}}multi_use:
+; GCN-DAG: v_mov_b32_e32 v[[LO:4[0-9]+]], s30
+; GCN-DAG: v_mov_b32_e32 v[[HI:4[0-9]+]], s31
+; GCN: global_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
+; GCN: s_swappc_b64
+; GCN: global_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
+define void @multi_use() nounwind {
+entry:
+ %ret0 = tail call i8* @llvm.returnaddress(i32 0)
+ store volatile i8* %ret0, i8* addrspace(1)* undef
+ call void @callee()
+ %ret1 = tail call i8* @llvm.returnaddress(i32 0)
+ store volatile i8* %ret1, i8* addrspace(1)* undef
+ ret void
+}
declare i8* @llvm.returnaddress(i32) nounwind readnone