AMDGPU/GlobalISel: Select llvm.returnaddress
author: Matt Arsenault <Matthew.Arsenault@amd.com>
Mon, 20 Jul 2020 01:26:02 +0000 (21:26 -0400)
committer: Matt Arsenault <arsenm2@gmail.com>
Tue, 4 Aug 2020 21:14:38 +0000 (17:14 -0400)
llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-returnaddress.mir [new file with mode: 0644]
llvm/test/CodeGen/AMDGPU/returnaddress.ll

index 43f5e53..0beff5c 100644 (file)
@@ -872,6 +872,8 @@ bool AMDGPUInstructionSelector::selectG_INTRINSIC(MachineInstr &I) const {
     return selectBallot(I);
   case Intrinsic::amdgcn_reloc_constant:
     return selectRelocConstant(I);
+  case Intrinsic::returnaddress:
+    return selectReturnAddress(I);
   default:
     return selectImpl(I, *CoverageInfo);
   }
@@ -1077,6 +1079,54 @@ bool AMDGPUInstructionSelector::selectRelocConstant(MachineInstr &I) const {
   return true;
 }
 
+bool AMDGPUInstructionSelector::selectReturnAddress(MachineInstr &I) const {
+  MachineBasicBlock *MBB = I.getParent();
+  MachineFunction &MF = *MBB->getParent();
+  const DebugLoc &DL = I.getDebugLoc();
+
+  MachineOperand &Dst = I.getOperand(0);
+  Register DstReg = Dst.getReg();
+  unsigned Depth = I.getOperand(2).getImm();
+
+  const TargetRegisterClass *RC
+    = TRI.getConstrainedRegClassForOperand(Dst, *MRI);
+  if (!RC->hasSubClassEq(&AMDGPU::SGPR_64RegClass) ||
+      !RBI.constrainGenericRegister(DstReg, *RC, *MRI))
+    return false;
+
+  MachineBasicBlock &EntryMBB = MF.front();
+
+  // Check for kernel and shader functions
+  if (Depth != 0 ||
+      MF.getInfo<SIMachineFunctionInfo>()->isEntryFunction()) {
+    BuildMI(*MBB, &I, DL, TII.get(AMDGPU::S_MOV_B64), DstReg)
+      .addImm(0);
+    I.eraseFromParent();
+    return true;
+  }
+
+  Register ReturnAddrReg = TRI.getReturnAddressReg(MF);
+
+  MachineFrameInfo &MFI = MF.getFrameInfo();
+  // There is a call to @llvm.returnaddress in this function
+  MFI.setReturnAddressIsTaken(true);
+
+  // Get the return address reg and mark it as an implicit live-in
+  Register LiveIn = MRI->getLiveInVirtReg(ReturnAddrReg);
+  if (!LiveIn) {
+    LiveIn = MF.addLiveIn(ReturnAddrReg, RC);
+    BuildMI(EntryMBB, EntryMBB.begin(), DL, TII.get(AMDGPU::COPY), LiveIn)
+      .addReg(ReturnAddrReg);
+    if (!EntryMBB.isLiveIn(ReturnAddrReg))
+      EntryMBB.addLiveIn(ReturnAddrReg);
+  }
+
+  BuildMI(*MBB, &I, DL, TII.get(AMDGPU::COPY), DstReg)
+    .addReg(LiveIn);
+  I.eraseFromParent();
+  return true;
+}
+
 bool AMDGPUInstructionSelector::selectEndCfIntrinsic(MachineInstr &MI) const {
   // FIXME: Manually selecting to avoid dealiing with the SReg_1 trick
   // SelectionDAG uses for wave32 vs wave64.
index 1a0a9ce..c86f5f7 100644 (file)
@@ -109,6 +109,7 @@ private:
   bool selectIntrinsicIcmp(MachineInstr &MI) const;
   bool selectBallot(MachineInstr &I) const;
   bool selectRelocConstant(MachineInstr &I) const;
+  bool selectReturnAddress(MachineInstr &I) const;
   bool selectG_INTRINSIC(MachineInstr &I) const;
 
   bool selectEndCfIntrinsic(MachineInstr &MI) const;
index 9674474..f71e1be 100644 (file)
@@ -4023,7 +4023,8 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
     case Intrinsic::amdgcn_kernarg_segment_ptr:
     case Intrinsic::amdgcn_s_getpc:
     case Intrinsic::amdgcn_groupstaticsize:
-    case Intrinsic::amdgcn_reloc_constant: {
+    case Intrinsic::amdgcn_reloc_constant:
+    case Intrinsic::returnaddress: {
       unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
       OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
       break;
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-returnaddress.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-returnaddress.mir
new file mode 100644 (file)
index 0000000..72927e7
--- /dev/null
@@ -0,0 +1,122 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s
+
+---
+name: return_address_already_live_in_copy
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+liveins:
+  - { reg: '$sgpr30_sgpr31', virtual-reg: '%0' }
+
+body: |
+  bb.0:
+    liveins: $sgpr30_sgpr31
+    ; CHECK-LABEL: name: return_address_already_live_in_copy
+    ; CHECK: liveins: $sgpr30_sgpr31
+    ; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr30_sgpr31
+    ; CHECK: S_ENDPGM 0, implicit [[COPY]], implicit [[COPY]]
+    %0:sgpr(p0) = COPY $sgpr30_sgpr31
+    %1:sgpr(p0) = G_INTRINSIC intrinsic(@llvm.returnaddress), 0
+    S_ENDPGM 0, implicit %0, implicit %1
+...
+
+---
+name: return_address_already_block_live_in_copy_not_mf_live_in
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins: $sgpr30_sgpr31
+    ; CHECK-LABEL: name: return_address_already_block_live_in_copy_not_mf_live_in
+    ; CHECK: liveins: $sgpr30_sgpr31
+    ; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr30_sgpr31
+    ; CHECK: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr30_sgpr31
+    ; CHECK: S_ENDPGM 0, implicit [[COPY1]], implicit [[COPY]]
+    %0:sgpr(p0) = COPY $sgpr30_sgpr31
+    %1:sgpr(p0) = G_INTRINSIC intrinsic(@llvm.returnaddress), 0
+    S_ENDPGM 0, implicit %0, implicit %1
+...
+
+---
+name: return_address_no_live_in
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+
+    ; CHECK-LABEL: name: return_address_no_live_in
+    ; CHECK: liveins: $sgpr30_sgpr31
+    ; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr30_sgpr31
+    ; CHECK: S_ENDPGM 0, implicit [[COPY]]
+    %0:sgpr(p0) = G_INTRINSIC intrinsic(@llvm.returnaddress), 0
+    S_ENDPGM 0, implicit %0
+...
+
+---
+name: return_address_no_live_in_non_entry_block
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  ; CHECK-LABEL: name: return_address_no_live_in_non_entry_block
+  ; CHECK: bb.0:
+  ; CHECK:   successors: %bb.1(0x80000000)
+  ; CHECK:   liveins: $sgpr30_sgpr31
+  ; CHECK:   [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr30_sgpr31
+  ; CHECK:   S_BRANCH %bb.1
+  ; CHECK: bb.1:
+  ; CHECK:   S_ENDPGM 0, implicit [[COPY]]
+  bb.0:
+    G_BR %bb.1
+
+  bb.1:
+    %0:sgpr(p0) = G_INTRINSIC intrinsic(@llvm.returnaddress), 0
+    S_ENDPGM 0, implicit %0
+...
+
+---
+name: return_address_multi_use
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  ; CHECK-LABEL: name: return_address_multi_use
+  ; CHECK: bb.0:
+  ; CHECK:   successors: %bb.1(0x80000000)
+  ; CHECK:   liveins: $sgpr30_sgpr31
+  ; CHECK:   [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr30_sgpr31
+  ; CHECK:   S_BRANCH %bb.1
+  ; CHECK: bb.1:
+  ; CHECK:   S_ENDPGM 0, implicit [[COPY]], implicit [[COPY]]
+  bb.0:
+    %0:sgpr(p0) = G_INTRINSIC intrinsic(@llvm.returnaddress), 0
+    G_BR %bb.1
+
+  bb.1:
+    %1:sgpr(p0) = G_INTRINSIC intrinsic(@llvm.returnaddress), 0
+    S_ENDPGM 0, implicit %0, implicit %1
+...
+
+---
+name: return_address_kernel_is_null
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo:
+  isEntryFunction: true
+
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: return_address_kernel_is_null
+    ; CHECK: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
+    ; CHECK: S_ENDPGM 0, implicit [[S_MOV_B64_]]
+    %0:sgpr(p0) = G_INTRINSIC intrinsic(@llvm.returnaddress), 0
+    S_ENDPGM 0, implicit %0
+...
index 1db6e3e..7937ba6 100644 (file)
@@ -1,4 +1,5 @@
-; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck --check-prefix=GCN %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck --check-prefix=GCN %s
+; RUN: llc -global-isel -amdgpu-fixed-function-abi -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck --check-prefix=GCN %s
 
 ; Test with zero frame
 ; GCN-LABEL: {{^}}func1
@@ -25,7 +26,7 @@ entry:
 ; Test with amdgpu_kernel
 ; GCN-LABEL: {{^}}func3
 ; GCN: v_mov_b32_e32 v0, 0
-; GCN: v_mov_b32_e32 v1, v0
+; GCN: v_mov_b32_e32 v1, {{v0|0}}
 define amdgpu_kernel void @func3(i8** %out) nounwind {
 entry:
   %tmp = tail call i8* @llvm.returnaddress(i32 0)
@@ -36,7 +37,7 @@ entry:
 ; Test with use outside the entry-block
 ; GCN-LABEL: {{^}}func4
 ; GCN: v_mov_b32_e32 v0, 0
-; GCN: v_mov_b32_e32 v1, v0
+; GCN: v_mov_b32_e32 v1, {{v0|0}}
 define amdgpu_kernel void @func4(i8** %out, i32 %val) nounwind {
 entry:
   %cmp = icmp ne i32 %val, 0
@@ -61,5 +62,22 @@ entry:
   unreachable
 }
 
+declare void @callee()
+
+; GCN-LABEL: {{^}}multi_use:
+; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], s30
+; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], s31
+; GCN: global_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
+; GCN: s_swappc_b64
+; GCN: global_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
+define void @multi_use() nounwind {
+entry:
+  %ret0 = tail call i8* @llvm.returnaddress(i32 0)
+  store volatile i8* %ret0, i8* addrspace(1)* undef
+  call void @callee()
+  %ret1 = tail call i8* @llvm.returnaddress(i32 0)
+  store volatile i8* %ret1, i8* addrspace(1)* undef
+  ret void
+}
 
 declare i8* @llvm.returnaddress(i32) nounwind readnone