From b2ff8dfea0664584630cc940793992235d7d8537 Mon Sep 17 00:00:00 2001
From: Konstantin Zhuravlyov
Date: Fri, 26 May 2017 20:38:26 +0000
Subject: [PATCH] Resubmit r303859 with test fixed.

[AMDGPU] add intrinsic for s_getpc

Summary: The s_getpc instruction is exposed as intrinsic llvm.amdgcn.s.getpc.

Patch by Tim Corringham

llvm-svn: 304031
---
 llvm/include/llvm/IR/IntrinsicsAMDGPU.td        | 10 ++++++++++
 llvm/lib/Target/AMDGPU/SOPInstructions.td       |  4 +++-
 llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.getpc.ll | 15 +++++++++++++++
 3 files changed, 28 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.getpc.ll

diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index d7413fe..e192854 100644
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -566,6 +566,16 @@ def int_amdgcn_s_getreg :
   [IntrReadMem, IntrSpeculatable]
 >;
 
+// int_amdgcn_s_getpc is provided to allow a specific style of position
+// independent code to determine the high part of its address when it is
+// known (through convention) that the code and any data of interest do
+// not cross a 4GB address boundary. Use for any other purpose may not
+// produce the desired results as optimizations may cause code movement,
+// especially as we explicitly use IntrNoMem to allow optimizations.
+def int_amdgcn_s_getpc :
+  GCCBuiltin<"__builtin_amdgcn_s_getpc">,
+  Intrinsic<[llvm_i64_ty], [], [IntrNoMem, IntrSpeculatable]>;
+
 // __builtin_amdgcn_interp_mov , , ,
 // param values: 0 = P10, 1 = P20, 2 = P0
 def int_amdgcn_interp_mov :
diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td
index f2d8b6f..ec29a66 100644
--- a/llvm/lib/Target/AMDGPU/SOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td
@@ -184,7 +184,9 @@ def S_BITSET0_B32 : SOP1_32 <"s_bitset0_b32">;
 def S_BITSET0_B64 : SOP1_64_32 <"s_bitset0_b64">;
 def S_BITSET1_B32 : SOP1_32 <"s_bitset1_b32">;
 def S_BITSET1_B64 : SOP1_64_32 <"s_bitset1_b64">;
-def S_GETPC_B64 : SOP1_64_0 <"s_getpc_b64">;
+def S_GETPC_B64 : SOP1_64_0 <"s_getpc_b64",
+  [(set i64:$sdst, (int_amdgcn_s_getpc))] // select llvm.amdgcn.s.getpc to this instruction
+>;
 
 let isTerminator = 1, isBarrier = 1, SchedRW = [WriteBranch] in {
 
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.getpc.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.getpc.ll
new file mode 100644
index 0000000..22e15e2
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.getpc.ll
@@ -0,0 +1,15 @@
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+
+declare i64 @llvm.amdgcn.s.getpc() #0
+
+; GCN-LABEL: {{^}}test_s_getpc:
+; GCN: s_load_dwordx2
+; GCN-DAG: s_getpc_b64 s{{\[[0-9]+:[0-9]+\]}}
+; GCN: buffer_store_dwordx2
+define amdgpu_kernel void @test_s_getpc(i64 addrspace(1)* %out) nounwind { ; stores to %out, so it must not be readnone/speculatable
+  %tmp = call i64 @llvm.amdgcn.s.getpc()
+  store volatile i64 %tmp, i64 addrspace(1)* %out, align 8
+  ret void
+}
+
+attributes #0 = { nounwind readnone speculatable }
-- 
2.7.4