From 23c6d1501d80073784cab367d30d50419ffa5706 Mon Sep 17 00:00:00 2001 From: Michael Liao Date: Wed, 4 Nov 2020 16:07:57 -0500 Subject: [PATCH] [amdgpu] Add `llvm.amdgcn.endpgm` support. - `llvm.amdgcn.endpgm` is added to enable "abort" support. Differential Revision: https://reviews.llvm.org/D90809 --- clang/include/clang/Basic/BuiltinsAMDGPU.def | 2 ++ clang/test/CodeGenCUDA/builtins-amdgcn.cu | 6 ++++ llvm/include/llvm/IR/IntrinsicsAMDGPU.td | 4 +++ llvm/lib/Target/AMDGPU/SOPInstructions.td | 6 ++++ llvm/test/CodeGen/AMDGPU/amd.endpgm.ll | 50 ++++++++++++++++++++++++++++ 5 files changed, 68 insertions(+) create mode 100644 llvm/test/CodeGen/AMDGPU/amd.endpgm.ll diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def index f5901e6..123a7ad 100644 --- a/clang/include/clang/Basic/BuiltinsAMDGPU.def +++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def @@ -214,6 +214,8 @@ BUILTIN(__builtin_amdgcn_read_exec, "LUi", "nc") BUILTIN(__builtin_amdgcn_read_exec_lo, "Ui", "nc") BUILTIN(__builtin_amdgcn_read_exec_hi, "Ui", "nc") +BUILTIN(__builtin_amdgcn_endpgm, "v", "nr") + //===----------------------------------------------------------------------===// // R600-NI only builtins. //===----------------------------------------------------------------------===// diff --git a/clang/test/CodeGenCUDA/builtins-amdgcn.cu b/clang/test/CodeGenCUDA/builtins-amdgcn.cu index 1c3a790..8f0d0d0 100644 --- a/clang/test/CodeGenCUDA/builtins-amdgcn.cu +++ b/clang/test/CodeGenCUDA/builtins-amdgcn.cu @@ -16,3 +16,9 @@ void test_ds_fmax(float src) { __shared__ float shared; volatile float x = __builtin_amdgcn_ds_fmaxf(&shared, src, 0, 0, false); } + +// CHECK-LABEL: @_Z6endpgmv( +// CHECK: call void @llvm.amdgcn.endpgm() +__global__ void endpgm() { + __builtin_amdgcn_endpgm(); +} diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index 304377c..bc04fa4 100644 --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -1577,6 +1577,10 @@ def int_amdgcn_wqm_vote : Intrinsic<[llvm_i1_ty], // FIXME: Should this be IntrNoMem, IntrHasSideEffects, or IntrWillReturn? def int_amdgcn_kill : Intrinsic<[], [llvm_i1_ty], []>; +def int_amdgcn_endpgm : GCCBuiltin<"__builtin_amdgcn_endpgm">, + Intrinsic<[], [], [IntrNoReturn, IntrCold, IntrNoMem, IntrHasSideEffects] +>; + // Copies the active channels of the source value to the destination value, // with the guarantee that the source value is computed as if the entire // program were executed in Whole Wavefront Mode, i.e. with all channels diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td index 08966d7..0052717 100644 --- a/llvm/lib/Target/AMDGPU/SOPInstructions.td +++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td @@ -1118,6 +1118,7 @@ let isTerminator = 1 in { def S_ENDPGM : SOPP_Pseudo<"s_endpgm", (ins EndpgmImm:$simm16), "$simm16"> { let isBarrier = 1; let isReturn = 1; + let hasSideEffects = 1; } def S_ENDPGM_SAVED : SOPP_Pseudo<"s_endpgm_saved", (ins)> { @@ -1329,6 +1330,11 @@ def : GCNPat < >; def : GCNPat < + (int_amdgcn_endpgm), + (S_ENDPGM (i16 0)) +>; + +def : GCNPat < (i64 (ctpop i64:$src)), (i64 (REG_SEQUENCE SReg_64, (i32 (COPY_TO_REGCLASS (S_BCNT1_I32_B64 $src), SReg_32)), sub0, diff --git a/llvm/test/CodeGen/AMDGPU/amd.endpgm.ll b/llvm/test/CodeGen/AMDGPU/amd.endpgm.ll new file mode 100644 index 0000000..ac9cd06 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/amd.endpgm.ll @@ -0,0 +1,50 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck %s + +define amdgpu_kernel void @test0() { +; CHECK-LABEL: test0: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_endpgm + tail call void @llvm.amdgcn.endpgm() + unreachable +} + +define void @test1() { +; CHECK-LABEL: test1: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: s_endpgm + tail call void @llvm.amdgcn.endpgm() + unreachable +} + +define amdgpu_kernel void @test2(i32* %p, i32 %x) { +; CHECK-LABEL: test2: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_load_dword s2, s[0:1], 0x2c +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: s_cmp_lt_i32 s2, 1 +; CHECK-NEXT: s_cbranch_scc0 BB2_2 +; CHECK-NEXT: ; %bb.1: ; %else +; CHECK-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; CHECK-NEXT: v_mov_b32_e32 v2, s2 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: v_mov_b32_e32 v0, s0 +; CHECK-NEXT: v_mov_b32_e32 v1, s1 +; CHECK-NEXT: flat_store_dword v[0:1], v2 +; CHECK-NEXT: s_endpgm +; CHECK-NEXT: BB2_2: ; %then +; CHECK-NEXT: s_endpgm + %cond = icmp sgt i32 %x, 0 + br i1 %cond, label %then, label %else + +then: + tail call void @llvm.amdgcn.endpgm() + unreachable + +else: + store i32 %x, i32* %p + ret void +} + +declare void @llvm.amdgcn.endpgm() -- 2.7.4