From 796a60d2ea32320f298f91beb04f015934598821 Mon Sep 17 00:00:00 2001
From: Jay Foad <jay.foad@amd.com>
Date: Mon, 1 Mar 2021 09:45:55 +0000
Subject: [PATCH] [AMDGPU] New intrinsic void llvm.amdgcn.s.sethalt(i32)

The expected use case is for frontends to insert this into
shaders that are to be run under a debugger. The shader can
then be resumed or single-stepped from the point of the call
under debugger control.

Differential Revision: https://reviews.llvm.org/D97670
---
 llvm/include/llvm/IR/IntrinsicsAMDGPU.td          |  4 ++++
 llvm/lib/Target/AMDGPU/SOPInstructions.td         |  3 ++-
 llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.sethalt.ll | 28 +++++++++++++++++++++++
 3 files changed, 34 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.sethalt.ll

diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index 088aadc..d122aca 100644
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -1283,6 +1283,10 @@ def int_amdgcn_s_decperflevel :
                                 IntrHasSideEffects, IntrWillReturn]> {
 }
 
+def int_amdgcn_s_sethalt :  // Selects to s_sethalt; arg 0 must be immediate.
+  Intrinsic<[], [llvm_i32_ty], [ImmArg<ArgIndex<0>>, IntrNoMem,
+                                IntrHasSideEffects, IntrWillReturn]>;
+
 def int_amdgcn_s_getreg :
   GCCBuiltin<"__builtin_amdgcn_s_getreg">,
   Intrinsic<[llvm_i32_ty], [llvm_i32_ty],
diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td
index 50725de..115aff6 100644
--- a/llvm/lib/Target/AMDGPU/SOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td
@@ -1228,7 +1228,8 @@ def S_WAKEUP : SOPP_Pseudo <"s_wakeup", (ins) > {
 let mayLoad = 0, mayStore = 0, hasSideEffects = 1 in
 def S_WAITCNT : SOPP_Pseudo <"s_waitcnt" , (ins WAIT_FLAG:$simm16), "$simm16",
     [(int_amdgcn_s_waitcnt timm:$simm16)]>;
-def S_SETHALT : SOPP_Pseudo <"s_sethalt" , (ins i16imm:$simm16), "$simm16">;
+def S_SETHALT : SOPP_Pseudo <"s_sethalt" , (ins i32imm:$simm16), "$simm16",
+    [(int_amdgcn_s_sethalt timm:$simm16)]>;  // intrinsic imm -> $simm16
 def S_SETKILL : SOPP_Pseudo <"s_setkill" , (ins i16imm:$simm16), "$simm16">;
 
 // On SI the documentation says sleep for approximately 64 * low 2
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.sethalt.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.sethalt.ll
new file mode 100644
index 0000000..bc2900a
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.sethalt.ll
@@ -0,0 +1,28 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+; RUN: llc -global-isel -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+
+define amdgpu_kernel void @test_s_sethalt() {  ; one s_sethalt per immediate 0..7
+; GCN-LABEL: test_s_sethalt:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_sethalt 0
+; GCN-NEXT:    s_sethalt 1
+; GCN-NEXT:    s_sethalt 2
+; GCN-NEXT:    s_sethalt 3
+; GCN-NEXT:    s_sethalt 4
+; GCN-NEXT:    s_sethalt 5
+; GCN-NEXT:    s_sethalt 6
+; GCN-NEXT:    s_sethalt 7
+; GCN-NEXT:    s_endpgm
+  call void @llvm.amdgcn.s.sethalt(i32 0)
+  call void @llvm.amdgcn.s.sethalt(i32 1)
+  call void @llvm.amdgcn.s.sethalt(i32 2)
+  call void @llvm.amdgcn.s.sethalt(i32 3)
+  call void @llvm.amdgcn.s.sethalt(i32 4)
+  call void @llvm.amdgcn.s.sethalt(i32 5)
+  call void @llvm.amdgcn.s.sethalt(i32 6)
+  call void @llvm.amdgcn.s.sethalt(i32 7)
+  ret void
+}
+
+declare void @llvm.amdgcn.s.sethalt(i32)
-- 
2.7.4