AMDGPU: Add __builtin_amdgcn_permlane64
author Matt Arsenault <Matthew.Arsenault@amd.com>
Wed, 28 Sep 2022 21:55:30 +0000 (17:55 -0400)
committer Matt Arsenault <arsenm2@gmail.com>
Fri, 14 Oct 2022 04:12:11 +0000 (21:12 -0700)
clang/include/clang/Basic/BuiltinsAMDGPU.def
clang/test/CodeGenOpenCL/builtins-amdgcn-gfx11.cl
clang/test/SemaOpenCL/builtins-amdgcn-gfx11.cl
llvm/include/llvm/IR/IntrinsicsAMDGPU.td

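diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def
index d8f36f5..d4d16d5 100644
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def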
@@ -259,6 +259,9 @@ TARGET_BUILTIN(__builtin_amdgcn_image_bvh_intersect_ray_lh, "V4UiWUifV4fV4hV4hV4
 // GFX11+ only builtins.
 //===----------------------------------------------------------------------===//
 
+// TODO: This is a no-op in wave32. Should the builtin require wavefrontsize64?
+TARGET_BUILTIN(__builtin_amdgcn_permlane64, "UiUi", "nc", "gfx11-insts")
+
 //===----------------------------------------------------------------------===//
 // WMMA builtins.
 // Postfix w32 indicates the builtin requires wavefront size of 32.
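diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx11.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx11.cl
index 4d64d7c..a4f2d61 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx11.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx11.cl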
@@ -31,3 +31,9 @@ void test_ds_bvh_stack_rtn(global uint2* out, uint addr, uint data, uint4 data1)
 {
   *out = __builtin_amdgcn_ds_bvh_stack_rtn(addr, data, data1, 128);
 }
+
+// CHECK-LABEL: @test_permlane64(
+// CHECK: call i32 @llvm.amdgcn.permlane64(i32 %a)
+void test_permlane64(global uint* out, uint a) {
+  *out = __builtin_amdgcn_permlane64(a);
+}
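diff --git a/clang/test/SemaOpenCL/builtins-amdgcn-gfx11.cl b/clang/test/SemaOpenCL/builtins-amdgcn-gfx11.cl
index adfd236..823d302 100644
--- a/clang/test/SemaOpenCL/builtins-amdgcn-gfx11.cl
+++ b/clang/test/SemaOpenCL/builtins-amdgcn-gfx11.cl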
@@ -13,4 +13,6 @@ void test(global uint* out1, global ulong* out2, int x) {
 #if __has_builtin(__builtin_amdgcn_s_sendmsg_rtnl)
   *out2 = __builtin_amdgcn_s_sendmsg_rtnl(x); // GFX11-error {{argument to '__builtin_amdgcn_s_sendmsg_rtnl' must be a constant integer}}
 #endif
+
+  *out1 = __builtin_amdgcn_permlane64(x); // GFX10-error {{'__builtin_amdgcn_permlane64' needs target feature gfx11-insts}}
 }
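diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index a90c44b..2c63d2e 100644
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td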
@@ -1991,6 +1991,7 @@ def int_amdgcn_image_bvh_intersect_ray :
 
 // llvm.amdgcn.permlane64 <src0>
 def int_amdgcn_permlane64 :
+  ClangBuiltin<"__builtin_amdgcn_permlane64">,
   Intrinsic<[llvm_i32_ty], [llvm_i32_ty],
             [IntrNoMem, IntrConvergent, IntrWillReturn]>;