AMDGPU/clang: Remove target features from address space test builtins
authorMatt Arsenault <Matthew.Arsenault@amd.com>
Mon, 28 Nov 2022 22:26:14 +0000 (17:26 -0500)
committerMatt Arsenault <Matthew.Arsenault@amd.com>
Thu, 29 Dec 2022 23:46:41 +0000 (18:46 -0500)
It turns out we can codegen these on targets without flat addressing,
although the runtime probably didn't put anything useful there. The
proper diagnostic would be to disallow flat pointer uses or languages
with them, not this one edge case. Allows removing one of the special
cases requiring subtarget support in the device libraries.

clang/include/clang/Basic/BuiltinsAMDGPU.def
clang/test/CodeGenOpenCL/builtins-amdgcn-flat-address-space.cl [new file with mode: 0644]
clang/test/CodeGenOpenCL/builtins-amdgcn.cl
clang/test/SemaOpenCL/builtins-amdgcn-error-flat-address-space.cl
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.is.private.ll
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.is.shared.ll

index 9e71709..5c39bae 100644 (file)
@@ -165,13 +165,17 @@ BUILTIN(__builtin_amdgcn_fcmp, "WUiddIi", "nc")
 BUILTIN(__builtin_amdgcn_fcmpf, "WUiffIi", "nc")
 
 //===----------------------------------------------------------------------===//
+// Flat addressing builtins.
+//===----------------------------------------------------------------------===//
+BUILTIN(__builtin_amdgcn_is_shared, "bvC*0", "nc")
+BUILTIN(__builtin_amdgcn_is_private, "bvC*0", "nc")
+
+//===----------------------------------------------------------------------===//
 // CI+ only builtins.
 //===----------------------------------------------------------------------===//
 TARGET_BUILTIN(__builtin_amdgcn_s_dcache_inv_vol, "v", "n", "ci-insts")
 TARGET_BUILTIN(__builtin_amdgcn_buffer_wbinvl1_vol, "v", "n", "ci-insts")
 TARGET_BUILTIN(__builtin_amdgcn_ds_gws_sema_release_all, "vUi", "n", "ci-insts")
-TARGET_BUILTIN(__builtin_amdgcn_is_shared, "bvC*0", "nc", "flat-address-space")
-TARGET_BUILTIN(__builtin_amdgcn_is_private, "bvC*0", "nc", "flat-address-space")
 
 //===----------------------------------------------------------------------===//
 // Interpolation builtins.
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-flat-address-space.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-flat-address-space.cl
new file mode 100644 (file)
index 0000000..57ee1c1
--- /dev/null
@@ -0,0 +1,22 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -cl-std=CL2.0 -triple amdgcn-unknown-unknown -target-cpu tahiti -S -emit-llvm -disable-llvm-passes -o - %s | FileCheck -enable-var-scope %s
+
+// SI did not actually support flat addressing, but we can codegen the address
+// space test builtins. The target-specific part is a load from the implicit
+// argument buffer to use for the high pointer bits. It's just that the buffer won't
+// be initialized to something useful. The proper way to diagnose invalid flat
+// usage is to forbid flat pointers on unsupported targets.
+
+// CHECK-LABEL: @test_is_shared_global(
+// CHECK: [[CAST:%[0-9]+]] = addrspacecast ptr addrspace(1) %{{[0-9]+}} to ptr
+// CHECK: call i1 @llvm.amdgcn.is.shared(ptr [[CAST]]
+int test_is_shared_global(const global int* ptr) {
+  return __builtin_amdgcn_is_shared(ptr);
+}
+
+// CHECK-LABEL: @test_is_private_global(
+// CHECK: [[CAST:%[0-9]+]] = addrspacecast ptr addrspace(1) %{{[0-9]+}} to ptr
+// CHECK: call i1 @llvm.amdgcn.is.private(ptr [[CAST]]
+int test_is_private_global(const global int* ptr) {
+  return __builtin_amdgcn_is_private(ptr);
+}
index eb1fa22..9a20dba 100644 (file)
@@ -1,5 +1,5 @@
 // REQUIRES: amdgpu-registered-target
-// RUN: %clang_cc1 -no-opaque-pointers -cl-std=CL2.0 -triple amdgcn-unknown-unknown -S -emit-llvm -o - %s | FileCheck -enable-var-scope %s
+// RUN: %clang_cc1 -no-opaque-pointers -cl-std=CL2.0 -triple amdgcn-unknown-unknown -target-cpu tahiti -S -emit-llvm -o - %s | FileCheck -enable-var-scope %s
 
 #pragma OPENCL EXTENSION cl_khr_fp64 : enable
 
index 0d759ba..86ee55a 100644 (file)
@@ -1,8 +1,12 @@
 // REQUIRES: amdgpu-registered-target
 // RUN: %clang_cc1 -triple amdgcn-- -target-cpu tahiti -verify -S -o - %s
 
+// expected-no-diagnostics
+
+// Make sure no warning is produced due to the dead "flat-address-space" feature.
+__attribute__((target("flat-address-space")))
 void test_flat_address_space_builtins(int* ptr)
 {
-  (void)__builtin_amdgcn_is_shared(ptr); // expected-error {{'__builtin_amdgcn_is_shared' needs target feature flat-address-space}}
-  (void)__builtin_amdgcn_is_private(ptr); // expected-error {{'__builtin_amdgcn_is_private' needs target feature flat-address-space}}
+  (void)__builtin_amdgcn_is_shared(ptr);
+  (void)__builtin_amdgcn_is_private(ptr);
 }
index 2a92101..ff58d05 100644 (file)
@@ -1,8 +1,9 @@
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI %s
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI %s
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s
 
 ; GCN-LABEL: {{^}}is_private_vgpr:
-; GCN-DAG: {{flat|global}}_load_dwordx2 v{{\[[0-9]+}}:[[PTR_HI:[0-9]+]]]
+; GCN-DAG: {{flat|global|buffer}}_load_dwordx2 v{{\[[0-9]+}}:[[PTR_HI:[0-9]+]]]
 ; CI-DAG: s_load_dword [[APERTURE:s[0-9]+]], s[4:5], 0x11
 ; CI: v_cmp_eq_u32_e32 vcc, [[APERTURE]], v[[PTR_HI]]
 
index 1a10a9c..7479fc8 100644 (file)
@@ -1,8 +1,9 @@
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI %s
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI %s
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s
 
 ; GCN-LABEL: {{^}}is_local_vgpr:
-; GCN-DAG: {{flat|global}}_load_dwordx2 v{{\[[0-9]+}}:[[PTR_HI:[0-9]+]]]
+; GCN-DAG: {{flat|global|buffer}}_load_dwordx2 v{{\[[0-9]+}}:[[PTR_HI:[0-9]+]]]
 ; CI-DAG: s_load_dword [[APERTURE:s[0-9]+]], s[4:5], 0x10
 
 ; GFX9: s_mov_b64 s[{{[0-9]+}}:[[HI:[0-9]+]]], src_shared_base