From 00f6a7f02f9c8d542ce8ff1c9c037d9fdb421b88 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Tue, 10 Jan 2023 20:08:00 -0500 Subject: [PATCH] clang/OpenCL: Fix not setting convergent on block invoke kernels Yet another example of how convergent not being the default is dangerous and backwards. --- clang/lib/CodeGen/TargetInfo.cpp | 2 ++ clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl | 10 +++++----- clang/test/CodeGenOpenCL/cl20-device-side-enqueue.cl | 6 ++++-- 3 files changed, 11 insertions(+), 7 deletions(-) diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp index 1b80529..7e08d42 100644 --- a/clang/lib/CodeGen/TargetInfo.cpp +++ b/clang/lib/CodeGen/TargetInfo.cpp @@ -12451,6 +12451,7 @@ llvm::Value *TargetCodeGenInfo::createEnqueuedBlockKernel( // FIXME: Apply default attributes F->addFnAttr(llvm::Attribute::NoUnwind); + F->addFnAttr(llvm::Attribute::Convergent); Builder.CreateRetVoid(); Builder.restoreIP(IP); @@ -12504,6 +12505,7 @@ llvm::Value *AMDGPUTargetCodeGenInfo::createEnqueuedBlockKernel( // FIXME: Apply default attributes F->addFnAttr(llvm::Attribute::NoUnwind); + F->addFnAttr(llvm::Attribute::Convergent); F->addFnAttr("enqueued-block"); auto IP = CGF.Builder.saveIP(); diff --git a/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl b/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl index 4277dbb..17c5fc6 100644 --- a/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl +++ b/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl @@ -182,7 +182,7 @@ kernel void test(global char *a, char b, global long *c, long d) { // CHECK-NEXT: ret void // // -// CHECK: Function Attrs: nounwind +// CHECK: Function Attrs: convergent nounwind // CHECK-LABEL: define {{[^@]+}}@__test_block_invoke_kernel // CHECK-SAME: (<{ i32, i32, ptr, ptr addrspace(1), i8 }> [[TMP0:%.*]]) #[[ATTR4:[0-9]+]] !kernel_arg_addr_space !7 !kernel_arg_access_qual !8 !kernel_arg_type !9 !kernel_arg_base_type !9 !kernel_arg_type_qual !10 { // CHECK-NEXT: entry: @@ 
-216,7 +216,7 @@ kernel void test(global char *a, char b, global long *c, long d) { // CHECK-NEXT: ret void // // -// CHECK: Function Attrs: nounwind +// CHECK: Function Attrs: convergent nounwind // CHECK-LABEL: define {{[^@]+}}@__test_block_invoke_2_kernel // CHECK-SAME: (<{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }> [[TMP0:%.*]]) #[[ATTR4]] !kernel_arg_addr_space !7 !kernel_arg_access_qual !8 !kernel_arg_type !9 !kernel_arg_base_type !9 !kernel_arg_type_qual !10 { // CHECK-NEXT: entry: @@ -255,7 +255,7 @@ kernel void test(global char *a, char b, global long *c, long d) { // CHECK-NEXT: ret void // // -// CHECK: Function Attrs: nounwind +// CHECK: Function Attrs: convergent nounwind // CHECK-LABEL: define {{[^@]+}}@__test_block_invoke_3_kernel // CHECK-SAME: (<{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }> [[TMP0:%.*]], ptr addrspace(3) [[TMP1:%.*]]) #[[ATTR4]] !kernel_arg_addr_space !11 !kernel_arg_access_qual !12 !kernel_arg_type !13 !kernel_arg_base_type !13 !kernel_arg_type_qual !14 { // CHECK-NEXT: entry: @@ -282,7 +282,7 @@ kernel void test(global char *a, char b, global long *c, long d) { // CHECK-NEXT: ret void // // -// CHECK: Function Attrs: nounwind +// CHECK: Function Attrs: convergent nounwind // CHECK-LABEL: define {{[^@]+}}@__test_block_invoke_4_kernel // CHECK-SAME: (<{ i32, i32, ptr, i64, ptr addrspace(1) }> [[TMP0:%.*]]) #[[ATTR4]] !kernel_arg_addr_space !7 !kernel_arg_access_qual !8 !kernel_arg_type !9 !kernel_arg_base_type !9 !kernel_arg_type_qual !10 { // CHECK-NEXT: entry: @@ -297,7 +297,7 @@ kernel void test(global char *a, char b, global long *c, long d) { // CHECK: attributes #1 = { convergent noinline norecurse nounwind optnone "amdgpu-flat-work-group-size"="1,256" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" "uniform-work-group-size"="false" } // 
CHECK: attributes #2 = { nocallback nofree nounwind willreturn memory(argmem: readwrite) } // CHECK: attributes #3 = { convergent noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" } -// CHECK: attributes #4 = { nounwind "enqueued-block" } +// CHECK: attributes #4 = { convergent nounwind "enqueued-block" } // CHECK: attributes #5 = { convergent nounwind } //. // CHECK: !0 = !{i32 1, !"amdgpu_code_object_version", i32 400} diff --git a/clang/test/CodeGenOpenCL/cl20-device-side-enqueue.cl b/clang/test/CodeGenOpenCL/cl20-device-side-enqueue.cl index 3cfb5f5..bce1a92 100644 --- a/clang/test/CodeGenOpenCL/cl20-device-side-enqueue.cl +++ b/clang/test/CodeGenOpenCL/cl20-device-side-enqueue.cl @@ -297,7 +297,7 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) { }; // Uses global block literal [[BLG8]] and invoke function [[INVG8]]. - // COMMON: call spir_func void @__device_side_enqueue_block_invoke_11(ptr addrspace(4) addrspacecast (ptr addrspace(1) [[BLG8]] to ptr addrspace(4))) + // COMMON: call spir_func void @__device_side_enqueue_block_invoke_11(ptr addrspace(4) addrspacecast (ptr addrspace(1) [[BLG8]] to ptr addrspace(4))) [[INVOKE_ATTR:#[0-9]+]] block_A(); // Emits global block literal [[BLG8]] and block kernel [[INVGK8]]. [[INVGK8]] calls [[INVG8]]. 
@@ -393,7 +393,7 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) { // COMMON: ret void // COMMON: } // COMMON: define spir_kernel void [[INVLK2]](ptr addrspace(4){{.*}}) -// COMMON: define spir_kernel void [[INVGK1]](ptr addrspace(4){{.*}}, ptr addrspace(3){{.*}}) +// COMMON: define spir_kernel void [[INVGK1]](ptr addrspace(4){{.*}}, ptr addrspace(3){{.*}}) [[INVOKE_ATTR:#[0-9]+]] // COMMON: define spir_kernel void [[INVGK2]](ptr addrspace(4){{.*}}, ptr addrspace(3){{.*}}) // COMMON: define spir_kernel void [[INVGK3]](ptr addrspace(4){{.*}}, ptr addrspace(3){{.*}}) // COMMON: define spir_kernel void [[INVGK4]](ptr addrspace(4){{.*}}, ptr addrspace(3){{.*}}) @@ -412,3 +412,5 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) { // COMMON: define spir_kernel void [[INVGK9]](ptr addrspace(4){{.*}}, ptr addrspace(3){{.*}}) // COMMON: define spir_kernel void [[INVGK10]](ptr addrspace(4){{.*}}) // COMMON: define spir_kernel void [[INVGK11]](ptr addrspace(4){{.*}}) + +// COMMON: attributes [[INVOKE_ATTR]] = { convergent nounwind } -- 2.7.4