From d12ee4bf7c14a00b14890fc3042edd659dde7fb2 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 12 Jan 2023 10:05:28 -0500 Subject: [PATCH] clang/OpenCL: Extend tests for enqueued block attributes Baseline tests showing that enqueued blocks are not getting the correct attributes applied. --- clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl | 155 +++++++++++++++++++--- 1 file changed, 137 insertions(+), 18 deletions(-) diff --git a/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl b/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl index 17c5fc6..c56aee0 100644 --- a/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl +++ b/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl @@ -1,5 +1,5 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals --include-generated-funcs --prefix-filecheck-ir-name VAR -// RUN: %clang_cc1 -cl-std=CL2.0 -O0 -disable-llvm-passes -fno-ident -emit-llvm -o - -triple amdgcn-amd-amdhsa -target-cpu gfx900 %s | FileCheck %s --check-prefix=CHECK +// RUN: %clang_cc1 -cl-std=CL2.0 -O0 -disable-llvm-passes -fno-ident -emit-llvm -o - -triple amdgcn-amd-amdhsa %s -fdenormal-fp-math-f32=preserve-sign | FileCheck %s --check-prefix=CHECK typedef struct {int a;} ndrange_t; @@ -35,9 +35,40 @@ kernel void test(global char *a, char b, global long *c, long d) { enqueue_kernel(default_queue, flags, ndrange, block); } + +// Test that target attributes are applied to the functions inserted for the +// block. +__attribute__((target("s-memtime-inst"))) +kernel void test_target_features_kernel(global int *i) { + queue_t default_queue; + unsigned flags = 0; + ndrange_t ndrange; + + enqueue_kernel(default_queue, flags, ndrange, + ^(void) { + __builtin_amdgcn_s_memtime(); + }); +} + +__attribute__((target("s-memtime-inst"))) +void test_target_features_func(global int *i) { + queue_t default_queue; + unsigned flags = 0; + ndrange_t ndrange; + + enqueue_kernel(default_queue, flags, ndrange, + ^(void) { + __builtin_amdgcn_s_memtime(); + }); +} + +//. +// CHECK: @__block_literal_global = internal addrspace(1) constant { i32, i32, ptr } { i32 16, i32 8, ptr @__test_target_features_kernel_block_invoke }, align 8 #0 +// CHECK: @__block_literal_global.1 = internal addrspace(1) constant { i32, i32, ptr } { i32 16, i32 8, ptr @__test_target_features_func_block_invoke }, align 8 #0 +//. // CHECK: Function Attrs: convergent noinline norecurse nounwind optnone // CHECK-LABEL: define {{[^@]+}}@callee -// CHECK-SAME: (i64 noundef [[ID:%.*]], ptr addrspace(1) noundef [[OUT:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-SAME: (i64 noundef [[ID:%.*]], ptr addrspace(1) noundef [[OUT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[ID_ADDR:%.*]] = alloca i64, align 8, addrspace(5) // CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) @@ -53,7 +84,7 @@ kernel void test(global char *a, char b, global long *c, long d) { // // CHECK: Function Attrs: convergent noinline norecurse nounwind optnone // CHECK-LABEL: define {{[^@]+}}@test -// CHECK-SAME: (ptr addrspace(1) noundef align 1 [[A:%.*]], i8 noundef [[B:%.*]], ptr addrspace(1) noundef align 8 [[C:%.*]], i64 noundef [[D:%.*]]) #[[ATTR1:[0-9]+]] !kernel_arg_addr_space !3 !kernel_arg_access_qual !4 !kernel_arg_type !5 !kernel_arg_base_type !5 !kernel_arg_type_qual !6 { +// CHECK-SAME: (ptr addrspace(1) noundef align 1 [[A:%.*]], i8 noundef [[B:%.*]], ptr addrspace(1) noundef align 8 [[C:%.*]], i64 noundef [[D:%.*]]) #[[ATTR2:[0-9]+]] !kernel_arg_addr_space !3 !kernel_arg_access_qual !4 !kernel_arg_type !5 !kernel_arg_base_type !5 !kernel_arg_type_qual !6 { // CHECK-NEXT: entry: // CHECK-NEXT: [[A_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) // CHECK-NEXT: [[B_ADDR:%.*]] = alloca i8, align 1, addrspace(5) @@ -167,7 +198,7 @@ kernel void test(global char *a, char b, global long *c, long d) { // // CHECK: Function Attrs: convergent noinline nounwind optnone // CHECK-LABEL: define {{[^@]+}}@__test_block_invoke -// CHECK-SAME: (ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK-SAME: (ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]]) #[[ATTR4:[0-9]+]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) // CHECK-NEXT: [[BLOCK_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) @@ -184,7 +215,7 @@ kernel void test(global char *a, char b, global long *c, long d) { // // CHECK: Function Attrs: convergent nounwind // CHECK-LABEL: define {{[^@]+}}@__test_block_invoke_kernel -// CHECK-SAME: (<{ i32, i32, ptr, ptr addrspace(1), i8 }> [[TMP0:%.*]]) #[[ATTR4:[0-9]+]] !kernel_arg_addr_space !7 !kernel_arg_access_qual !8 !kernel_arg_type !9 !kernel_arg_base_type !9 !kernel_arg_type_qual !10 { +// CHECK-SAME: (<{ i32, i32, ptr, ptr addrspace(1), i8 }> [[TMP0:%.*]]) #[[ATTR5:[0-9]+]] !kernel_arg_addr_space !7 !kernel_arg_access_qual !8 !kernel_arg_type !9 !kernel_arg_base_type !9 !kernel_arg_type_qual !10 { // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP1:%.*]] = alloca <{ i32, i32, ptr, ptr addrspace(1), i8 }>, align 8, addrspace(5) // CHECK-NEXT: store <{ i32, i32, ptr, ptr addrspace(1), i8 }> [[TMP0]], ptr addrspace(5) [[TMP1]], align 8 @@ -195,7 +226,7 @@ kernel void test(global char *a, char b, global long *c, long d) { // // CHECK: Function Attrs: convergent noinline nounwind optnone // CHECK-LABEL: define {{[^@]+}}@__test_block_invoke_2 -// CHECK-SAME: (ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]]) #[[ATTR3]] { +// CHECK-SAME: (ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]]) #[[ATTR4]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) // CHECK-NEXT: [[BLOCK_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) @@ -218,7 +249,7 @@ kernel void test(global char *a, char b, global long *c, long d) { // // CHECK: Function Attrs: convergent nounwind // CHECK-LABEL: define {{[^@]+}}@__test_block_invoke_2_kernel -// CHECK-SAME: (<{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }> [[TMP0:%.*]]) #[[ATTR4]] !kernel_arg_addr_space !7 !kernel_arg_access_qual !8 !kernel_arg_type !9 !kernel_arg_base_type !9 !kernel_arg_type_qual !10 { +// CHECK-SAME: (<{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }> [[TMP0:%.*]]) #[[ATTR5]] !kernel_arg_addr_space !7 !kernel_arg_access_qual !8 !kernel_arg_type !9 !kernel_arg_base_type !9 !kernel_arg_type_qual !10 { // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP1:%.*]] = alloca <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, align 8, addrspace(5) // CHECK-NEXT: store <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }> [[TMP0]], ptr addrspace(5) [[TMP1]], align 8 @@ -229,7 +260,7 @@ kernel void test(global char *a, char b, global long *c, long d) { // // CHECK: Function Attrs: convergent noinline nounwind optnone // CHECK-LABEL: define {{[^@]+}}@__test_block_invoke_3 -// CHECK-SAME: (ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]], ptr addrspace(3) noundef [[LP:%.*]]) #[[ATTR3]] { +// CHECK-SAME: (ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]], ptr addrspace(3) noundef [[LP:%.*]]) #[[ATTR4]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) // CHECK-NEXT: [[LP_ADDR:%.*]] = alloca ptr addrspace(3), align 4, addrspace(5) @@ -257,7 +288,7 @@ kernel void test(global char *a, char b, global long *c, long d) { // // CHECK: Function Attrs: convergent nounwind // CHECK-LABEL: define {{[^@]+}}@__test_block_invoke_3_kernel -// CHECK-SAME: (<{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }> [[TMP0:%.*]], ptr addrspace(3) [[TMP1:%.*]]) #[[ATTR4]] !kernel_arg_addr_space !11 !kernel_arg_access_qual !12 !kernel_arg_type !13 !kernel_arg_base_type !13 !kernel_arg_type_qual !14 { +// CHECK-SAME: (<{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }> [[TMP0:%.*]], ptr addrspace(3) [[TMP1:%.*]]) #[[ATTR5]] !kernel_arg_addr_space !11 !kernel_arg_access_qual !12 !kernel_arg_type !13 !kernel_arg_base_type !13 !kernel_arg_type_qual !14 { // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP2:%.*]] = alloca <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, align 8, addrspace(5) // CHECK-NEXT: store <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }> [[TMP0]], ptr addrspace(5) [[TMP2]], align 8 @@ -268,7 +299,7 @@ kernel void test(global char *a, char b, global long *c, long d) { // // CHECK: Function Attrs: convergent noinline nounwind optnone // CHECK-LABEL: define {{[^@]+}}@__test_block_invoke_4 -// CHECK-SAME: (ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]]) #[[ATTR3]] { +// CHECK-SAME: (ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]]) #[[ATTR4]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) // CHECK-NEXT: [[BLOCK_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) @@ -278,13 +309,13 @@ kernel void test(global char *a, char b, global long *c, long d) { // CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[BLOCK_CAPTURE_ADDR]], align 8 // CHECK-NEXT: [[BLOCK_CAPTURE_ADDR1:%.*]] = getelementptr inbounds <{ i32, i32, ptr, i64, ptr addrspace(1) }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 4 // CHECK-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[BLOCK_CAPTURE_ADDR1]], align 8 -// CHECK-NEXT: call void @callee(i64 noundef [[TMP0]], ptr addrspace(1) noundef [[TMP1]]) #[[ATTR5:[0-9]+]] +// CHECK-NEXT: call void @callee(i64 noundef [[TMP0]], ptr addrspace(1) noundef [[TMP1]]) #[[ATTR9:[0-9]+]] // CHECK-NEXT: ret void // // // CHECK: Function Attrs: convergent nounwind // CHECK-LABEL: define {{[^@]+}}@__test_block_invoke_4_kernel -// CHECK-SAME: (<{ i32, i32, ptr, i64, ptr addrspace(1) }> [[TMP0:%.*]]) #[[ATTR4]] !kernel_arg_addr_space !7 !kernel_arg_access_qual !8 !kernel_arg_type !9 !kernel_arg_base_type !9 !kernel_arg_type_qual !10 { +// CHECK-SAME: (<{ i32, i32, ptr, i64, ptr addrspace(1) }> [[TMP0:%.*]]) #[[ATTR5]] !kernel_arg_addr_space !7 !kernel_arg_access_qual !8 !kernel_arg_type !9 !kernel_arg_base_type !9 !kernel_arg_type_qual !10 { // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP1:%.*]] = alloca <{ i32, i32, ptr, i64, ptr addrspace(1) }>, align 8, addrspace(5) // CHECK-NEXT: store <{ i32, i32, ptr, i64, ptr addrspace(1) }> [[TMP0]], ptr addrspace(5) [[TMP1]], align 8 @@ -292,13 +323,99 @@ kernel void test(global char *a, char b, global long *c, long d) { // CHECK-NEXT: call void @__test_block_invoke_4(ptr [[TMP2]]) // CHECK-NEXT: ret void // +// +// CHECK: Function Attrs: convergent noinline norecurse nounwind optnone +// CHECK-LABEL: define {{[^@]+}}@test_target_features_kernel +// CHECK-SAME: (ptr addrspace(1) noundef align 4 [[I:%.*]]) #[[ATTR6:[0-9]+]] !kernel_arg_addr_space !15 !kernel_arg_access_qual !8 !kernel_arg_type !16 !kernel_arg_base_type !16 !kernel_arg_type_qual !10 { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[I_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// CHECK-NEXT: [[DEFAULT_QUEUE:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// CHECK-NEXT: [[FLAGS:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[NDRANGE:%.*]] = alloca [[STRUCT_NDRANGE_T:%.*]], align 4, addrspace(5) +// CHECK-NEXT: [[TMP:%.*]] = alloca [[STRUCT_NDRANGE_T]], align 4, addrspace(5) +// CHECK-NEXT: store ptr addrspace(1) [[I]], ptr addrspace(5) [[I_ADDR]], align 8 +// CHECK-NEXT: store i32 0, ptr addrspace(5) [[FLAGS]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[DEFAULT_QUEUE]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[FLAGS]], align 4 +// CHECK-NEXT: call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 4 [[TMP]], ptr addrspace(5) align 4 [[NDRANGE]], i64 4, i1 false) +// CHECK-NEXT: [[TMP2:%.*]] = call i32 @__enqueue_kernel_basic(ptr addrspace(1) [[TMP0]], i32 [[TMP1]], ptr addrspace(5) byval([[STRUCT_NDRANGE_T]]) [[TMP]], ptr @__test_target_features_kernel_block_invoke_kernel, ptr addrspacecast (ptr addrspace(1) @__block_literal_global to ptr)) +// CHECK-NEXT: ret void +// +// +// CHECK: Function Attrs: convergent noinline nounwind optnone +// CHECK-LABEL: define {{[^@]+}}@__test_target_features_kernel_block_invoke +// CHECK-SAME: (ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]]) #[[ATTR4]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// CHECK-NEXT: [[BLOCK_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// CHECK-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr addrspace(5) [[DOTBLOCK_DESCRIPTOR_ADDR]], align 8 +// CHECK-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr addrspace(5) [[BLOCK_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.amdgcn.s.memtime() +// CHECK-NEXT: ret void +// +// +// CHECK: Function Attrs: convergent nounwind +// CHECK-LABEL: define {{[^@]+}}@__test_target_features_kernel_block_invoke_kernel +// CHECK-SAME: ({ i32, i32, ptr } [[TMP0:%.*]]) #[[ATTR5]] !kernel_arg_addr_space !7 !kernel_arg_access_qual !8 !kernel_arg_type !9 !kernel_arg_base_type !9 !kernel_arg_type_qual !10 { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP1:%.*]] = alloca { i32, i32, ptr }, align 8, addrspace(5) +// CHECK-NEXT: store { i32, i32, ptr } [[TMP0]], ptr addrspace(5) [[TMP1]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = addrspacecast ptr addrspace(5) [[TMP1]] to ptr +// CHECK-NEXT: call void @__test_target_features_kernel_block_invoke(ptr [[TMP2]]) +// CHECK-NEXT: ret void +// +// +// CHECK: Function Attrs: convergent noinline norecurse nounwind optnone +// CHECK-LABEL: define {{[^@]+}}@test_target_features_func +// CHECK-SAME: (ptr addrspace(1) noundef [[I:%.*]]) #[[ATTR8:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[I_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// CHECK-NEXT: [[DEFAULT_QUEUE:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// CHECK-NEXT: [[FLAGS:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[NDRANGE:%.*]] = alloca [[STRUCT_NDRANGE_T:%.*]], align 4, addrspace(5) +// CHECK-NEXT: [[TMP:%.*]] = alloca [[STRUCT_NDRANGE_T]], align 4, addrspace(5) +// CHECK-NEXT: store ptr addrspace(1) [[I]], ptr addrspace(5) [[I_ADDR]], align 8 +// CHECK-NEXT: store i32 0, ptr addrspace(5) [[FLAGS]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[DEFAULT_QUEUE]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[FLAGS]], align 4 +// CHECK-NEXT: call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 4 [[TMP]], ptr addrspace(5) align 4 [[NDRANGE]], i64 4, i1 false) +// CHECK-NEXT: [[TMP2:%.*]] = call i32 @__enqueue_kernel_basic(ptr addrspace(1) [[TMP0]], i32 [[TMP1]], ptr addrspace(5) byval([[STRUCT_NDRANGE_T]]) [[TMP]], ptr @__test_target_features_func_block_invoke_kernel, ptr addrspacecast (ptr addrspace(1) @__block_literal_global.1 to ptr)) +// CHECK-NEXT: ret void +// +// +// CHECK: Function Attrs: convergent noinline nounwind optnone +// CHECK-LABEL: define {{[^@]+}}@__test_target_features_func_block_invoke +// CHECK-SAME: (ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]]) #[[ATTR4]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// CHECK-NEXT: [[BLOCK_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// CHECK-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr addrspace(5) [[DOTBLOCK_DESCRIPTOR_ADDR]], align 8 +// CHECK-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr addrspace(5) [[BLOCK_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.amdgcn.s.memtime() +// CHECK-NEXT: ret void +// +// +// CHECK: Function Attrs: convergent nounwind +// CHECK-LABEL: define {{[^@]+}}@__test_target_features_func_block_invoke_kernel +// CHECK-SAME: ({ i32, i32, ptr } [[TMP0:%.*]]) #[[ATTR5]] !kernel_arg_addr_space !7 !kernel_arg_access_qual !8 !kernel_arg_type !9 !kernel_arg_base_type !9 !kernel_arg_type_qual !10 { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP1:%.*]] = alloca { i32, i32, ptr }, align 8, addrspace(5) +// CHECK-NEXT: store { i32, i32, ptr } [[TMP0]], ptr addrspace(5) [[TMP1]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = addrspacecast ptr addrspace(5) [[TMP1]] to ptr +// CHECK-NEXT: call void @__test_target_features_func_block_invoke(ptr [[TMP2]]) +// CHECK-NEXT: ret void +// //. -// CHECK: attributes #0 = { convergent noinline norecurse nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" } -// CHECK: attributes #1 = { convergent noinline norecurse nounwind optnone "amdgpu-flat-work-group-size"="1,256" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" "uniform-work-group-size"="false" } -// CHECK: attributes #2 = { nocallback nofree nounwind willreturn memory(argmem: readwrite) } -// CHECK: attributes #3 = { convergent noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" } -// CHECK: attributes #4 = { convergent nounwind "enqueued-block" } -// CHECK: attributes #5 = { convergent nounwind } +// CHECK: attributes #0 = { "objc_arc_inert" } +// CHECK: attributes #1 = { convergent noinline norecurse nounwind optnone "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } +// CHECK: attributes #2 = { convergent noinline norecurse nounwind optnone "amdgpu-flat-work-group-size"="1,256" "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="false" } +// CHECK: attributes #3 = { nocallback nofree nounwind willreturn memory(argmem: readwrite) } +// CHECK: attributes #4 = { convergent noinline nounwind optnone "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } +// CHECK: attributes #5 = { convergent nounwind "enqueued-block" } +// CHECK: attributes #6 = { convergent noinline norecurse nounwind optnone "amdgpu-flat-work-group-size"="1,256" "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+s-memtime-inst" "uniform-work-group-size"="false" } +// CHECK: attributes #7 = { nocallback nofree nosync nounwind willreturn } +// CHECK: attributes #8 = { convergent noinline norecurse nounwind optnone "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+s-memtime-inst" } +// CHECK: attributes #9 = { convergent nounwind } //. // CHECK: !0 = !{i32 1, !"amdgpu_code_object_version", i32 400} // CHECK: !1 = !{i32 1, !"wchar_size", i32 4} @@ -315,4 +432,6 @@ kernel void test(global char *a, char b, global long *c, long d) { // CHECK: !12 = !{!"none", !"none"} // CHECK: !13 = !{!"__block_literal", !"void*"} // CHECK: !14 = !{!"", !""} +// CHECK: !15 = !{i32 1} +// CHECK: !16 = !{!"int*"} //. -- 2.7.4