//===----------------------------------------------------------------------===//
#include "AMDGPUHSAMetadataStreamer.h"
-#include "AMDGPU.h"
+#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Module.h"
for (auto &Arg : Func.args())
emitKernelArg(Arg);
- // TODO: What about other languages?
- if (!Func.getParent()->getNamedMetadata("opencl.ocl.version"))
- return;
-
- auto &DL = Func.getParent()->getDataLayout();
- auto Int64Ty = Type::getInt64Ty(Func.getContext());
-
- emitKernelArg(DL, Int64Ty, ValueKind::HiddenGlobalOffsetX);
- emitKernelArg(DL, Int64Ty, ValueKind::HiddenGlobalOffsetY);
- emitKernelArg(DL, Int64Ty, ValueKind::HiddenGlobalOffsetZ);
-
- auto Int8PtrTy = Type::getInt8PtrTy(Func.getContext(),
- AMDGPUASI.GLOBAL_ADDRESS);
-
- // Emit "printf buffer" argument if printf is used, otherwise emit dummy
- // "none" argument.
- if (Func.getParent()->getNamedMetadata("llvm.printf.fmts"))
- emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenPrintfBuffer);
- else
- emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenNone);
-
- // Emit "default queue" and "completion action" arguments if enqueue kernel is
- // used, otherwise emit dummy "none" arguments.
- if (Func.hasFnAttribute("calls-enqueue-kernel")) {
- emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenDefaultQueue);
- emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenCompletionAction);
- } else {
- emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenNone);
- emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenNone);
- }
+ emitHiddenKernelArgs(Func);
}
void MetadataStreamer::emitKernelArg(const Argument &Arg) {
}
}
+/// Emits metadata for the hidden (implicit) kernel arguments of \p Func.
+///
+/// The hidden-argument byte count is read from the
+/// "amdgpu-implicitarg-num-bytes" function attribute, with 0 passed as the
+/// fallback value. When the count is zero nothing is emitted. Otherwise the
+/// hidden arguments are emitted in a fixed order, each gated on the count
+/// reaching a threshold: global offsets X/Y/Z (8/16/24), the printf buffer
+/// slot (32), then the default queue and completion action slots (48).
+void MetadataStreamer::emitHiddenKernelArgs(const Function &Func) {
+ int HiddenArgNumBytes =
+ getIntegerAttribute(Func, "amdgpu-implicitarg-num-bytes", 0);
+
+ // No hidden-argument bytes requested: emit no hidden arguments at all.
+ if (!HiddenArgNumBytes)
+ return;
+
+ auto &DL = Func.getParent()->getDataLayout();
+ auto Int64Ty = Type::getInt64Ty(Func.getContext());
+
+ // The three global offsets are 64-bit integers; each is emitted only once
+ // the byte count reaches the next multiple of 8.
+ if (HiddenArgNumBytes >= 8)
+ emitKernelArg(DL, Int64Ty, ValueKind::HiddenGlobalOffsetX);
+ if (HiddenArgNumBytes >= 16)
+ emitKernelArg(DL, Int64Ty, ValueKind::HiddenGlobalOffsetY);
+ if (HiddenArgNumBytes >= 24)
+ emitKernelArg(DL, Int64Ty, ValueKind::HiddenGlobalOffsetZ);
+
+ // The remaining hidden arguments are i8 pointers in the global address space.
+ auto Int8PtrTy = Type::getInt8PtrTy(Func.getContext(),
+ AMDGPUASI.GLOBAL_ADDRESS);
+
+ // Emit "printf buffer" argument if printf is used, otherwise emit dummy
+ // "none" argument.
+ // "printf is used" is detected by the module having "llvm.printf.fmts"
+ // named metadata.
+ if (HiddenArgNumBytes >= 32) {
+ if (Func.getParent()->getNamedMetadata("llvm.printf.fmts"))
+ emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenPrintfBuffer);
+ else
+ emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenNone);
+ }
+
+ // Emit "default queue" and "completion action" arguments if enqueue kernel is
+ // used, otherwise emit dummy "none" arguments.
+ // Both slots are emitted together behind a single threshold.
+ // NOTE(review): a byte count in [40, 48) emits neither slot - confirm that
+ // this is intended.
+ if (HiddenArgNumBytes >= 48) {
+ if (Func.hasFnAttribute("calls-enqueue-kernel")) {
+ emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenDefaultQueue);
+ emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenCompletionAction);
+ } else {
+ emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenNone);
+ emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenNone);
+ }
+ }
+}
+
void MetadataStreamer::begin(const Module &Mod) {
AMDGPUASI = getAMDGPUAS(Mod);
emitVersion();
; CHECK-NEXT: AddrSpaceQual: Global
; CHECK-NOT: ValueKind: HiddenDefaultQueue
; CHECK-NOT: ValueKind: HiddenCompletionAction
-define amdgpu_kernel void @test_char(i8 %a)
+define amdgpu_kernel void @test_char(i8 %a) #0
!kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !9
!kernel_arg_base_type !9 !kernel_arg_type_qual !4 {
ret void
; CHECK-NEXT: ValueKind: HiddenPrintfBuffer
; CHECK-NEXT: ValueType: I8
; CHECK-NEXT: AddrSpaceQual: Global
-define amdgpu_kernel void @test_ushort2(<2 x i16> %a)
+define amdgpu_kernel void @test_ushort2(<2 x i16> %a) #0
!kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !10
!kernel_arg_base_type !10 !kernel_arg_type_qual !4 {
ret void
; CHECK-NEXT: ValueKind: HiddenPrintfBuffer
; CHECK-NEXT: ValueType: I8
; CHECK-NEXT: AddrSpaceQual: Global
-define amdgpu_kernel void @test_int3(<3 x i32> %a)
+define amdgpu_kernel void @test_int3(<3 x i32> %a) #0
!kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !11
!kernel_arg_base_type !11 !kernel_arg_type_qual !4 {
ret void
; CHECK-NEXT: ValueKind: HiddenPrintfBuffer
; CHECK-NEXT: ValueType: I8
; CHECK-NEXT: AddrSpaceQual: Global
-define amdgpu_kernel void @test_ulong4(<4 x i64> %a)
+define amdgpu_kernel void @test_ulong4(<4 x i64> %a) #0
!kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !12
!kernel_arg_base_type !12 !kernel_arg_type_qual !4 {
ret void
; CHECK-NEXT: ValueKind: HiddenPrintfBuffer
; CHECK-NEXT: ValueType: I8
; CHECK-NEXT: AddrSpaceQual: Global
-define amdgpu_kernel void @test_half8(<8 x half> %a)
+define amdgpu_kernel void @test_half8(<8 x half> %a) #0
!kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !13
!kernel_arg_base_type !13 !kernel_arg_type_qual !4 {
ret void
; CHECK-NEXT: ValueKind: HiddenPrintfBuffer
; CHECK-NEXT: ValueType: I8
; CHECK-NEXT: AddrSpaceQual: Global
-define amdgpu_kernel void @test_float16(<16 x float> %a)
+define amdgpu_kernel void @test_float16(<16 x float> %a) #0
!kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !14
!kernel_arg_base_type !14 !kernel_arg_type_qual !4 {
ret void
; CHECK-NEXT: ValueKind: HiddenPrintfBuffer
; CHECK-NEXT: ValueType: I8
; CHECK-NEXT: AddrSpaceQual: Global
-define amdgpu_kernel void @test_double16(<16 x double> %a)
+define amdgpu_kernel void @test_double16(<16 x double> %a) #0
!kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !15
!kernel_arg_base_type !15 !kernel_arg_type_qual !4 {
ret void
; CHECK-NEXT: ValueKind: HiddenPrintfBuffer
; CHECK-NEXT: ValueType: I8
; CHECK-NEXT: AddrSpaceQual: Global
-define amdgpu_kernel void @test_pointer(i32 addrspace(1)* %a)
+define amdgpu_kernel void @test_pointer(i32 addrspace(1)* %a) #0
!kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !16
!kernel_arg_base_type !16 !kernel_arg_type_qual !4 {
ret void
; CHECK-NEXT: ValueKind: HiddenPrintfBuffer
; CHECK-NEXT: ValueType: I8
; CHECK-NEXT: AddrSpaceQual: Global
-define amdgpu_kernel void @test_image(%opencl.image2d_t addrspace(1)* %a)
+define amdgpu_kernel void @test_image(%opencl.image2d_t addrspace(1)* %a) #0
!kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !17
!kernel_arg_base_type !17 !kernel_arg_type_qual !4 {
ret void
; CHECK-NEXT: ValueKind: HiddenPrintfBuffer
; CHECK-NEXT: ValueType: I8
; CHECK-NEXT: AddrSpaceQual: Global
-define amdgpu_kernel void @test_sampler(i32 %a)
+define amdgpu_kernel void @test_sampler(i32 %a) #0
!kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !18
!kernel_arg_base_type !18 !kernel_arg_type_qual !4 {
ret void
; CHECK-NEXT: ValueKind: HiddenPrintfBuffer
; CHECK-NEXT: ValueType: I8
; CHECK-NEXT: AddrSpaceQual: Global
-define amdgpu_kernel void @test_queue(%opencl.queue_t addrspace(1)* %a)
+define amdgpu_kernel void @test_queue(%opencl.queue_t addrspace(1)* %a) #0
!kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !19
!kernel_arg_base_type !19 !kernel_arg_type_qual !4 {
ret void
; CHECK-NEXT: ValueKind: HiddenPrintfBuffer
; CHECK-NEXT: ValueType: I8
; CHECK-NEXT: AddrSpaceQual: Global
-define amdgpu_kernel void @test_struct(%struct.A addrspace(5)* byval %a)
+define amdgpu_kernel void @test_struct(%struct.A addrspace(5)* byval %a) #0
!kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !20
!kernel_arg_base_type !20 !kernel_arg_type_qual !4 {
ret void
; CHECK-NEXT: ValueKind: HiddenPrintfBuffer
; CHECK-NEXT: ValueType: I8
; CHECK-NEXT: AddrSpaceQual: Global
-define amdgpu_kernel void @test_i128(i128 %a)
+define amdgpu_kernel void @test_i128(i128 %a) #0
!kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !21
!kernel_arg_base_type !21 !kernel_arg_type_qual !4 {
ret void
; CHECK-NEXT: ValueKind: HiddenPrintfBuffer
; CHECK-NEXT: ValueType: I8
; CHECK-NEXT: AddrSpaceQual: Global
-define amdgpu_kernel void @test_multi_arg(i32 %a, <2 x i16> %b, <3 x i8> %c)
+define amdgpu_kernel void @test_multi_arg(i32 %a, <2 x i16> %b, <3 x i8> %c) #0
!kernel_arg_addr_space !22 !kernel_arg_access_qual !23 !kernel_arg_type !24
!kernel_arg_base_type !24 !kernel_arg_type_qual !25 {
ret void
; CHECK-NEXT: AddrSpaceQual: Global
define amdgpu_kernel void @test_addr_space(i32 addrspace(1)* %g,
i32 addrspace(4)* %c,
- i32 addrspace(3)* %l)
+ i32 addrspace(3)* %l) #0
!kernel_arg_addr_space !50 !kernel_arg_access_qual !23 !kernel_arg_type !51
!kernel_arg_base_type !51 !kernel_arg_type_qual !25 {
ret void
; CHECK-NEXT: AddrSpaceQual: Global
define amdgpu_kernel void @test_type_qual(i32 addrspace(1)* %a,
i32 addrspace(1)* %b,
- %opencl.pipe_t addrspace(1)* %c)
+ %opencl.pipe_t addrspace(1)* %c) #0
!kernel_arg_addr_space !22 !kernel_arg_access_qual !23 !kernel_arg_type !51
!kernel_arg_base_type !51 !kernel_arg_type_qual !70 {
ret void
; CHECK-NEXT: AddrSpaceQual: Global
define amdgpu_kernel void @test_access_qual(%opencl.image1d_t addrspace(1)* %ro,
%opencl.image2d_t addrspace(1)* %wo,
- %opencl.image3d_t addrspace(1)* %rw)
+ %opencl.image3d_t addrspace(1)* %rw) #0
!kernel_arg_addr_space !60 !kernel_arg_access_qual !61 !kernel_arg_type !62
!kernel_arg_base_type !62 !kernel_arg_type_qual !25 {
ret void
; CHECK-NEXT: ValueKind: HiddenPrintfBuffer
; CHECK-NEXT: ValueType: I8
; CHECK-NEXT: AddrSpaceQual: Global
-define amdgpu_kernel void @test_vec_type_hint_half(i32 %a)
+define amdgpu_kernel void @test_vec_type_hint_half(i32 %a) #0
!kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3
!kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !26 {
ret void
; CHECK-NEXT: ValueKind: HiddenPrintfBuffer
; CHECK-NEXT: ValueType: I8
; CHECK-NEXT: AddrSpaceQual: Global
-define amdgpu_kernel void @test_vec_type_hint_float(i32 %a)
+define amdgpu_kernel void @test_vec_type_hint_float(i32 %a) #0
!kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3
!kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !27 {
ret void
; CHECK-NEXT: ValueKind: HiddenPrintfBuffer
; CHECK-NEXT: ValueType: I8
; CHECK-NEXT: AddrSpaceQual: Global
-define amdgpu_kernel void @test_vec_type_hint_double(i32 %a)
+define amdgpu_kernel void @test_vec_type_hint_double(i32 %a) #0
!kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3
!kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !28 {
ret void
; CHECK-NEXT: ValueKind: HiddenPrintfBuffer
; CHECK-NEXT: ValueType: I8
; CHECK-NEXT: AddrSpaceQual: Global
-define amdgpu_kernel void @test_vec_type_hint_char(i32 %a)
+define amdgpu_kernel void @test_vec_type_hint_char(i32 %a) #0
!kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3
!kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !29 {
ret void
; CHECK-NEXT: ValueKind: HiddenPrintfBuffer
; CHECK-NEXT: ValueType: I8
; CHECK-NEXT: AddrSpaceQual: Global
-define amdgpu_kernel void @test_vec_type_hint_short(i32 %a)
+define amdgpu_kernel void @test_vec_type_hint_short(i32 %a) #0
!kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3
!kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !30 {
ret void
; CHECK-NEXT: ValueKind: HiddenPrintfBuffer
; CHECK-NEXT: ValueType: I8
; CHECK-NEXT: AddrSpaceQual: Global
-define amdgpu_kernel void @test_vec_type_hint_long(i32 %a)
+define amdgpu_kernel void @test_vec_type_hint_long(i32 %a) #0
!kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3
!kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !31 {
ret void
; CHECK-NEXT: ValueKind: HiddenPrintfBuffer
; CHECK-NEXT: ValueType: I8
; CHECK-NEXT: AddrSpaceQual: Global
-define amdgpu_kernel void @test_vec_type_hint_unknown(i32 %a)
+define amdgpu_kernel void @test_vec_type_hint_unknown(i32 %a) #0
!kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3
!kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !32 {
ret void
; CHECK-NEXT: ValueKind: HiddenPrintfBuffer
; CHECK-NEXT: ValueType: I8
; CHECK-NEXT: AddrSpaceQual: Global
-define amdgpu_kernel void @test_reqd_wgs_vec_type_hint(i32 %a)
+define amdgpu_kernel void @test_reqd_wgs_vec_type_hint(i32 %a) #0
!kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3
!kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !5
!reqd_work_group_size !6 {
; CHECK-NEXT: ValueKind: HiddenPrintfBuffer
; CHECK-NEXT: ValueType: I8
; CHECK-NEXT: AddrSpaceQual: Global
-define amdgpu_kernel void @test_wgs_hint_vec_type_hint(i32 %a)
+define amdgpu_kernel void @test_wgs_hint_vec_type_hint(i32 %a) #0
!kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3
!kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !7
!work_group_size_hint !8 {
; CHECK-NEXT: ValueKind: HiddenPrintfBuffer
; CHECK-NEXT: ValueType: I8
; CHECK-NEXT: AddrSpaceQual: Global
-define amdgpu_kernel void @test_arg_ptr_to_ptr(i32 addrspace(5)* addrspace(1)* %a)
+define amdgpu_kernel void @test_arg_ptr_to_ptr(i32 addrspace(5)* addrspace(1)* %a) #0
!kernel_arg_addr_space !81 !kernel_arg_access_qual !2 !kernel_arg_type !80
!kernel_arg_base_type !80 !kernel_arg_type_qual !4 {
ret void
; CHECK-NEXT: ValueKind: HiddenPrintfBuffer
; CHECK-NEXT: ValueType: I8
; CHECK-NEXT: AddrSpaceQual: Global
-define amdgpu_kernel void @test_arg_struct_contains_ptr(%struct.B addrspace(5)* byval %a)
+define amdgpu_kernel void @test_arg_struct_contains_ptr(%struct.B addrspace(5)* byval %a) #0
!kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !82
!kernel_arg_base_type !82 !kernel_arg_type_qual !4 {
ret void
; CHECK-NEXT: ValueKind: HiddenPrintfBuffer
; CHECK-NEXT: ValueType: I8
; CHECK-NEXT: AddrSpaceQual: Global
-define amdgpu_kernel void @test_arg_vector_of_ptr(<2 x i32 addrspace(1)*> %a)
+define amdgpu_kernel void @test_arg_vector_of_ptr(<2 x i32 addrspace(1)*> %a) #0
!kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !83
!kernel_arg_base_type !83 !kernel_arg_type_qual !4 {
ret void
; CHECK-NEXT: ValueType: I8
; CHECK-NEXT: AddrSpaceQual: Global
define amdgpu_kernel void @test_arg_unknown_builtin_type(
- %opencl.clk_event_t addrspace(1)* %a)
+ %opencl.clk_event_t addrspace(1)* %a) #0
!kernel_arg_addr_space !81 !kernel_arg_access_qual !2 !kernel_arg_type !84
!kernel_arg_base_type !84 !kernel_arg_type_qual !4 {
ret void
<4 x i8> addrspace(3)* %e,
<8 x i8> addrspace(3)* %f,
<16 x i8> addrspace(3)* %g,
- {} addrspace(3)* %h)
+ {} addrspace(3)* %h) #0
!kernel_arg_addr_space !91 !kernel_arg_access_qual !92 !kernel_arg_type !93
!kernel_arg_base_type !93 !kernel_arg_type_qual !94 {
ret void
<4 x i8> addrspace(3)* align 256 %e,
<8 x i8> addrspace(3)* align 128 %f,
<16 x i8> addrspace(3)* align 1024 %g,
- {} addrspace(3)* align 16 %h)
+ {} addrspace(3)* align 16 %h) #0
!kernel_arg_addr_space !91 !kernel_arg_access_qual !92 !kernel_arg_type !93
!kernel_arg_base_type !93 !kernel_arg_type_qual !94 {
ret void
; CHECK-NEXT: ValueType: I8
; CHECK-NEXT: AddrSpaceQual: Global
define amdgpu_kernel void @__test_block_invoke_kernel(
- <{ i32, i32, i8*, i8 addrspace(1)*, i8 }> %arg) #0
+ <{ i32, i32, i8*, i8 addrspace(1)*, i8 }> %arg) #1
!kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !110
!kernel_arg_base_type !110 !kernel_arg_type_qual !4 {
ret void
; CHECK-NEXT: ValueKind: HiddenCompletionAction
; CHECK-NEXT: ValueType: I8
; CHECK-NEXT: AddrSpaceQual: Global
-define amdgpu_kernel void @test_enqueue_kernel_caller(i8 %a) #1
+; Attribute group #2 (declared below) combines
+; "amdgpu-implicitarg-num-bytes"="48" with "calls-enqueue-kernel".
+define amdgpu_kernel void @test_enqueue_kernel_caller(i8 %a) #2
!kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !9
!kernel_arg_base_type !9 !kernel_arg_type_qual !4 {
ret void
}
-attributes #0 = { "runtime-handle"="__test_block_invoke_kernel_runtime_handle" }
-attributes #1 = { "calls-enqueue-kernel" }
+attributes #0 = { "amdgpu-implicitarg-num-bytes"="48" }
+attributes #1 = { "amdgpu-implicitarg-num-bytes"="48" "runtime-handle"="__test_block_invoke_kernel_runtime_handle" }
+attributes #2 = { "amdgpu-implicitarg-num-bytes"="48" "calls-enqueue-kernel" }
!llvm.printf.fmts = !{!100, !101}
; CHECK: Version: [ 1, 0 ]
; CHECK: Kernels:
-; CHECK: - Name: test
-; CHECK: SymbolName: 'test@kd'
+; CHECK: - Name: test0
+; CHECK: SymbolName: 'test0@kd'
+; CHECK: Args:
+; CHECK-NEXT: - Name: r
+; CHECK-NEXT: Size: 8
+; CHECK-NEXT: Align: 8
+; CHECK-NEXT: ValueKind: GlobalBuffer
+; CHECK-NEXT: ValueType: F16
+; CHECK-NEXT: AddrSpaceQual: Global
+; CHECK-NEXT: - Name: a
+; CHECK-NEXT: Size: 8
+; CHECK-NEXT: Align: 8
+; CHECK-NEXT: ValueKind: GlobalBuffer
+; CHECK-NEXT: ValueType: F16
+; CHECK-NEXT: AddrSpaceQual: Global
+; CHECK-NEXT: - Name: b
+; CHECK-NEXT: Size: 8
+; CHECK-NEXT: Align: 8
+; CHECK-NEXT: ValueKind: GlobalBuffer
+; CHECK-NEXT: ValueType: F16
+; CHECK-NEXT: AddrSpaceQual: Global
+; CHECK-NEXT: CodeProps:
+; Kernel with no attribute group, and therefore no
+; "amdgpu-implicitarg-num-bytes" attribute: the expected argument list above
+; contains only the explicit arguments r, a and b.
+define amdgpu_kernel void @test0(
+ half addrspace(1)* %r,
+ half addrspace(1)* %a,
+ half addrspace(1)* %b) {
+entry:
+ %a.val = load half, half addrspace(1)* %a
+ %b.val = load half, half addrspace(1)* %b
+ %r.val = fadd half %a.val, %b.val
+ store half %r.val, half addrspace(1)* %r
+ ret void
+}
+
+; CHECK: - Name: test8
+; CHECK: SymbolName: 'test8@kd'
+; CHECK: Args:
+; CHECK-NEXT: - Name: r
+; CHECK-NEXT: Size: 8
+; CHECK-NEXT: Align: 8
+; CHECK-NEXT: ValueKind: GlobalBuffer
+; CHECK-NEXT: ValueType: F16
+; CHECK-NEXT: AddrSpaceQual: Global
+; CHECK-NEXT: - Name: a
+; CHECK-NEXT: Size: 8
+; CHECK-NEXT: Align: 8
+; CHECK-NEXT: ValueKind: GlobalBuffer
+; CHECK-NEXT: ValueType: F16
+; CHECK-NEXT: AddrSpaceQual: Global
+; CHECK-NEXT: - Name: b
+; CHECK-NEXT: Size: 8
+; CHECK-NEXT: Align: 8
+; CHECK-NEXT: ValueKind: GlobalBuffer
+; CHECK-NEXT: ValueType: F16
+; CHECK-NEXT: AddrSpaceQual: Global
+; CHECK-NEXT: - Size: 8
+; CHECK-NEXT: Align: 8
+; CHECK-NEXT: ValueKind: HiddenGlobalOffsetX
+; CHECK-NEXT: ValueType: I64
+; CHECK-NEXT: CodeProps:
+; Uses attribute group #0, declared at the end of this test with
+; "amdgpu-implicitarg-num-bytes"="8": the expected argument list above ends
+; with a single hidden argument, HiddenGlobalOffsetX.
+define amdgpu_kernel void @test8(
+ half addrspace(1)* %r,
+ half addrspace(1)* %a,
+ half addrspace(1)* %b) #0 {
+entry:
+ %a.val = load half, half addrspace(1)* %a
+ %b.val = load half, half addrspace(1)* %b
+ %r.val = fadd half %a.val, %b.val
+ store half %r.val, half addrspace(1)* %r
+ ret void
+}
+
+; CHECK: - Name: test16
+; CHECK: SymbolName: 'test16@kd'
+; CHECK: Args:
+; CHECK-NEXT: - Name: r
+; CHECK-NEXT: Size: 8
+; CHECK-NEXT: Align: 8
+; CHECK-NEXT: ValueKind: GlobalBuffer
+; CHECK-NEXT: ValueType: F16
+; CHECK-NEXT: AddrSpaceQual: Global
+; CHECK-NEXT: - Name: a
+; CHECK-NEXT: Size: 8
+; CHECK-NEXT: Align: 8
+; CHECK-NEXT: ValueKind: GlobalBuffer
+; CHECK-NEXT: ValueType: F16
+; CHECK-NEXT: AddrSpaceQual: Global
+; CHECK-NEXT: - Name: b
+; CHECK-NEXT: Size: 8
+; CHECK-NEXT: Align: 8
+; CHECK-NEXT: ValueKind: GlobalBuffer
+; CHECK-NEXT: ValueType: F16
+; CHECK-NEXT: AddrSpaceQual: Global
+; CHECK-NEXT: - Size: 8
+; CHECK-NEXT: Align: 8
+; CHECK-NEXT: ValueKind: HiddenGlobalOffsetX
+; CHECK-NEXT: ValueType: I64
+; CHECK-NEXT: - Size: 8
+; CHECK-NEXT: Align: 8
+; CHECK-NEXT: ValueKind: HiddenGlobalOffsetY
+; CHECK-NEXT: ValueType: I64
+; CHECK-NEXT: CodeProps:
+; Uses attribute group #1, declared at the end of this test with
+; "amdgpu-implicitarg-num-bytes"="16": the expected argument list above ends
+; with HiddenGlobalOffsetX followed by HiddenGlobalOffsetY.
+define amdgpu_kernel void @test16(
+ half addrspace(1)* %r,
+ half addrspace(1)* %a,
+ half addrspace(1)* %b) #1 {
+entry:
+ %a.val = load half, half addrspace(1)* %a
+ %b.val = load half, half addrspace(1)* %b
+ %r.val = fadd half %a.val, %b.val
+ store half %r.val, half addrspace(1)* %r
+ ret void
+}
+
+; CHECK: - Name: test24
+; CHECK: SymbolName: 'test24@kd'
+; CHECK: Args:
+; CHECK-NEXT: - Name: r
+; CHECK-NEXT: Size: 8
+; CHECK-NEXT: Align: 8
+; CHECK-NEXT: ValueKind: GlobalBuffer
+; CHECK-NEXT: ValueType: F16
+; CHECK-NEXT: AddrSpaceQual: Global
+; CHECK-NEXT: - Name: a
+; CHECK-NEXT: Size: 8
+; CHECK-NEXT: Align: 8
+; CHECK-NEXT: ValueKind: GlobalBuffer
+; CHECK-NEXT: ValueType: F16
+; CHECK-NEXT: AddrSpaceQual: Global
+; CHECK-NEXT: - Name: b
+; CHECK-NEXT: Size: 8
+; CHECK-NEXT: Align: 8
+; CHECK-NEXT: ValueKind: GlobalBuffer
+; CHECK-NEXT: ValueType: F16
+; CHECK-NEXT: AddrSpaceQual: Global
+; CHECK-NEXT: - Size: 8
+; CHECK-NEXT: Align: 8
+; CHECK-NEXT: ValueKind: HiddenGlobalOffsetX
+; CHECK-NEXT: ValueType: I64
+; CHECK-NEXT: - Size: 8
+; CHECK-NEXT: Align: 8
+; CHECK-NEXT: ValueKind: HiddenGlobalOffsetY
+; CHECK-NEXT: ValueType: I64
+; CHECK-NEXT: - Size: 8
+; CHECK-NEXT: Align: 8
+; CHECK-NEXT: ValueKind: HiddenGlobalOffsetZ
+; CHECK-NEXT: ValueType: I64
+; CHECK-NEXT: CodeProps:
+; Uses attribute group #2, declared at the end of this test with
+; "amdgpu-implicitarg-num-bytes"="24": the expected argument list above ends
+; with the three hidden global offsets X, Y and Z.
+define amdgpu_kernel void @test24(
+ half addrspace(1)* %r,
+ half addrspace(1)* %a,
+ half addrspace(1)* %b) #2 {
+entry:
+ %a.val = load half, half addrspace(1)* %a
+ %b.val = load half, half addrspace(1)* %b
+ %r.val = fadd half %a.val, %b.val
+ store half %r.val, half addrspace(1)* %r
+ ret void
+}
+
+; CHECK: - Name: test32
+; CHECK: SymbolName: 'test32@kd'
; CHECK: Args:
; CHECK-NEXT: - Name: r
; CHECK-NEXT: Size: 8
; CHECK-NEXT: ValueKind: HiddenNone
; CHECK-NEXT: ValueType: I8
; CHECK-NEXT: AddrSpaceQual: Global
+; CHECK-NEXT: CodeProps:
+; Uses attribute group #3, declared at the end of this test with
+; "amdgpu-implicitarg-num-bytes"="32". The last expected argument above is a
+; HiddenNone entry - presumably the placeholder for the printf buffer slot;
+; verify against the full expected argument list.
+define amdgpu_kernel void @test32(
+ half addrspace(1)* %r,
+ half addrspace(1)* %a,
+ half addrspace(1)* %b) #3 {
+entry:
+ %a.val = load half, half addrspace(1)* %a
+ %b.val = load half, half addrspace(1)* %b
+ %r.val = fadd half %a.val, %b.val
+ store half %r.val, half addrspace(1)* %r
+ ret void
+}
+
+; CHECK: - Name: test48
+; CHECK: SymbolName: 'test48@kd'
+; CHECK: Args:
+; CHECK-NEXT: - Name: r
+; CHECK-NEXT: Size: 8
+; CHECK-NEXT: Align: 8
+; CHECK-NEXT: ValueKind: GlobalBuffer
+; CHECK-NEXT: ValueType: F16
+; CHECK-NEXT: AddrSpaceQual: Global
+; CHECK-NEXT: - Name: a
+; CHECK-NEXT: Size: 8
+; CHECK-NEXT: Align: 8
+; CHECK-NEXT: ValueKind: GlobalBuffer
+; CHECK-NEXT: ValueType: F16
+; CHECK-NEXT: AddrSpaceQual: Global
+; CHECK-NEXT: - Name: b
+; CHECK-NEXT: Size: 8
+; CHECK-NEXT: Align: 8
+; CHECK-NEXT: ValueKind: GlobalBuffer
+; CHECK-NEXT: ValueType: F16
+; CHECK-NEXT: AddrSpaceQual: Global
+; CHECK-NEXT: - Size: 8
+; CHECK-NEXT: Align: 8
+; CHECK-NEXT: ValueKind: HiddenGlobalOffsetX
+; CHECK-NEXT: ValueType: I64
+; CHECK-NEXT: - Size: 8
+; CHECK-NEXT: Align: 8
+; CHECK-NEXT: ValueKind: HiddenGlobalOffsetY
+; CHECK-NEXT: ValueType: I64
+; CHECK-NEXT: - Size: 8
+; CHECK-NEXT: Align: 8
+; CHECK-NEXT: ValueKind: HiddenGlobalOffsetZ
+; CHECK-NEXT: ValueType: I64
; CHECK-NEXT: - Size: 8
; CHECK-NEXT: Align: 8
; CHECK-NEXT: ValueKind: HiddenNone
; CHECK-NEXT: ValueKind: HiddenNone
; CHECK-NEXT: ValueType: I8
; CHECK-NEXT: AddrSpaceQual: Global
-define amdgpu_kernel void @test(
+; CHECK-NEXT: - Size: 8
+; CHECK-NEXT: Align: 8
+; CHECK-NEXT: ValueKind: HiddenNone
+; CHECK-NEXT: ValueType: I8
+; CHECK-NEXT: AddrSpaceQual: Global
+; CHECK-NEXT: CodeProps:
+; Uses attribute group #4, declared at the end of this test with
+; "amdgpu-implicitarg-num-bytes"="48": the expected argument list above ends
+; with HiddenNone entries after the three hidden global offsets.
+define amdgpu_kernel void @test48(
half addrspace(1)* %r,
half addrspace(1)* %a,
- half addrspace(1)* %b) {
+ half addrspace(1)* %b) #4 {
entry:
%a.val = load half, half addrspace(1)* %a
%b.val = load half, half addrspace(1)* %b
ret void
}
-!opencl.ocl.version = !{!0}
-!0 = !{i32 2, i32 0}
+attributes #0 = { "amdgpu-implicitarg-num-bytes"="8" }
+attributes #1 = { "amdgpu-implicitarg-num-bytes"="16" }
+attributes #2 = { "amdgpu-implicitarg-num-bytes"="24" }
+attributes #3 = { "amdgpu-implicitarg-num-bytes"="32" }
+attributes #4 = { "amdgpu-implicitarg-num-bytes"="48" }