BUILTIN(__nvvm_ldg_ul, "ULiULiC*", "")
BUILTIN(__nvvm_ldg_ull, "ULLiULLiC*", "")
+BUILTIN(__nvvm_ldg_h, "hhC*", "")
BUILTIN(__nvvm_ldg_f, "ffC*", "")
BUILTIN(__nvvm_ldg_d, "ddC*", "")
BUILTIN(__nvvm_ldg_ui4, "E4UiE4UiC*", "")
BUILTIN(__nvvm_ldg_ull2, "E2ULLiE2ULLiC*", "")
+BUILTIN(__nvvm_ldg_h2, "E2hE2hC*", "")
BUILTIN(__nvvm_ldg_f2, "E2fE2fC*", "")
BUILTIN(__nvvm_ldg_f4, "E4fE4fC*", "")
BUILTIN(__nvvm_ldg_d2, "E2dE2dC*", "")
// elements, its alignment is set to number of elements times the alignment
// of its member: n*alignof(t)."
return MakeLdg(Intrinsic::nvvm_ldg_global_i);
+ case NVPTX::BI__nvvm_ldg_h:
case NVPTX::BI__nvvm_ldg_f:
+ case NVPTX::BI__nvvm_ldg_h2:
case NVPTX::BI__nvvm_ldg_f2:
case NVPTX::BI__nvvm_ldg_f4:
case NVPTX::BI__nvvm_ldg_d:
declare i8 @llvm.nvvm.ldu.global.i.i8.p1(ptr addrspace(1) %ptr, i32 %align)
declare i32 @llvm.nvvm.ldu.global.i.i32.p1(ptr addrspace(1) %ptr, i32 %align)
+
declare i8 @llvm.nvvm.ldg.global.i.i8.p1(ptr addrspace(1) %ptr, i32 %align)
+declare i16 @llvm.nvvm.ldg.global.i.i16.p1(ptr addrspace(1) %ptr, i32 %align)
declare i32 @llvm.nvvm.ldg.global.i.i32.p1(ptr addrspace(1) %ptr, i32 %align)
+declare i64 @llvm.nvvm.ldg.global.i.i64.p1(ptr addrspace(1) %ptr, i32 %align)
+declare float @llvm.nvvm.ldg.global.f.f32.p1(ptr addrspace(1) %ptr, i32 %align)
+declare double @llvm.nvvm.ldg.global.f.f64.p1(ptr addrspace(1) %ptr, i32 %align)
+declare half @llvm.nvvm.ldg.global.f.f16.p1(ptr addrspace(1) %ptr, i32 %align)
+declare <2 x half> @llvm.nvvm.ldg.global.f.v2f16.p1(ptr addrspace(1) %ptr, i32 %align)
-
-; CHECK: func0
-define i8 @func0(ptr addrspace(1) %ptr) {
+; CHECK: test_ldu_i8
+define i8 @test_ldu_i8(ptr addrspace(1) %ptr) {
; ldu.global.u8
%val = tail call i8 @llvm.nvvm.ldu.global.i.i8.p1(ptr addrspace(1) %ptr, i32 4)
ret i8 %val
}
-; CHECK: func1
-define i32 @func1(ptr addrspace(1) %ptr) {
+; CHECK: test_ldu_i32
+define i32 @test_ldu_i32(ptr addrspace(1) %ptr) {
; ldu.global.u32
%val = tail call i32 @llvm.nvvm.ldu.global.i.i32.p1(ptr addrspace(1) %ptr, i32 4)
ret i32 %val
}
-; CHECK: func2
-define i8 @func2(ptr addrspace(1) %ptr) {
+; CHECK: test_ldg_i8
+define i8 @test_ldg_i8(ptr addrspace(1) %ptr) {
; ld.global.nc.u8
%val = tail call i8 @llvm.nvvm.ldg.global.i.i8.p1(ptr addrspace(1) %ptr, i32 4)
ret i8 %val
}
-; CHECK: func3
-define i32 @func3(ptr addrspace(1) %ptr) {
+; CHECK: test_ldg_i16
+define i16 @test_ldg_i16(ptr addrspace(1) %ptr) {
+; ld.global.nc.u16
+ %val = tail call i16 @llvm.nvvm.ldg.global.i.i16.p1(ptr addrspace(1) %ptr, i32 4)
+ ret i16 %val
+}
+
+; CHECK: test_ldg_i32
+define i32 @test_ldg_i32(ptr addrspace(1) %ptr) {
; ld.global.nc.u32
%val = tail call i32 @llvm.nvvm.ldg.global.i.i32.p1(ptr addrspace(1) %ptr, i32 4)
ret i32 %val
}
+
+; CHECK: test_ldg_i64
+define i64 @test_ldg_i64(ptr addrspace(1) %ptr) {
+; ld.global.nc.u64
+ %val = tail call i64 @llvm.nvvm.ldg.global.i.i64.p1(ptr addrspace(1) %ptr, i32 8)
+ ret i64 %val
+}
+
+; CHECK: test_ldg_f32
+define float @test_ldg_f32(ptr addrspace(1) %ptr) {
+; ld.global.nc.u64
+ %val = tail call float @llvm.nvvm.ldg.global.f.f32.p1(ptr addrspace(1) %ptr, i32 4)
+ ret float %val
+}
+
+; CHECK: test_ldg_f64
+define double @test_ldg_f64(ptr addrspace(1) %ptr) {
+; ld.global.nc.u64
+ %val = tail call double @llvm.nvvm.ldg.global.f.f64.p1(ptr addrspace(1) %ptr, i32 8)
+ ret double %val
+}
+
+; CHECK: test_ldg_f16
+define half @test_ldg_f16(ptr addrspace(1) %ptr) {
+; ld.global.nc.b16
+ %val = tail call half @llvm.nvvm.ldg.global.f.f16.p1(ptr addrspace(1) %ptr, i32 4)
+ ret half %val
+}
+
+; CHECK: test_ldg_v2f16
+define <2 x half> @test_ldg_v2f16(ptr addrspace(1) %ptr) {
+; ld.global.nc.b32
+ %val = tail call <2 x half> @llvm.nvvm.ldg.global.f.v2f16.p1(ptr addrspace(1) %ptr, i32 4)
+ ret <2 x half> %val
+}