From: Matt Arsenault Date: Fri, 25 Nov 2022 03:53:12 +0000 (-0500) Subject: AMDGPU: Bulk update some generic intrinsic tests to opaque pointers X-Git-Tag: upstream/17.0.6~25944 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=fb1d166e1d05272c569ed0502befd929fb78dffc;p=platform%2Fupstream%2Fllvm.git AMDGPU: Bulk update some generic intrinsic tests to opaque pointers Done purely with the script. --- diff --git a/llvm/test/CodeGen/AMDGPU/llvm.ceil.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.ceil.f16.ll index 49f100f..ac28735 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.ceil.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.ceil.f16.ll @@ -13,12 +13,12 @@ declare <2 x half> @llvm.ceil.v2f16(<2 x half> %a) ; GCN: buffer_store_short v[[R_F16]] ; GCN: s_endpgm define amdgpu_kernel void @ceil_f16( - half addrspace(1)* %r, - half addrspace(1)* %a) { + ptr addrspace(1) %r, + ptr addrspace(1) %a) { entry: - %a.val = load half, half addrspace(1)* %a + %a.val = load half, ptr addrspace(1) %a %r.val = call half @llvm.ceil.f16(half %a.val) - store half %r.val, half addrspace(1)* %r + store half %r.val, ptr addrspace(1) %r ret void } @@ -43,11 +43,11 @@ entry: ; GCN: buffer_store_dword v[[R_V2_F16]] ; GCN: s_endpgm define amdgpu_kernel void @ceil_v2f16( - <2 x half> addrspace(1)* %r, - <2 x half> addrspace(1)* %a) { + ptr addrspace(1) %r, + ptr addrspace(1) %a) { entry: - %a.val = load <2 x half>, <2 x half> addrspace(1)* %a + %a.val = load <2 x half>, ptr addrspace(1) %a %r.val = call <2 x half> @llvm.ceil.v2f16(<2 x half> %a.val) - store <2 x half> %r.val, <2 x half> addrspace(1)* %r + store <2 x half> %r.val, ptr addrspace(1) %r ret void } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.cos.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.cos.f16.ll index eaf632d..23e11c5 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.cos.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.cos.f16.ll @@ -5,7 +5,7 @@ ; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefix=GFX10 %s ; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefix=GFX11 %s -define amdgpu_kernel void @cos_f16(half addrspace(1)* %r, half addrspace(1)* %a) { +define amdgpu_kernel void @cos_f16(ptr addrspace(1) %r, ptr addrspace(1) %a) { ; GFX6-LABEL: cos_f16: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -81,13 +81,13 @@ define amdgpu_kernel void @cos_f16(half addrspace(1)* %r, half addrspace(1)* %a) ; GFX11-NEXT: global_store_b16 v0, v1, s[0:1] ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %a.val = load half, half addrspace(1)* %a + %a.val = load half, ptr addrspace(1) %a %r.val = call half @llvm.cos.f16(half %a.val) - store half %r.val, half addrspace(1)* %r + store half %r.val, ptr addrspace(1) %r ret void } -define amdgpu_kernel void @cos_v2f16(<2 x half> addrspace(1)* %r, <2 x half> addrspace(1)* %a) { +define amdgpu_kernel void @cos_v2f16(ptr addrspace(1) %r, ptr addrspace(1) %a) { ; GFX6-LABEL: cos_v2f16: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -190,9 +190,9 @@ define amdgpu_kernel void @cos_v2f16(<2 x half> addrspace(1)* %r, <2 x half> add ; GFX11-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %a.val = load <2 x half>, <2 x half> addrspace(1)* %a + %a.val = load <2 x half>, ptr addrspace(1) %a %r.val = call <2 x half> @llvm.cos.v2f16(<2 x half> 
%a.val) - store <2 x half> %r.val, <2 x half> addrspace(1)* %r + store <2 x half> %r.val, ptr addrspace(1) %r ret void } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.cos.ll b/llvm/test/CodeGen/AMDGPU/llvm.cos.ll index bd89502..c5dc863 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.cos.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.cos.ll @@ -11,9 +11,9 @@ ;SI: v_cos_f32 ;SI-NOT: v_cos_f32 -define amdgpu_kernel void @test(float addrspace(1)* %out, float %x) #1 { +define amdgpu_kernel void @test(ptr addrspace(1) %out, float %x) #1 { %cos = call float @llvm.cos.f32(float %x) - store float %cos, float addrspace(1)* %out + store float %cos, ptr addrspace(1) %out ret void } @@ -29,9 +29,9 @@ define amdgpu_kernel void @test(float addrspace(1)* %out, float %x) #1 { ;SI: v_cos_f32 ;SI-NOT: v_cos_f32 -define amdgpu_kernel void @testv(<4 x float> addrspace(1)* %out, <4 x float> inreg %vx) #1 { +define amdgpu_kernel void @testv(ptr addrspace(1) %out, <4 x float> inreg %vx) #1 { %cos = call <4 x float> @llvm.cos.v4f32(<4 x float> %vx) - store <4 x float> %cos, <4 x float> addrspace(1)* %out + store <4 x float> %cos, ptr addrspace(1) %out ret void } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.dbg.value.ll b/llvm/test/CodeGen/AMDGPU/llvm.dbg.value.ll index 697fdfb..d8358db 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.dbg.value.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.dbg.value.ll @@ -9,10 +9,10 @@ ; GCN: flat_store_dword ; GCN: s_endpgm -define amdgpu_kernel void @test_debug_value(i32 addrspace(1)* nocapture %globalptr_arg) #0 !dbg !4 { +define amdgpu_kernel void @test_debug_value(ptr addrspace(1) nocapture %globalptr_arg) #0 !dbg !4 { entry: - tail call void @llvm.dbg.value(metadata i32 addrspace(1)* %globalptr_arg, metadata !10, metadata !13), !dbg !14 - store i32 123, i32 addrspace(1)* %globalptr_arg, align 4 + tail call void @llvm.dbg.value(metadata ptr addrspace(1) %globalptr_arg, metadata !10, metadata !13), !dbg !14 + store i32 123, ptr addrspace(1) %globalptr_arg, align 4 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.exp2.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.exp2.f16.ll index 8691d21..01e58ec 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.exp2.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.exp2.f16.ll @@ -13,12 +13,12 @@ declare <2 x half> @llvm.exp2.v2f16(<2 x half> %a) ; GCN: buffer_store_short v[[R_F16]] ; GCN: s_endpgm define amdgpu_kernel void @exp2_f16( - half addrspace(1)* %r, - half addrspace(1)* %a) { + ptr addrspace(1) %r, + ptr addrspace(1) %a) { entry: - %a.val = load half, half addrspace(1)* %a + %a.val = load half, ptr addrspace(1) %a %r.val = call half @llvm.exp2.f16(half %a.val) - store half %r.val, half addrspace(1)* %r + store half %r.val, ptr addrspace(1) %r ret void } @@ -43,11 +43,11 @@ entry: ; GCN: buffer_store_dword v[[R_V2_F16]] ; GCN: s_endpgm define amdgpu_kernel void @exp2_v2f16( - <2 x half> addrspace(1)* %r, - <2 x half> addrspace(1)* %a) { + ptr addrspace(1) %r, + ptr addrspace(1) %a) { entry: - %a.val = load <2 x half>, <2 x half> addrspace(1)* %a + %a.val = load <2 x half>, ptr addrspace(1) %a %r.val = call <2 x half> @llvm.exp2.v2f16(<2 x half> %a.val) - store <2 x half> %r.val, <2 x half> addrspace(1)* %r + store <2 x half> %r.val, ptr addrspace(1) %r ret void } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.exp2.ll b/llvm/test/CodeGen/AMDGPU/llvm.exp2.ll index aa0e1e3..6a4fefb 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.exp2.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.exp2.ll @@ -11,10 +11,10 @@ ;CM-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} ;SI: v_exp_f32 -define amdgpu_kernel void @test(float 
addrspace(1)* %out, float %in) { +define amdgpu_kernel void @test(ptr addrspace(1) %out, float %in) { entry: %0 = call float @llvm.exp2.f32(float %in) - store float %0, float addrspace(1)* %out + store float %0, ptr addrspace(1) %out ret void } @@ -34,10 +34,10 @@ entry: ;SI: v_exp_f32 ;SI: v_exp_f32 -define amdgpu_kernel void @testv2(<2 x float> addrspace(1)* %out, <2 x float> %in) { +define amdgpu_kernel void @testv2(ptr addrspace(1) %out, <2 x float> %in) { entry: %0 = call <2 x float> @llvm.exp2.v2f32(<2 x float> %in) - store <2 x float> %0, <2 x float> addrspace(1)* %out + store <2 x float> %0, ptr addrspace(1) %out ret void } @@ -68,10 +68,10 @@ entry: ;SI: v_exp_f32 ;SI: v_exp_f32 ;SI: v_exp_f32 -define amdgpu_kernel void @testv4(<4 x float> addrspace(1)* %out, <4 x float> %in) { +define amdgpu_kernel void @testv4(ptr addrspace(1) %out, <4 x float> %in) { entry: %0 = call <4 x float> @llvm.exp2.v4f32(<4 x float> %in) - store <4 x float> %0, <4 x float> addrspace(1)* %out + store <4 x float> %0, ptr addrspace(1) %out ret void } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.floor.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.floor.f16.ll index 081dc61..e5c927b 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.floor.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.floor.f16.ll @@ -13,12 +13,12 @@ declare <2 x half> @llvm.floor.v2f16(<2 x half> %a) ; GCN: buffer_store_short v[[R_F16]] ; GCN: s_endpgm define amdgpu_kernel void @floor_f16( - half addrspace(1)* %r, - half addrspace(1)* %a) { + ptr addrspace(1) %r, + ptr addrspace(1) %a) { entry: - %a.val = load half, half addrspace(1)* %a + %a.val = load half, ptr addrspace(1) %a %r.val = call half @llvm.floor.f16(half %a.val) - store half %r.val, half addrspace(1)* %r + store half %r.val, ptr addrspace(1) %r ret void } @@ -43,11 +43,11 @@ entry: ; GCN: buffer_store_dword v[[R_V2_F16]] ; GCN: s_endpgm define amdgpu_kernel void @floor_v2f16( - <2 x half> addrspace(1)* %r, - <2 x half> addrspace(1)* %a) { + ptr addrspace(1) %r, + ptr addrspace(1) %a) { entry: - %a.val = load <2 x half>, <2 x half> addrspace(1)* %a + %a.val = load <2 x half>, ptr addrspace(1) %a %r.val = call <2 x half> @llvm.floor.v2f16(<2 x half> %a.val) - store <2 x half> %r.val, <2 x half> addrspace(1)* %r + store <2 x half> %r.val, ptr addrspace(1) %r ret void } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.fma.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.fma.f16.ll index d4b2d11..d51b4ff 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.fma.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.fma.f16.ll @@ -19,15 +19,15 @@ declare <4 x half> @llvm.fma.v4f16(<4 x half> %a, <4 x half> %b, <4 x half> %c) ; GCN: buffer_store_short v[[R_F16]] ; GCN: s_endpgm define amdgpu_kernel void @fma_f16( - half addrspace(1)* %r, - half addrspace(1)* %a, - half addrspace(1)* %b, - half addrspace(1)* %c) { - %a.val = load half, half addrspace(1)* %a - %b.val = load half, half addrspace(1)* %b - %c.val = load half, half addrspace(1)* %c + ptr addrspace(1) %r, + ptr addrspace(1) %a, + ptr addrspace(1) %b, + ptr addrspace(1) %c) { + %a.val = load half, ptr addrspace(1) %a + %b.val = load half, ptr addrspace(1) %b + %c.val = load half, ptr addrspace(1) %c %r.val = call half @llvm.fma.f16(half %a.val, half %b.val, half %c.val) - store half %r.val, half addrspace(1)* %r + store half %r.val, ptr addrspace(1) %r ret void } @@ -45,13 +45,13 @@ define amdgpu_kernel void @fma_f16( ; GCN: buffer_store_short v[[R_F16]] ; GCN: s_endpgm define amdgpu_kernel void @fma_f16_imm_a( - half addrspace(1)* %r, - half addrspace(1)* %b, - half addrspace(1)* 
%c) { - %b.val = load half, half addrspace(1)* %b - %c.val = load half, half addrspace(1)* %c + ptr addrspace(1) %r, + ptr addrspace(1) %b, + ptr addrspace(1) %c) { + %b.val = load half, ptr addrspace(1) %b + %c.val = load half, ptr addrspace(1) %c %r.val = call half @llvm.fma.f16(half 3.0, half %b.val, half %c.val) - store half %r.val, half addrspace(1)* %r + store half %r.val, ptr addrspace(1) %r ret void } @@ -68,13 +68,13 @@ define amdgpu_kernel void @fma_f16_imm_a( ; GCN: buffer_store_short v[[R_F16]] ; GCN: s_endpgm define amdgpu_kernel void @fma_f16_imm_b( - half addrspace(1)* %r, - half addrspace(1)* %a, - half addrspace(1)* %c) { - %a.val = load half, half addrspace(1)* %a - %c.val = load half, half addrspace(1)* %c + ptr addrspace(1) %r, + ptr addrspace(1) %a, + ptr addrspace(1) %c) { + %a.val = load half, ptr addrspace(1) %a + %c.val = load half, ptr addrspace(1) %c %r.val = call half @llvm.fma.f16(half %a.val, half 3.0, half %c.val) - store half %r.val, half addrspace(1)* %r + store half %r.val, ptr addrspace(1) %r ret void } @@ -91,13 +91,13 @@ define amdgpu_kernel void @fma_f16_imm_b( ; GCN: buffer_store_short v[[R_F16]] ; GCN: s_endpgm define amdgpu_kernel void @fma_f16_imm_c( - half addrspace(1)* %r, - half addrspace(1)* %a, - half addrspace(1)* %b) { - %a.val = load half, half addrspace(1)* %a - %b.val = load half, half addrspace(1)* %b + ptr addrspace(1) %r, + ptr addrspace(1) %a, + ptr addrspace(1) %b) { + %a.val = load half, ptr addrspace(1) %a + %b.val = load half, ptr addrspace(1) %b %r.val = call half @llvm.fma.f16(half %a.val, half %b.val, half 3.0) - store half %r.val, half addrspace(1)* %r + store half %r.val, ptr addrspace(1) %r ret void } @@ -137,15 +137,15 @@ define amdgpu_kernel void @fma_f16_imm_c( ; GCN: buffer_store_dword v[[R_V2_F16]] ; GCN: s_endpgm define amdgpu_kernel void @fma_v2f16( - <2 x half> addrspace(1)* %r, - <2 x half> addrspace(1)* %a, - <2 x half> addrspace(1)* %b, - <2 x half> addrspace(1)* %c) { - %a.val = load <2 x half>, <2 x half> addrspace(1)* %a - %b.val = load <2 x half>, <2 x half> addrspace(1)* %b - %c.val = load <2 x half>, <2 x half> addrspace(1)* %c + ptr addrspace(1) %r, + ptr addrspace(1) %a, + ptr addrspace(1) %b, + ptr addrspace(1) %c) { + %a.val = load <2 x half>, ptr addrspace(1) %a + %b.val = load <2 x half>, ptr addrspace(1) %b + %c.val = load <2 x half>, ptr addrspace(1) %c %r.val = call <2 x half> @llvm.fma.v2f16(<2 x half> %a.val, <2 x half> %b.val, <2 x half> %c.val) - store <2 x half> %r.val, <2 x half> addrspace(1)* %r + store <2 x half> %r.val, ptr addrspace(1) %r ret void } @@ -184,13 +184,13 @@ define amdgpu_kernel void @fma_v2f16( ; GCN: buffer_store_dword v[[R_V2_F16]] ; GCN: s_endpgm define amdgpu_kernel void @fma_v2f16_imm_a( - <2 x half> addrspace(1)* %r, - <2 x half> addrspace(1)* %b, - <2 x half> addrspace(1)* %c) { - %b.val = load <2 x half>, <2 x half> addrspace(1)* %b - %c.val = load <2 x half>, <2 x half> addrspace(1)* %c + ptr addrspace(1) %r, + ptr addrspace(1) %b, + ptr addrspace(1) %c) { + %b.val = load <2 x half>, ptr addrspace(1) %b + %c.val = load <2 x half>, ptr addrspace(1) %c %r.val = call <2 x half> @llvm.fma.v2f16(<2 x half> , <2 x half> %b.val, <2 x half> %c.val) - store <2 x half> %r.val, <2 x half> addrspace(1)* %r + store <2 x half> %r.val, ptr addrspace(1) %r ret void } @@ -230,13 +230,13 @@ define amdgpu_kernel void @fma_v2f16_imm_a( ; GCN: buffer_store_dword v[[R_V2_F16]] ; GCN: s_endpgm define amdgpu_kernel void @fma_v2f16_imm_b( - <2 x half> addrspace(1)* %r, - <2 x half> 
addrspace(1)* %a, - <2 x half> addrspace(1)* %c) { - %a.val = load <2 x half>, <2 x half> addrspace(1)* %a - %c.val = load <2 x half>, <2 x half> addrspace(1)* %c + ptr addrspace(1) %r, + ptr addrspace(1) %a, + ptr addrspace(1) %c) { + %a.val = load <2 x half>, ptr addrspace(1) %a + %c.val = load <2 x half>, ptr addrspace(1) %c %r.val = call <2 x half> @llvm.fma.v2f16(<2 x half> %a.val, <2 x half> , <2 x half> %c.val) - store <2 x half> %r.val, <2 x half> addrspace(1)* %r + store <2 x half> %r.val, ptr addrspace(1) %r ret void } @@ -280,13 +280,13 @@ define amdgpu_kernel void @fma_v2f16_imm_b( ; GCN: buffer_store_dword v[[R_V2_F16]] ; GCN: s_endpgm define amdgpu_kernel void @fma_v2f16_imm_c( - <2 x half> addrspace(1)* %r, - <2 x half> addrspace(1)* %a, - <2 x half> addrspace(1)* %b) { - %a.val = load <2 x half>, <2 x half> addrspace(1)* %a - %b.val = load <2 x half>, <2 x half> addrspace(1)* %b + ptr addrspace(1) %r, + ptr addrspace(1) %a, + ptr addrspace(1) %b) { + %a.val = load <2 x half>, ptr addrspace(1) %a + %b.val = load <2 x half>, ptr addrspace(1) %b %r.val = call <2 x half> @llvm.fma.v2f16(<2 x half> %a.val, <2 x half> %b.val, <2 x half> ) - store <2 x half> %r.val, <2 x half> addrspace(1)* %r + store <2 x half> %r.val, ptr addrspace(1) %r ret void } @@ -349,14 +349,14 @@ define amdgpu_kernel void @fma_v2f16_imm_c( ; GCN: s_endpgm define amdgpu_kernel void @fma_v4f16( - <4 x half> addrspace(1)* %r, - <4 x half> addrspace(1)* %a, - <4 x half> addrspace(1)* %b, - <4 x half> addrspace(1)* %c) { - %a.val = load <4 x half>, <4 x half> addrspace(1)* %a - %b.val = load <4 x half>, <4 x half> addrspace(1)* %b - %c.val = load <4 x half>, <4 x half> addrspace(1)* %c + ptr addrspace(1) %r, + ptr addrspace(1) %a, + ptr addrspace(1) %b, + ptr addrspace(1) %c) { + %a.val = load <4 x half>, ptr addrspace(1) %a + %b.val = load <4 x half>, ptr addrspace(1) %b + %c.val = load <4 x half>, ptr addrspace(1) %c %r.val = call <4 x half> @llvm.fma.v4f16(<4 x half> %a.val, <4 x half> %b.val, <4 x half> %c.val) - store <4 x half> %r.val, <4 x half> addrspace(1)* %r + store <4 x half> %r.val, ptr addrspace(1) %r ret void } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.fmuladd.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.fmuladd.f16.ll index 183b26b..017bc0c 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.fmuladd.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.fmuladd.f16.ll @@ -36,15 +36,15 @@ declare <2 x half> @llvm.fmuladd.v2f16(<2 x half> %a, <2 x half> %b, <2 x half> ; GCN: s_endpgm define amdgpu_kernel void @fmuladd_f16( - half addrspace(1)* %r, - half addrspace(1)* %a, - half addrspace(1)* %b, - half addrspace(1)* %c) { - %a.val = load half, half addrspace(1)* %a - %b.val = load half, half addrspace(1)* %b - %c.val = load half, half addrspace(1)* %c + ptr addrspace(1) %r, + ptr addrspace(1) %a, + ptr addrspace(1) %b, + ptr addrspace(1) %c) { + %a.val = load half, ptr addrspace(1) %a + %b.val = load half, ptr addrspace(1) %b + %c.val = load half, ptr addrspace(1) %c %r.val = call half @llvm.fmuladd.f16(half %a.val, half %b.val, half %c.val) - store half %r.val, half addrspace(1)* %r + store half %r.val, ptr addrspace(1) %r ret void } @@ -73,13 +73,13 @@ define amdgpu_kernel void @fmuladd_f16( ; GCN: s_endpgm define amdgpu_kernel void @fmuladd_f16_imm_a( - half addrspace(1)* %r, - half addrspace(1)* %b, - half addrspace(1)* %c) { - %b.val = load volatile half, half addrspace(1)* %b - %c.val = load volatile half, half addrspace(1)* %c + ptr addrspace(1) %r, + ptr addrspace(1) %b, + ptr addrspace(1) %c) { + %b.val = load 
volatile half, ptr addrspace(1) %b + %c.val = load volatile half, ptr addrspace(1) %c %r.val = call half @llvm.fmuladd.f16(half 3.0, half %b.val, half %c.val) - store half %r.val, half addrspace(1)* %r + store half %r.val, ptr addrspace(1) %r ret void } @@ -108,13 +108,13 @@ define amdgpu_kernel void @fmuladd_f16_imm_a( ; GCN: s_endpgm define amdgpu_kernel void @fmuladd_f16_imm_b( - half addrspace(1)* %r, - half addrspace(1)* %a, - half addrspace(1)* %c) { - %a.val = load volatile half, half addrspace(1)* %a - %c.val = load volatile half, half addrspace(1)* %c + ptr addrspace(1) %r, + ptr addrspace(1) %a, + ptr addrspace(1) %c) { + %a.val = load volatile half, ptr addrspace(1) %a + %c.val = load volatile half, ptr addrspace(1) %c %r.val = call half @llvm.fmuladd.f16(half %a.val, half 3.0, half %c.val) - store half %r.val, half addrspace(1)* %r + store half %r.val, ptr addrspace(1) %r ret void } @@ -176,14 +176,14 @@ define amdgpu_kernel void @fmuladd_f16_imm_b( ; GCN: buffer_store_{{dword|b32}} v[[R_V2_F16]] define amdgpu_kernel void @fmuladd_v2f16( - <2 x half> addrspace(1)* %r, - <2 x half> addrspace(1)* %a, - <2 x half> addrspace(1)* %b, - <2 x half> addrspace(1)* %c) { - %a.val = load <2 x half>, <2 x half> addrspace(1)* %a - %b.val = load <2 x half>, <2 x half> addrspace(1)* %b - %c.val = load <2 x half>, <2 x half> addrspace(1)* %c + ptr addrspace(1) %r, + ptr addrspace(1) %a, + ptr addrspace(1) %b, + ptr addrspace(1) %c) { + %a.val = load <2 x half>, ptr addrspace(1) %a + %b.val = load <2 x half>, ptr addrspace(1) %b + %c.val = load <2 x half>, ptr addrspace(1) %c %r.val = call <2 x half> @llvm.fmuladd.v2f16(<2 x half> %a.val, <2 x half> %b.val, <2 x half> %c.val) - store <2 x half> %r.val, <2 x half> addrspace(1)* %r + store <2 x half> %r.val, ptr addrspace(1) %r ret void } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.fptrunc.round.ll b/llvm/test/CodeGen/AMDGPU/llvm.fptrunc.round.ll index b4787f3..6fdfc99 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.fptrunc.round.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.fptrunc.round.ll @@ -3,7 +3,7 @@ ; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck %s ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1030 -verify-machineinstrs < %s | FileCheck %s -define amdgpu_gs void @test_fptrunc_round_upward(float %a, i32 %data0, <4 x i32> %data1, half addrspace(1)* %out) { +define amdgpu_gs void @test_fptrunc_round_upward(float %a, i32 %data0, <4 x i32> %data1, ptr addrspace(1) %out) { ; CHECK-LABEL: test_fptrunc_round_upward: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 1 @@ -11,11 +11,11 @@ define amdgpu_gs void @test_fptrunc_round_upward(float %a, i32 %data0, <4 x i32> ; CHECK-NEXT: global_store_short v[6:7], v0, off ; CHECK-NEXT: s_endpgm %res = call half @llvm.fptrunc.round(float %a, metadata !"round.upward") - store half %res, half addrspace(1)* %out, align 4 + store half %res, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_gs void @test_fptrunc_round_downward(float %a, i32 %data0, <4 x i32> %data1, half addrspace(1)* %out) { +define amdgpu_gs void @test_fptrunc_round_downward(float %a, i32 %data0, <4 x i32> %data1, ptr addrspace(1) %out) { ; CHECK-LABEL: test_fptrunc_round_downward: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 3, 1), 1 @@ -23,11 +23,11 @@ define amdgpu_gs void @test_fptrunc_round_downward(float %a, i32 %data0, <4 x i3 ; CHECK-NEXT: global_store_short v[6:7], v0, off ; CHECK-NEXT: s_endpgm %res = call half @llvm.fptrunc.round(float %a, metadata 
!"round.downward") - store half %res, half addrspace(1)* %out, align 4 + store half %res, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_gs void @test_fptrunc_round_upward_multiple_calls(float %a, float %b, i32 %data0, <4 x i32> %data1, half addrspace(1)* %out) { +define amdgpu_gs void @test_fptrunc_round_upward_multiple_calls(float %a, float %b, i32 %data0, <4 x i32> %data1, ptr addrspace(1) %out) { ; CHECK-LABEL: test_fptrunc_round_upward_multiple_calls: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 1 @@ -45,7 +45,7 @@ define amdgpu_gs void @test_fptrunc_round_upward_multiple_calls(float %a, float %res3 = call half @llvm.fptrunc.round(float %b, metadata !"round.downward") %res4 = fadd half %res1, %res2 %res5 = fadd half %res3, %res4 - store half %res5, half addrspace(1)* %out, align 4 + store half %res5, ptr addrspace(1) %out, align 4 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.log.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.log.f16.ll index 4a14e55..0a8faad 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.log.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.log.f16.ll @@ -19,12 +19,12 @@ declare <2 x half> @llvm.log.v2f16(<2 x half> %a) ; VI: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, v[[R_F16_0]] ; GFX9: global_store_short v{{\[[0-9]+:[0-9]+\]}}, v[[R_F16_0]] define void @log_f16( - half addrspace(1)* %r, - half addrspace(1)* %a) { + ptr addrspace(1) %r, + ptr addrspace(1) %a) { entry: - %a.val = load half, half addrspace(1)* %a + %a.val = load half, ptr addrspace(1) %a %r.val = call half @llvm.log.f16(half %a.val) - store half %r.val, half addrspace(1)* %r + store half %r.val, ptr addrspace(1) %r ret void } @@ -58,11 +58,11 @@ entry: ; VI: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v[[R_F32_5]] ; GFX9: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v[[R_F32_5]] define void @log_v2f16( - <2 x half> addrspace(1)* %r, - <2 x half> addrspace(1)* %a) { + ptr addrspace(1) %r, + ptr addrspace(1) %a) { entry: - %a.val = load <2 x half>, <2 x half> addrspace(1)* %a + %a.val = load <2 x half>, ptr addrspace(1) %a %r.val = call <2 x half> @llvm.log.v2f16(<2 x half> %a.val) - store <2 x half> %r.val, <2 x half> addrspace(1)* %r + store <2 x half> %r.val, ptr addrspace(1) %r ret void } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.log.ll b/llvm/test/CodeGen/AMDGPU/llvm.log.ll index 7c6ef1c..0867b7e 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.log.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.log.ll @@ -11,10 +11,10 @@ ; CM-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} ; GCN: v_log_f32_e32 v{{[0-9]+}}, v{{[0-9]+}} ; GCN: v_mul_f32_e32 v{{[0-9]+}}, 0x3f317218, v{{[0-9]+}} -define void @test(float addrspace(1)* %out, float %in) { +define void @test(ptr addrspace(1) %out, float %in) { entry: %res = call float @llvm.log.f32(float %in) - store float %res, float addrspace(1)* %out + store float %res, ptr addrspace(1) %out ret void } @@ -35,10 +35,10 @@ entry: ; GCN-DAG: v_log_f32_e32 v{{[0-9]+}}, v{{[0-9]+}} ; GCN: v_mul_f32_e32 v{{[0-9]+}}, 0x3f317218, v{{[0-9]+}} ; GCN: v_mul_f32_e32 v{{[0-9]+}}, 0x3f317218, v{{[0-9]+}} -define void @testv2(<2 x float> addrspace(1)* %out, <2 x float> %in) { +define void @testv2(ptr addrspace(1) %out, <2 x float> %in) { entry: %res = call <2 x float> @llvm.log.v2f32(<2 x float> %in) - store <2 x float> %res, <2 x float> addrspace(1)* %out + store <2 x float> %res, ptr addrspace(1) %out ret void } @@ -73,10 +73,10 @@ entry: ; GCN: v_mul_f32_e32 v{{[0-9]+}}, 0x3f317218, v{{[0-9]+}} ; GCN: v_mul_f32_e32 v{{[0-9]+}}, 0x3f317218, v{{[0-9]+}} ; GCN: v_mul_f32_e32 
v{{[0-9]+}}, 0x3f317218, v{{[0-9]+}} -define void @testv4(<4 x float> addrspace(1)* %out, <4 x float> %in) { +define void @testv4(ptr addrspace(1) %out, <4 x float> %in) { entry: %res = call <4 x float> @llvm.log.v4f32(<4 x float> %in) - store <4 x float> %res, <4 x float> addrspace(1)* %out + store <4 x float> %res, ptr addrspace(1) %out ret void } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.log10.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.log10.f16.ll index c50552a..a4083d2 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.log10.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.log10.f16.ll @@ -19,12 +19,12 @@ declare <2 x half> @llvm.log10.v2f16(<2 x half> %a) ; VI: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, v[[R_F16_0]] ; GFX9: global_store_short v{{\[[0-9]+:[0-9]+\]}}, v[[R_F16_0]] define void @log10_f16( - half addrspace(1)* %r, - half addrspace(1)* %a) { + ptr addrspace(1) %r, + ptr addrspace(1) %a) { entry: - %a.val = load half, half addrspace(1)* %a + %a.val = load half, ptr addrspace(1) %a %r.val = call half @llvm.log10.f16(half %a.val) - store half %r.val, half addrspace(1)* %r + store half %r.val, ptr addrspace(1) %r ret void } @@ -59,11 +59,11 @@ entry: ; VI: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v[[R_F32_5]] ; GFX9: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v[[R_F32_5]] define void @log10_v2f16( - <2 x half> addrspace(1)* %r, - <2 x half> addrspace(1)* %a) { + ptr addrspace(1) %r, + ptr addrspace(1) %a) { entry: - %a.val = load <2 x half>, <2 x half> addrspace(1)* %a + %a.val = load <2 x half>, ptr addrspace(1) %a %r.val = call <2 x half> @llvm.log10.v2f16(<2 x half> %a.val) - store <2 x half> %r.val, <2 x half> addrspace(1)* %r + store <2 x half> %r.val, ptr addrspace(1) %r ret void } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.log10.ll b/llvm/test/CodeGen/AMDGPU/llvm.log10.ll index e2e55a4..d67a71b 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.log10.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.log10.ll @@ -11,10 +11,10 @@ ; CM-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} ; GCN: v_log_f32_e32 v{{[0-9]+}}, v{{[0-9]+}} ; GCN: v_mul_f32_e32 v{{[0-9]+}}, 0x3e9a209a, v{{[0-9]+}} -define void @test(float addrspace(1)* %out, float %in) { +define void @test(ptr addrspace(1) %out, float %in) { entry: %res = call float @llvm.log10.f32(float %in) - store float %res, float addrspace(1)* %out + store float %res, ptr addrspace(1) %out ret void } @@ -35,10 +35,10 @@ entry: ; GCN-DAG: v_log_f32_e32 v{{[0-9]+}}, v{{[0-9]+}} ; GCN: v_mul_f32_e32 v{{[0-9]+}}, 0x3e9a209a, v{{[0-9]+}} ; GCN: v_mul_f32_e32 v{{[0-9]+}}, 0x3e9a209a, v{{[0-9]+}} -define void @testv2(<2 x float> addrspace(1)* %out, <2 x float> %in) { +define void @testv2(ptr addrspace(1) %out, <2 x float> %in) { entry: %res = call <2 x float> @llvm.log10.v2f32(<2 x float> %in) - store <2 x float> %res, <2 x float> addrspace(1)* %out + store <2 x float> %res, ptr addrspace(1) %out ret void } @@ -73,10 +73,10 @@ entry: ; GCN: v_mul_f32_e32 v{{[0-9]+}}, 0x3e9a209a, v{{[0-9]+}} ; GCN: v_mul_f32_e32 v{{[0-9]+}}, 0x3e9a209a, v{{[0-9]+}} ; GCN: v_mul_f32_e32 v{{[0-9]+}}, 0x3e9a209a, v{{[0-9]+}} -define void @testv4(<4 x float> addrspace(1)* %out, <4 x float> %in) { +define void @testv4(ptr addrspace(1) %out, <4 x float> %in) { entry: %res = call <4 x float> @llvm.log10.v4f32(<4 x float> %in) - store <4 x float> %res, <4 x float> addrspace(1)* %out + store <4 x float> %res, ptr addrspace(1) %out ret void } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.log2.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.log2.f16.ll index 7228c40..85b5e3c 100644 --- 
a/llvm/test/CodeGen/AMDGPU/llvm.log2.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.log2.f16.ll @@ -13,12 +13,12 @@ declare <2 x half> @llvm.log2.v2f16(<2 x half> %a) ; GCN: buffer_store_short v[[R_F16]] ; GCN: s_endpgm define amdgpu_kernel void @log2_f16( - half addrspace(1)* %r, - half addrspace(1)* %a) { + ptr addrspace(1) %r, + ptr addrspace(1) %a) { entry: - %a.val = load half, half addrspace(1)* %a + %a.val = load half, ptr addrspace(1) %a %r.val = call half @llvm.log2.f16(half %a.val) - store half %r.val, half addrspace(1)* %r + store half %r.val, ptr addrspace(1) %r ret void } @@ -43,11 +43,11 @@ entry: ; GCN: buffer_store_dword v[[R_V2_F16]] ; GCN: s_endpgm define amdgpu_kernel void @log2_v2f16( - <2 x half> addrspace(1)* %r, - <2 x half> addrspace(1)* %a) { + ptr addrspace(1) %r, + ptr addrspace(1) %a) { entry: - %a.val = load <2 x half>, <2 x half> addrspace(1)* %a + %a.val = load <2 x half>, ptr addrspace(1) %a %r.val = call <2 x half> @llvm.log2.v2f16(<2 x half> %a.val) - store <2 x half> %r.val, <2 x half> addrspace(1)* %r + store <2 x half> %r.val, ptr addrspace(1) %r ret void } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.log2.ll b/llvm/test/CodeGen/AMDGPU/llvm.log2.ll index bb97a2b..8702299 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.log2.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.log2.ll @@ -11,10 +11,10 @@ ;CM-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} ;SI: v_log_f32 -define amdgpu_kernel void @test(float addrspace(1)* %out, float %in) { +define amdgpu_kernel void @test(ptr addrspace(1) %out, float %in) { entry: %0 = call float @llvm.log2.f32(float %in) - store float %0, float addrspace(1)* %out + store float %0, ptr addrspace(1) %out ret void } @@ -34,10 +34,10 @@ entry: ;SI: v_log_f32 ;SI: v_log_f32 -define amdgpu_kernel void @testv2(<2 x float> addrspace(1)* %out, <2 x float> %in) { +define amdgpu_kernel void @testv2(ptr addrspace(1) %out, <2 x float> %in) { entry: %0 = call <2 x float> @llvm.log2.v2f32(<2 x float> %in) - store <2 x float> %0, <2 x float> addrspace(1)* %out + store <2 x float> %0, ptr addrspace(1) %out ret void } @@ -68,10 +68,10 @@ entry: ;SI: v_log_f32 ;SI: v_log_f32 ;SI: v_log_f32 -define amdgpu_kernel void @testv4(<4 x float> addrspace(1)* %out, <4 x float> %in) { +define amdgpu_kernel void @testv4(ptr addrspace(1) %out, <4 x float> %in) { entry: %0 = call <4 x float> @llvm.log2.v4f32(<4 x float> %in) - store <4 x float> %0, <4 x float> addrspace(1)* %out + store <4 x float> %0, ptr addrspace(1) %out ret void } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.maxnum.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.maxnum.f16.ll index 0271fa4..1699147 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.maxnum.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.maxnum.f16.ll @@ -142,14 +142,14 @@ define amdgpu_kernel void @maxnum_f16( ; GFX11-NEXT: buffer_store_b16 v0, off, s[8:11], 0 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - half addrspace(1)* %r, - half addrspace(1)* %a, - half addrspace(1)* %b) #0 { + ptr addrspace(1) %r, + ptr addrspace(1) %a, + ptr addrspace(1) %b) #0 { entry: - %a.val = load volatile half, half addrspace(1)* %a - %b.val = load volatile half, half addrspace(1)* %b + %a.val = load volatile half, ptr addrspace(1) %a + %b.val = load volatile half, ptr addrspace(1) %b %r.val = call half @llvm.maxnum.f16(half %a.val, half %b.val) - store half %r.val, half addrspace(1)* %r + store half %r.val, ptr addrspace(1) %r ret void } @@ -252,12 +252,12 @@ define amdgpu_kernel void @maxnum_f16_imm_a( ; GFX11-NEXT: buffer_store_b16 v0, off, s[4:7], 0 ; GFX11-NEXT: 
s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - half addrspace(1)* %r, - half addrspace(1)* %b) #0 { + ptr addrspace(1) %r, + ptr addrspace(1) %b) #0 { entry: - %b.val = load half, half addrspace(1)* %b + %b.val = load half, ptr addrspace(1) %b %r.val = call half @llvm.maxnum.f16(half 3.0, half %b.val) - store half %r.val, half addrspace(1)* %r + store half %r.val, ptr addrspace(1) %r ret void } @@ -360,12 +360,12 @@ define amdgpu_kernel void @maxnum_f16_imm_b( ; GFX11-NEXT: buffer_store_b16 v0, off, s[4:7], 0 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - half addrspace(1)* %r, - half addrspace(1)* %a) #0 { + ptr addrspace(1) %r, + ptr addrspace(1) %a) #0 { entry: - %a.val = load half, half addrspace(1)* %a + %a.val = load half, ptr addrspace(1) %a %r.val = call half @llvm.maxnum.f16(half %a.val, half 4.0) - store half %r.val, half addrspace(1)* %r + store half %r.val, ptr addrspace(1) %r ret void } @@ -476,14 +476,14 @@ define amdgpu_kernel void @maxnum_v2f16( ; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - <2 x half> addrspace(1)* %r, - <2 x half> addrspace(1)* %a, - <2 x half> addrspace(1)* %b) #0 { + ptr addrspace(1) %r, + ptr addrspace(1) %a, + ptr addrspace(1) %b) #0 { entry: - %a.val = load <2 x half>, <2 x half> addrspace(1)* %a - %b.val = load <2 x half>, <2 x half> addrspace(1)* %b + %a.val = load <2 x half>, ptr addrspace(1) %a + %b.val = load <2 x half>, ptr addrspace(1) %b %r.val = call <2 x half> @llvm.maxnum.v2f16(<2 x half> %a.val, <2 x half> %b.val) - store <2 x half> %r.val, <2 x half> addrspace(1)* %r + store <2 x half> %r.val, ptr addrspace(1) %r ret void } @@ -569,12 +569,12 @@ define amdgpu_kernel void @maxnum_v2f16_imm_a( ; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - <2 x half> addrspace(1)* %r, - <2 x half> addrspace(1)* %b) #0 { + ptr addrspace(1) %r, + ptr addrspace(1) %b) #0 { entry: - %b.val = load <2 x half>, <2 x half> addrspace(1)* %b + %b.val = load <2 x half>, ptr addrspace(1) %b %r.val = call <2 x half> @llvm.maxnum.v2f16(<2 x half> , <2 x half> %b.val) - store <2 x half> %r.val, <2 x half> addrspace(1)* %r + store <2 x half> %r.val, ptr addrspace(1) %r ret void } @@ -660,12 +660,12 @@ define amdgpu_kernel void @maxnum_v2f16_imm_b( ; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - <2 x half> addrspace(1)* %r, - <2 x half> addrspace(1)* %a) #0 { + ptr addrspace(1) %r, + ptr addrspace(1) %a) #0 { entry: - %a.val = load <2 x half>, <2 x half> addrspace(1)* %a + %a.val = load <2 x half>, ptr addrspace(1) %a %r.val = call <2 x half> @llvm.maxnum.v2f16(<2 x half> %a.val, <2 x half> ) - store <2 x half> %r.val, <2 x half> addrspace(1)* %r + store <2 x half> %r.val, ptr addrspace(1) %r ret void } @@ -801,14 +801,14 @@ define amdgpu_kernel void @maxnum_v3f16( ; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - <3 x half> addrspace(1)* %r, - <3 x half> addrspace(1)* %a, - <3 x half> addrspace(1)* %b) #0 { + ptr addrspace(1) %r, + ptr addrspace(1) %a, + ptr addrspace(1) %b) #0 { entry: - %a.val = load <3 x half>, <3 x half> addrspace(1)* %a - %b.val = load <3 x half>, <3 x half> addrspace(1)* %b + %a.val = load <3 x half>, ptr addrspace(1) %a + %b.val = load <3 x half>, ptr addrspace(1) %b %r.val = call <3 x half> 
@llvm.maxnum.v3f16(<3 x half> %a.val, <3 x half> %b.val) - store <3 x half> %r.val, <3 x half> addrspace(1)* %r + store <3 x half> %r.val, ptr addrspace(1) %r ret void } @@ -955,14 +955,14 @@ define amdgpu_kernel void @maxnum_v4f16( ; GFX11-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - <4 x half> addrspace(1)* %r, - <4 x half> addrspace(1)* %a, - <4 x half> addrspace(1)* %b) #0 { + ptr addrspace(1) %r, + ptr addrspace(1) %a, + ptr addrspace(1) %b) #0 { entry: - %a.val = load <4 x half>, <4 x half> addrspace(1)* %a - %b.val = load <4 x half>, <4 x half> addrspace(1)* %b + %a.val = load <4 x half>, ptr addrspace(1) %a + %b.val = load <4 x half>, ptr addrspace(1) %b %r.val = call <4 x half> @llvm.maxnum.v4f16(<4 x half> %a.val, <4 x half> %b.val) - store <4 x half> %r.val, <4 x half> addrspace(1)* %r + store <4 x half> %r.val, ptr addrspace(1) %r ret void } @@ -1077,12 +1077,12 @@ define amdgpu_kernel void @fmax_v4f16_imm_a( ; GFX11-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - <4 x half> addrspace(1)* %r, - <4 x half> addrspace(1)* %b) #0 { + ptr addrspace(1) %r, + ptr addrspace(1) %b) #0 { entry: - %b.val = load <4 x half>, <4 x half> addrspace(1)* %b + %b.val = load <4 x half>, ptr addrspace(1) %b %r.val = call <4 x half> @llvm.maxnum.v4f16(<4 x half> , <4 x half> %b.val) - store <4 x half> %r.val, <4 x half> addrspace(1)* %r + store <4 x half> %r.val, ptr addrspace(1) %r ret void } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.memcpy.ll b/llvm/test/CodeGen/AMDGPU/llvm.memcpy.ll index f8b331c..25194c7 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.memcpy.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.memcpy.ll @@ -1,9 +1,9 @@ ; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -declare void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* nocapture, i8 addrspace(3)* nocapture, i32, i1) nounwind -declare void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* nocapture, i8 addrspace(1)* nocapture, i64, i1) nounwind -declare void @llvm.memcpy.p1i8.p2i8.i64(i8 addrspace(1)* nocapture, i8 addrspace(4)* nocapture, i64, i1) nounwind +declare void @llvm.memcpy.p3.p3.i32(ptr addrspace(3) nocapture, ptr addrspace(3) nocapture, i32, i1) nounwind +declare void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) nocapture, ptr addrspace(1) nocapture, i64, i1) nounwind +declare void @llvm.memcpy.p1.p2.i64(ptr addrspace(1) nocapture, ptr addrspace(4) nocapture, i64, i1) nounwind ; FUNC-LABEL: {{^}}test_small_memcpy_i64_lds_to_lds_align1: @@ -80,10 +80,8 @@ declare void @llvm.memcpy.p1i8.p2i8.i64(i8 addrspace(1)* nocapture, i8 addrspace ; SI-DAG: ds_write_b8 ; SI: s_endpgm -define amdgpu_kernel void @test_small_memcpy_i64_lds_to_lds_align1(i64 addrspace(3)* noalias %out, i64 addrspace(3)* noalias %in) nounwind { - %bcin = bitcast i64 addrspace(3)* %in to i8 addrspace(3)* - %bcout = bitcast i64 addrspace(3)* %out to i8 addrspace(3)* - call void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* %bcout, i8 addrspace(3)* %bcin, i32 32, i1 false) nounwind +define amdgpu_kernel void @test_small_memcpy_i64_lds_to_lds_align1(ptr addrspace(3) noalias %out, ptr addrspace(3) noalias %in) nounwind { + call void @llvm.memcpy.p3.p3.i32(ptr addrspace(3) %out, ptr addrspace(3) %in, i32 32, i1 false) nounwind ret 
void } @@ -125,10 +123,8 @@ define amdgpu_kernel void @test_small_memcpy_i64_lds_to_lds_align1(i64 addrspace ; SI-DAG: ds_write_b16 ; SI: s_endpgm -define amdgpu_kernel void @test_small_memcpy_i64_lds_to_lds_align2(i64 addrspace(3)* noalias %out, i64 addrspace(3)* noalias %in) nounwind { - %bcin = bitcast i64 addrspace(3)* %in to i8 addrspace(3)* - %bcout = bitcast i64 addrspace(3)* %out to i8 addrspace(3)* - call void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* align 2 %bcout, i8 addrspace(3)* align 2 %bcin, i32 32, i1 false) nounwind +define amdgpu_kernel void @test_small_memcpy_i64_lds_to_lds_align2(ptr addrspace(3) noalias %out, ptr addrspace(3) noalias %in) nounwind { + call void @llvm.memcpy.p3.p3.i32(ptr addrspace(3) align 2 %out, ptr addrspace(3) align 2 %in, i32 32, i1 false) nounwind ret void } @@ -144,10 +140,8 @@ define amdgpu_kernel void @test_small_memcpy_i64_lds_to_lds_align2(i64 addrspace ; SI: ds_write2_b32 ; SI: s_endpgm -define amdgpu_kernel void @test_small_memcpy_i64_lds_to_lds_align4(i64 addrspace(3)* noalias %out, i64 addrspace(3)* noalias %in) nounwind { - %bcin = bitcast i64 addrspace(3)* %in to i8 addrspace(3)* - %bcout = bitcast i64 addrspace(3)* %out to i8 addrspace(3)* - call void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* align 4 %bcout, i8 addrspace(3)* align 4 %bcin, i32 32, i1 false) nounwind +define amdgpu_kernel void @test_small_memcpy_i64_lds_to_lds_align4(ptr addrspace(3) noalias %out, ptr addrspace(3) noalias %in) nounwind { + call void @llvm.memcpy.p3.p3.i32(ptr addrspace(3) align 4 %out, ptr addrspace(3) align 4 %in, i32 32, i1 false) nounwind ret void } @@ -161,10 +155,8 @@ define amdgpu_kernel void @test_small_memcpy_i64_lds_to_lds_align4(i64 addrspace ; SI: ds_write2_b64 ; SI-DAG: s_endpgm -define amdgpu_kernel void @test_small_memcpy_i64_lds_to_lds_align8(i64 addrspace(3)* noalias %out, i64 addrspace(3)* noalias %in) nounwind { - %bcin = bitcast i64 addrspace(3)* %in to i8 addrspace(3)* - %bcout = bitcast i64 addrspace(3)* %out to i8 addrspace(3)* - call void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* align 8 %bcout, i8 addrspace(3)* align 8 %bcin, i32 32, i1 false) nounwind +define amdgpu_kernel void @test_small_memcpy_i64_lds_to_lds_align8(ptr addrspace(3) noalias %out, ptr addrspace(3) noalias %in) nounwind { + call void @llvm.memcpy.p3.p3.i32(ptr addrspace(3) align 8 %out, ptr addrspace(3) align 8 %in, i32 32, i1 false) nounwind ret void } @@ -238,10 +230,8 @@ define amdgpu_kernel void @test_small_memcpy_i64_lds_to_lds_align8(i64 addrspace ; SI-DAG: buffer_store_byte ; SI: s_endpgm -define amdgpu_kernel void @test_small_memcpy_i64_global_to_global_align1(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind { - %bcin = bitcast i64 addrspace(1)* %in to i8 addrspace(1)* - %bcout = bitcast i64 addrspace(1)* %out to i8 addrspace(1)* - call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %bcout, i8 addrspace(1)* %bcin, i64 32, i1 false) nounwind +define amdgpu_kernel void @test_small_memcpy_i64_global_to_global_align1(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind { + call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) %out, ptr addrspace(1) %in, i64 32, i1 false) nounwind ret void } @@ -281,10 +271,8 @@ define amdgpu_kernel void @test_small_memcpy_i64_global_to_global_align1(i64 add ; SI-DAG: buffer_store_short ; SI: s_endpgm -define amdgpu_kernel void @test_small_memcpy_i64_global_to_global_align2(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind { - %bcin = bitcast i64 
addrspace(1)* %in to i8 addrspace(1)* - %bcout = bitcast i64 addrspace(1)* %out to i8 addrspace(1)* - call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* align 2 %bcout, i8 addrspace(1)* align 2 %bcin, i64 32, i1 false) nounwind +define amdgpu_kernel void @test_small_memcpy_i64_global_to_global_align2(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind { + call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) align 2 %out, ptr addrspace(1) align 2 %in, i64 32, i1 false) nounwind ret void } @@ -294,10 +282,8 @@ define amdgpu_kernel void @test_small_memcpy_i64_global_to_global_align2(i64 add ; SI: buffer_store_dwordx4 ; SI: buffer_store_dwordx4 ; SI: s_endpgm -define amdgpu_kernel void @test_small_memcpy_i64_global_to_global_align4(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind { - %bcin = bitcast i64 addrspace(1)* %in to i8 addrspace(1)* - %bcout = bitcast i64 addrspace(1)* %out to i8 addrspace(1)* - call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* align 4 %bcout, i8 addrspace(1)* align 4 %bcin, i64 32, i1 false) nounwind +define amdgpu_kernel void @test_small_memcpy_i64_global_to_global_align4(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind { + call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) align 4 %out, ptr addrspace(1) align 4 %in, i64 32, i1 false) nounwind ret void } @@ -307,10 +293,8 @@ define amdgpu_kernel void @test_small_memcpy_i64_global_to_global_align4(i64 add ; SI: buffer_store_dwordx4 ; SI: buffer_store_dwordx4 ; SI: s_endpgm -define amdgpu_kernel void @test_small_memcpy_i64_global_to_global_align8(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind { - %bcin = bitcast i64 addrspace(1)* %in to i8 addrspace(1)* - %bcout = bitcast i64 addrspace(1)* %out to i8 addrspace(1)* - call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* align 8 %bcout, i8 addrspace(1)* align 8 %bcin, i64 32, i1 false) nounwind +define amdgpu_kernel void @test_small_memcpy_i64_global_to_global_align8(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind { + call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) align 8 %out, ptr addrspace(1) align 8 %in, i64 32, i1 false) nounwind ret void } @@ -320,10 +304,8 @@ define amdgpu_kernel void @test_small_memcpy_i64_global_to_global_align8(i64 add ; SI: buffer_store_dwordx4 ; SI: buffer_store_dwordx4 ; SI: s_endpgm -define amdgpu_kernel void @test_small_memcpy_i64_global_to_global_align16(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind { - %bcin = bitcast i64 addrspace(1)* %in to i8 addrspace(1)* - %bcout = bitcast i64 addrspace(1)* %out to i8 addrspace(1)* - call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* align 16 %bcout, i8 addrspace(1)* align 16 %bcin, i64 32, i1 false) nounwind +define amdgpu_kernel void @test_small_memcpy_i64_global_to_global_align16(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind { + call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) align 16 %out, ptr addrspace(1) align 16 %in, i64 32, i1 false) nounwind ret void } @@ -340,9 +322,8 @@ define amdgpu_kernel void @test_small_memcpy_i64_global_to_global_align16(i64 ad ; SI-DAG: s_load_dwordx2 ; SI-DAG: buffer_store_dwordx4 ; SI-DAG: buffer_store_dwordx4 -define amdgpu_kernel void @test_memcpy_const_string_align4(i8 addrspace(1)* noalias %out) nounwind { - %str = bitcast [16 x i8] addrspace(4)* @hello.align4 to i8 addrspace(4)* - call void @llvm.memcpy.p1i8.p2i8.i64(i8 addrspace(1)* align 4 %out, i8 addrspace(4)* align 4 %str, i64 32, i1 
false) +define amdgpu_kernel void @test_memcpy_const_string_align4(ptr addrspace(1) noalias %out) nounwind { + call void @llvm.memcpy.p1.p2.i64(ptr addrspace(1) align 4 %out, ptr addrspace(4) align 4 @hello.align4, i64 32, i1 false) ret void } @@ -365,8 +346,7 @@ define amdgpu_kernel void @test_memcpy_const_string_align4(i8 addrspace(1)* noal ; SI: buffer_store_byte ; SI: buffer_store_byte ; SI: buffer_store_byte -define amdgpu_kernel void @test_memcpy_const_string_align1(i8 addrspace(1)* noalias %out) nounwind { - %str = bitcast [16 x i8] addrspace(4)* @hello.align1 to i8 addrspace(4)* - call void @llvm.memcpy.p1i8.p2i8.i64(i8 addrspace(1)* %out, i8 addrspace(4)* %str, i64 32, i1 false) +define amdgpu_kernel void @test_memcpy_const_string_align1(ptr addrspace(1) noalias %out) nounwind { + call void @llvm.memcpy.p1.p2.i64(ptr addrspace(1) %out, ptr addrspace(4) @hello.align1, i64 32, i1 false) ret void } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.minnum.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.minnum.f16.ll index 4de4268..699ac77 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.minnum.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.minnum.f16.ll @@ -141,14 +141,14 @@ define amdgpu_kernel void @minnum_f16_ieee( ; GFX11-NEXT: buffer_store_b16 v0, off, s[8:11], 0 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - half addrspace(1)* %r, - half addrspace(1)* %a, - half addrspace(1)* %b) #0 { + ptr addrspace(1) %r, + ptr addrspace(1) %a, + ptr addrspace(1) %b) #0 { entry: - %a.val = load volatile half, half addrspace(1)* %a - %b.val = load volatile half, half addrspace(1)* %b + %a.val = load volatile half, ptr addrspace(1) %a + %b.val = load volatile half, ptr addrspace(1) %b %r.val = call half @llvm.minnum.f16(half %a.val, half %b.val) - store half %r.val, half addrspace(1)* %r + store half %r.val, ptr addrspace(1) %r ret void } @@ -278,12 +278,12 @@ define amdgpu_kernel void @minnum_f16_imm_a( ; GFX11-NEXT: buffer_store_b16 v0, off, s[4:7], 0 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - half addrspace(1)* %r, - half addrspace(1)* %b) #0 { + ptr addrspace(1) %r, + ptr addrspace(1) %b) #0 { entry: - %b.val = load half, half addrspace(1)* %b + %b.val = load half, ptr addrspace(1) %b %r.val = call half @llvm.minnum.f16(half 3.0, half %b.val) - store half %r.val, half addrspace(1)* %r + store half %r.val, ptr addrspace(1) %r ret void } @@ -385,12 +385,12 @@ define amdgpu_kernel void @minnum_f16_imm_b( ; GFX11-NEXT: buffer_store_b16 v0, off, s[4:7], 0 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - half addrspace(1)* %r, - half addrspace(1)* %a) #0 { + ptr addrspace(1) %r, + ptr addrspace(1) %a) #0 { entry: - %a.val = load half, half addrspace(1)* %a + %a.val = load half, ptr addrspace(1) %a %r.val = call half @llvm.minnum.f16(half %a.val, half 4.0) - store half %r.val, half addrspace(1)* %r + store half %r.val, ptr addrspace(1) %r ret void } @@ -500,14 +500,14 @@ define amdgpu_kernel void @minnum_v2f16_ieee( ; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - <2 x half> addrspace(1)* %r, - <2 x half> addrspace(1)* %a, - <2 x half> addrspace(1)* %b) #0 { + ptr addrspace(1) %r, + ptr addrspace(1) %a, + ptr addrspace(1) %b) #0 { entry: - %a.val = load <2 x half>, <2 x half> addrspace(1)* %a - %b.val = load <2 x half>, <2 x half> addrspace(1)* %b + %a.val = load <2 x half>, ptr addrspace(1) %a + %b.val = load <2 x half>, ptr addrspace(1) %b %r.val = call <2 x 
half> @llvm.minnum.v2f16(<2 x half> %a.val, <2 x half> %b.val) - store <2 x half> %r.val, <2 x half> addrspace(1)* %r + store <2 x half> %r.val, ptr addrspace(1) %r ret void } @@ -627,12 +627,12 @@ define amdgpu_kernel void @minnum_v2f16_imm_a( ; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - <2 x half> addrspace(1)* %r, - <2 x half> addrspace(1)* %b) #0 { + ptr addrspace(1) %r, + ptr addrspace(1) %b) #0 { entry: - %b.val = load <2 x half>, <2 x half> addrspace(1)* %b + %b.val = load <2 x half>, ptr addrspace(1) %b %r.val = call <2 x half> @llvm.minnum.v2f16(<2 x half> , <2 x half> %b.val) - store <2 x half> %r.val, <2 x half> addrspace(1)* %r + store <2 x half> %r.val, ptr addrspace(1) %r ret void } @@ -717,12 +717,12 @@ define amdgpu_kernel void @minnum_v2f16_imm_b( ; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - <2 x half> addrspace(1)* %r, - <2 x half> addrspace(1)* %a) #0 { + ptr addrspace(1) %r, + ptr addrspace(1) %a) #0 { entry: - %a.val = load <2 x half>, <2 x half> addrspace(1)* %a + %a.val = load <2 x half>, ptr addrspace(1) %a %r.val = call <2 x half> @llvm.minnum.v2f16(<2 x half> %a.val, <2 x half> ) - store <2 x half> %r.val, <2 x half> addrspace(1)* %r + store <2 x half> %r.val, ptr addrspace(1) %r ret void } @@ -857,14 +857,14 @@ define amdgpu_kernel void @minnum_v3f16( ; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - <3 x half> addrspace(1)* %r, - <3 x half> addrspace(1)* %a, - <3 x half> addrspace(1)* %b) #0 { + ptr addrspace(1) %r, + ptr addrspace(1) %a, + ptr addrspace(1) %b) #0 { entry: - %a.val = load <3 x half>, <3 x half> addrspace(1)* %a - %b.val = load <3 x half>, <3 x half> addrspace(1)* %b + %a.val = load <3 x half>, ptr addrspace(1) %a + %b.val = load <3 x half>, ptr addrspace(1) %b %r.val = call <3 x half> @llvm.minnum.v3f16(<3 x half> %a.val, <3 x half> %b.val) - store <3 x half> %r.val, <3 x half> addrspace(1)* %r + store <3 x half> %r.val, ptr addrspace(1) %r ret void } @@ -1010,14 +1010,14 @@ define amdgpu_kernel void @minnum_v4f16( ; GFX11-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - <4 x half> addrspace(1)* %r, - <4 x half> addrspace(1)* %a, - <4 x half> addrspace(1)* %b) #0 { + ptr addrspace(1) %r, + ptr addrspace(1) %a, + ptr addrspace(1) %b) #0 { entry: - %a.val = load <4 x half>, <4 x half> addrspace(1)* %a - %b.val = load <4 x half>, <4 x half> addrspace(1)* %b + %a.val = load <4 x half>, ptr addrspace(1) %a + %b.val = load <4 x half>, ptr addrspace(1) %b %r.val = call <4 x half> @llvm.minnum.v4f16(<4 x half> %a.val, <4 x half> %b.val) - store <4 x half> %r.val, <4 x half> addrspace(1)* %r + store <4 x half> %r.val, ptr addrspace(1) %r ret void } @@ -1131,12 +1131,12 @@ define amdgpu_kernel void @fmin_v4f16_imm_a( ; GFX11-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - <4 x half> addrspace(1)* %r, - <4 x half> addrspace(1)* %b) #0 { + ptr addrspace(1) %r, + ptr addrspace(1) %b) #0 { entry: - %b.val = load <4 x half>, <4 x half> addrspace(1)* %b + %b.val = load <4 x half>, ptr addrspace(1) %b %r.val = call <4 x half> @llvm.minnum.v4f16(<4 x half> , <4 x half> %b.val) - store <4 x half> %r.val, <4 x half> addrspace(1)* %r + store <4 x half> %r.val, ptr addrspace(1) %r ret void 
 }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.mulo.ll b/llvm/test/CodeGen/AMDGPU/llvm.mulo.ll
index fdcceea..8d8a525 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.mulo.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.mulo.ll
@@ -383,7 +383,7 @@ bb:
   %mul = extractvalue { i64, i1 } %umulo, 0
   %overflow = extractvalue { i64, i1 } %umulo, 1
   %res = select i1 %overflow, i64 0, i64 %mul
-  store i64 %res, i64 addrspace(1)* undef
+  store i64 %res, ptr addrspace(1) undef
   ret void
 }
@@ -567,7 +567,7 @@ bb:
   %mul = extractvalue { i64, i1 } %umulo, 0
   %overflow = extractvalue { i64, i1 } %umulo, 1
   %res = select i1 %overflow, i64 0, i64 %mul
-  store i64 %res, i64 addrspace(1)* undef
+  store i64 %res, ptr addrspace(1) undef
   ret void
 }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.pow-gfx9.ll b/llvm/test/CodeGen/AMDGPU/llvm.pow-gfx9.ll
index 657b14d..9f2c193 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.pow-gfx9.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.pow-gfx9.ll
@@ -5,14 +5,14 @@
 ; GFX908: v_mul_legacy_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}{{$}}
 ; GFX90A: v_mul_legacy_f32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}{{$}}
 define amdgpu_kernel void @mul_legacy(
-    float addrspace(1)* %r,
-    float addrspace(1)* %a,
-    float addrspace(1)* %b) {
+    ptr addrspace(1) %r,
+    ptr addrspace(1) %a,
+    ptr addrspace(1) %b) {
 entry:
-  %a.val = load volatile float, float addrspace(1)* %a
-  %b.val = load volatile float, float addrspace(1)* %b
+  %a.val = load volatile float, ptr addrspace(1) %a
+  %b.val = load volatile float, ptr addrspace(1) %b
   %r.val = call float @llvm.pow.f32(float %a.val, float %b.val)
-  store float %r.val, float addrspace(1)* %r
+  store float %r.val, ptr addrspace(1) %r
   ret void
 }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.rint.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.rint.f16.ll
index 5b3d513..8c82f53 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.rint.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.rint.f16.ll
@@ -14,12 +14,12 @@ declare <2 x half> @llvm.rint.v2f16(<2 x half> %a)
 ; GCN: buffer_store_short v[[R_F16]]
 ; GCN: s_endpgm
 define amdgpu_kernel void @rint_f16(
-    half addrspace(1)* %r,
-    half addrspace(1)* %a) {
+    ptr addrspace(1) %r,
+    ptr addrspace(1) %a) {
 entry:
-  %a.val = load half, half addrspace(1)* %a
+  %a.val = load half, ptr addrspace(1) %a
   %r.val = call half @llvm.rint.f16(half %a.val)
-  store half %r.val, half addrspace(1)* %r
+  store half %r.val, ptr addrspace(1) %r
   ret void
 }
@@ -49,11 +49,11 @@ entry:
 ; GCN: s_endpgm
 define amdgpu_kernel void @rint_v2f16(
-    <2 x half> addrspace(1)* %r,
-    <2 x half> addrspace(1)* %a) {
+    ptr addrspace(1) %r,
+    ptr addrspace(1) %a) {
 entry:
-  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
+  %a.val = load <2 x half>, ptr addrspace(1) %a
   %r.val = call <2 x half> @llvm.rint.v2f16(<2 x half> %a.val)
-  store <2 x half> %r.val, <2 x half> addrspace(1)* %r
+  store <2 x half> %r.val, ptr addrspace(1) %r
   ret void
 }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.rint.f64.ll b/llvm/test/CodeGen/AMDGPU/llvm.rint.f64.ll
index cfffaa0..87ed956 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.rint.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.rint.f64.ll
@@ -11,20 +11,20 @@
 ; SI: v_cndmask_b32
 ; SI: v_cndmask_b32
 ; SI: s_endpgm
-define amdgpu_kernel void @rint_f64(double addrspace(1)* %out, double %in) {
+define amdgpu_kernel void @rint_f64(ptr addrspace(1) %out, double %in) {
 entry:
   %0 = call double @llvm.rint.f64(double %in)
-  store double %0, double addrspace(1)* %out
+  store double %0, ptr addrspace(1) %out
   ret void
 }
 ; FUNC-LABEL: {{^}}rint_v2f64:
 ; CI: v_rndne_f64_e32
 ; CI: v_rndne_f64_e32
-define amdgpu_kernel void @rint_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %in) {
+define amdgpu_kernel void @rint_v2f64(ptr addrspace(1) %out, <2 x double> %in) {
 entry:
   %0 = call <2 x double> @llvm.rint.v2f64(<2 x double> %in)
-  store <2 x double> %0, <2 x double> addrspace(1)* %out
+  store <2 x double> %0, ptr addrspace(1) %out
   ret void
 }
@@ -33,10 +33,10 @@ entry:
 ; CI: v_rndne_f64_e32
 ; CI: v_rndne_f64_e32
 ; CI: v_rndne_f64_e32
-define amdgpu_kernel void @rint_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %in) {
+define amdgpu_kernel void @rint_v4f64(ptr addrspace(1) %out, <4 x double> %in) {
 entry:
   %0 = call <4 x double> @llvm.rint.v4f64(<4 x double> %in)
-  store <4 x double> %0, <4 x double> addrspace(1)* %out
+  store <4 x double> %0, ptr addrspace(1) %out
   ret void
 }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.rint.ll b/llvm/test/CodeGen/AMDGPU/llvm.rint.ll
index 4056bc3..cabb9dc 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.rint.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.rint.ll
@@ -6,10 +6,10 @@
 ; R600: RNDNE
 ; SI: v_rndne_f32_e32
-define amdgpu_kernel void @rint_f32(float addrspace(1)* %out, float %in) {
+define amdgpu_kernel void @rint_f32(ptr addrspace(1) %out, float %in) {
 entry:
   %0 = call float @llvm.rint.f32(float %in) #0
-  store float %0, float addrspace(1)* %out
+  store float %0, ptr addrspace(1) %out
   ret void
 }
@@ -19,10 +19,10 @@ entry:
 ; SI: v_rndne_f32_e32
 ; SI: v_rndne_f32_e32
-define amdgpu_kernel void @rint_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %in) {
+define amdgpu_kernel void @rint_v2f32(ptr addrspace(1) %out, <2 x float> %in) {
 entry:
   %0 = call <2 x float> @llvm.rint.v2f32(<2 x float> %in) #0
-  store <2 x float> %0, <2 x float> addrspace(1)* %out
+  store <2 x float> %0, ptr addrspace(1) %out
   ret void
 }
@@ -36,10 +36,10 @@ entry:
 ; SI: v_rndne_f32_e32
 ; SI: v_rndne_f32_e32
 ; SI: v_rndne_f32_e32
-define amdgpu_kernel void @rint_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %in) {
+define amdgpu_kernel void @rint_v4f32(ptr addrspace(1) %out, <4 x float> %in) {
 entry:
   %0 = call <4 x float> @llvm.rint.v4f32(<4 x float> %in) #0
-  store <4 x float> %0, <4 x float> addrspace(1)* %out
+  store <4 x float> %0, ptr addrspace(1) %out
   ret void
 }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.round.f64.ll b/llvm/test/CodeGen/AMDGPU/llvm.round.f64.ll
index 8b21896..4ed575d 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.round.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.round.f64.ll
@@ -2,7 +2,7 @@
 ; RUN: llc -march=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefix=SI %s
 ; RUN: llc -march=amdgcn -mcpu=hawaii < %s | FileCheck -check-prefix=CI %s
-define amdgpu_kernel void @round_f64(double addrspace(1)* %out, double %x) #0 {
+define amdgpu_kernel void @round_f64(ptr addrspace(1) %out, double %x) #0 {
 ; SI-LABEL: round_f64:
 ; SI: ; %bb.0:
 ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -59,11 +59,11 @@ define amdgpu_kernel void @round_f64(double addrspace(1)* %out, double %x) #0 {
 ; CI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
 ; CI-NEXT: s_endpgm
   %result = call double @llvm.round.f64(double %x) #1
-  store double %result, double addrspace(1)* %out
+  store double %result, ptr addrspace(1) %out
   ret void
 }
-define amdgpu_kernel void @v_round_f64(double addrspace(1)* %out, double addrspace(1)* %in) #0 {
+define amdgpu_kernel void @v_round_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
 ; SI-LABEL: v_round_f64:
 ; SI: ; %bb.0:
 ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -128,15 +128,15 @@ define amdgpu_kernel void @v_round_f64(double addrspace(1)* %out, double addrspa
 ; CI-NEXT: buffer_store_dwordx2 v[2:3], v[0:1], s[0:3], 0 addr64
 ; CI-NEXT: s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
-  %gep = getelementptr double, double addrspace(1)* %in, i32 %tid
-  %out.gep = getelementptr double, double addrspace(1)* %out, i32 %tid
-  %x = load double, double addrspace(1)* %gep
+  %gep = getelementptr double, ptr addrspace(1) %in, i32 %tid
+  %out.gep = getelementptr double, ptr addrspace(1) %out, i32 %tid
+  %x = load double, ptr addrspace(1) %gep
   %result = call double @llvm.round.f64(double %x) #1
-  store double %result, double addrspace(1)* %out.gep
+  store double %result, ptr addrspace(1) %out.gep
   ret void
 }
-define amdgpu_kernel void @round_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %in) #0 {
+define amdgpu_kernel void @round_v2f64(ptr addrspace(1) %out, <2 x double> %in) #0 {
 ; SI-LABEL: round_v2f64:
 ; SI: ; %bb.0:
 ; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0xd
@@ -219,11 +219,11 @@ define amdgpu_kernel void @round_v2f64(<2 x double> addrspace(1)* %out, <2 x dou
 ; CI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
 ; CI-NEXT: s_endpgm
   %result = call <2 x double> @llvm.round.v2f64(<2 x double> %in) #1
-  store <2 x double> %result, <2 x double> addrspace(1)* %out
+  store <2 x double> %result, ptr addrspace(1) %out
   ret void
 }
-define amdgpu_kernel void @round_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %in) #0 {
+define amdgpu_kernel void @round_v4f64(ptr addrspace(1) %out, <4 x double> %in) #0 {
 ; SI-LABEL: round_v4f64:
 ; SI: ; %bb.0:
 ; SI-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x11
@@ -364,11 +364,11 @@ define amdgpu_kernel void @round_v4f64(<4 x double> addrspace(1)* %out, <4 x dou
 ; CI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
 ; CI-NEXT: s_endpgm
   %result = call <4 x double> @llvm.round.v4f64(<4 x double> %in) #1
-  store <4 x double> %result, <4 x double> addrspace(1)* %out
+  store <4 x double> %result, ptr addrspace(1) %out
   ret void
 }
-define amdgpu_kernel void @round_v8f64(<8 x double> addrspace(1)* %out, <8 x double> %in) #0 {
+define amdgpu_kernel void @round_v8f64(ptr addrspace(1) %out, <8 x double> %in) #0 {
 ; SI-LABEL: round_v8f64:
 ; SI: ; %bb.0:
 ; SI-NEXT: s_load_dwordx16 s[4:19], s[0:1], 0x19
@@ -625,7 +625,7 @@ define amdgpu_kernel void @round_v8f64(<8 x double> addrspace(1)* %out, <8 x dou
 ; CI-NEXT: buffer_store_dwordx4 v[0:3], off, s[20:23], 0
 ; CI-NEXT: s_endpgm
   %result = call <8 x double> @llvm.round.v8f64(<8 x double> %in) #1
-  store <8 x double> %result, <8 x double> addrspace(1)* %out
+  store <8 x double> %result, ptr addrspace(1) %out
   ret void
 }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.round.ll b/llvm/test/CodeGen/AMDGPU/llvm.round.ll
index ae029f5..6525c02 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.round.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.round.ll
@@ -21,9 +21,9 @@
 ; R600-DAG: SETGE
 ; R600-DAG: CNDE
 ; R600-DAG: ADD
-define amdgpu_kernel void @round_f32(float addrspace(1)* %out, float %x) #0 {
+define amdgpu_kernel void @round_f32(ptr addrspace(1) %out, float %x) #0 {
   %result = call float @llvm.round.f32(float %x) #1
-  store float %result, float addrspace(1)* %out
+  store float %result, ptr addrspace(1) %out
   ret void
 }
@@ -35,27 +35,27 @@ define amdgpu_kernel void @round_f32(float addrspace(1)* %out, float %x) #0 {
 ; FUNC-LABEL: {{^}}round_v2f32:
 ; GCN: s_endpgm
 ; R600: CF_END
-define amdgpu_kernel void @round_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %in) #0 {
+define amdgpu_kernel void @round_v2f32(ptr addrspace(1) %out, <2 x float> %in) #0 {
   %result = call <2 x float> @llvm.round.v2f32(<2 x float> %in) #1
-  store <2 x float> %result, <2 x float> addrspace(1)* %out
+  store <2 x float> %result, ptr addrspace(1) %out
   ret void
 }
 ; FUNC-LABEL: {{^}}round_v4f32:
 ; GCN: s_endpgm
 ; R600: CF_END
-define amdgpu_kernel void @round_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %in) #0 {
+define amdgpu_kernel void @round_v4f32(ptr addrspace(1) %out, <4 x float> %in) #0 {
   %result = call <4 x float> @llvm.round.v4f32(<4 x float> %in) #1
-  store <4 x float> %result, <4 x float> addrspace(1)* %out
+  store <4 x float> %result, ptr addrspace(1) %out
   ret void
 }
 ; FUNC-LABEL: {{^}}round_v8f32:
 ; GCN: s_endpgm
 ; R600: CF_END
-define amdgpu_kernel void @round_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %in) #0 {
+define amdgpu_kernel void @round_v8f32(ptr addrspace(1) %out, <8 x float> %in) #0 {
   %result = call <8 x float> @llvm.round.v8f32(<8 x float> %in) #1
-  store <8 x float> %result, <8 x float> addrspace(1)* %out
+  store <8 x float> %result, ptr addrspace(1) %out
   ret void
 }
@@ -72,11 +72,11 @@ define amdgpu_kernel void @round_v8f32(<8 x float> addrspace(1)* %out, <8 x floa
 ; GFX89: v_cndmask_b32_e32 [[SEL:v[0-9]+]], 0, [[COPYSIGN]]
 ; GFX89: v_add_f16_e32 [[RESULT:v[0-9]+]], [[TRUNC]], [[SEL]]
 ; GFX89: buffer_store_short [[RESULT]]
-define amdgpu_kernel void @round_f16(half addrspace(1)* %out, i32 %x.arg) #0 {
+define amdgpu_kernel void @round_f16(ptr addrspace(1) %out, i32 %x.arg) #0 {
   %x.arg.trunc = trunc i32 %x.arg to i16
   %x = bitcast i16 %x.arg.trunc to half
   %result = call half @llvm.round.f16(half %x) #1
-  store half %result, half addrspace(1)* %out
+  store half %result, ptr addrspace(1) %out
   ret void
 }
@@ -88,10 +88,10 @@ define amdgpu_kernel void @round_f16(half addrspace(1)* %out, i32 %x.arg) #0 {
 ; GFX89: v_bfi_b32 [[COPYSIGN1:v[0-9]+]], [[K]], [[BFI_K]],
 ; GFX9: v_pack_b32_f16
-define amdgpu_kernel void @round_v2f16(<2 x half> addrspace(1)* %out, i32 %in.arg) #0 {
+define amdgpu_kernel void @round_v2f16(ptr addrspace(1) %out, i32 %in.arg) #0 {
   %in = bitcast i32 %in.arg to <2 x half>
   %result = call <2 x half> @llvm.round.v2f16(<2 x half> %in)
-  store <2 x half> %result, <2 x half> addrspace(1)* %out
+  store <2 x half> %result, ptr addrspace(1) %out
   ret void
 }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.sin.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.sin.f16.ll
index 983fcca..2ad122c 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.sin.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.sin.f16.ll
@@ -5,7 +5,7 @@
 ; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefix=GFX10 %s
 ; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefix=GFX11 %s
-define amdgpu_kernel void @sin_f16(half addrspace(1)* %r, half addrspace(1)* %a) {
+define amdgpu_kernel void @sin_f16(ptr addrspace(1) %r, ptr addrspace(1) %a) {
 ; GFX6-LABEL: sin_f16:
 ; GFX6: ; %bb.0:
 ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -81,13 +81,13 @@ define amdgpu_kernel void @sin_f16(half addrspace(1)* %r, half addrspace(1)* %a)
 ; GFX11-NEXT: global_store_b16 v0, v1, s[0:1]
 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
 ; GFX11-NEXT: s_endpgm
-  %a.val = load half, half addrspace(1)* %a
+  %a.val = load half, ptr addrspace(1) %a
   %r.val = call half @llvm.sin.f16(half %a.val)
-  store half %r.val, half addrspace(1)* %r
+  store half %r.val, ptr addrspace(1) %r
   ret void
 }
-define amdgpu_kernel void @sin_v2f16(<2 x half> addrspace(1)* %r, <2 x half> addrspace(1)* %a) {
+define amdgpu_kernel void @sin_v2f16(ptr addrspace(1) %r, ptr addrspace(1) %a) {
 ; GFX6-LABEL: sin_v2f16:
 ; GFX6: ; %bb.0:
 ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -190,9 +190,9 @@ define amdgpu_kernel void @sin_v2f16(<2 x half> addrspace(1)* %r, <2 x half> add
 ; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
 ; GFX11-NEXT: s_endpgm
-  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
+  %a.val = load <2 x half>, ptr addrspace(1) %a
   %r.val = call <2 x half> @llvm.sin.v2f16(<2 x half> %a.val)
-  store <2 x half> %r.val, <2 x half> addrspace(1)* %r
+  store <2 x half> %r.val, ptr addrspace(1) %r
   ret void
 }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.sin.ll b/llvm/test/CodeGen/AMDGPU/llvm.sin.ll
index 7f033a6..6431c39 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.sin.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.sin.ll
@@ -16,9 +16,9 @@
 ; GFX9-NOT: v_fract_f32
 ; GCN: v_sin_f32
 ; GCN-NOT: v_sin_f32
-define amdgpu_kernel void @sin_f32(float addrspace(1)* %out, float %x) #1 {
+define amdgpu_kernel void @sin_f32(ptr addrspace(1) %out, float %x) #1 {
   %sin = call float @llvm.sin.f32(float %x)
-  store float %sin, float addrspace(1)* %out
+  store float %sin, ptr addrspace(1) %out
   ret void
 }
@@ -29,10 +29,10 @@ define amdgpu_kernel void @sin_f32(float addrspace(1)* %out, float %x) #1 {
 ; GFX9-NOT: v_fract_f32
 ; GCN: v_sin_f32
 ; GCN-NOT: v_sin_f32
-define amdgpu_kernel void @safe_sin_3x_f32(float addrspace(1)* %out, float %x) #1 {
+define amdgpu_kernel void @safe_sin_3x_f32(ptr addrspace(1) %out, float %x) #1 {
   %y = fmul float 3.0, %x
   %sin = call float @llvm.sin.f32(float %y)
-  store float %sin, float addrspace(1)* %out
+  store float %sin, ptr addrspace(1) %out
   ret void
 }
@@ -44,10 +44,10 @@ define amdgpu_kernel void @safe_sin_3x_f32(float addrspace(1)* %out, float %x) #
 ; GFX9-NOT: v_fract_f32
 ; GCN: v_sin_f32
 ; GCN-NOT: v_sin_f32
-define amdgpu_kernel void @unsafe_sin_3x_f32(float addrspace(1)* %out, float %x) #2 {
+define amdgpu_kernel void @unsafe_sin_3x_f32(ptr addrspace(1) %out, float %x) #2 {
   %y = fmul float 3.0, %x
   %sin = call float @llvm.sin.f32(float %y)
-  store float %sin, float addrspace(1)* %out
+  store float %sin, ptr addrspace(1) %out
   ret void
 }
@@ -59,10 +59,10 @@ define amdgpu_kernel void @unsafe_sin_3x_f32(float addrspace(1)* %out, float %x)
 ; GFX9-NOT: v_fract_f32
 ; GCN: v_sin_f32
 ; GCN-NOT: v_sin_f32
-define amdgpu_kernel void @fmf_sin_3x_f32(float addrspace(1)* %out, float %x) #1 {
+define amdgpu_kernel void @fmf_sin_3x_f32(ptr addrspace(1) %out, float %x) #1 {
   %y = fmul reassoc float 3.0, %x
   %sin = call reassoc float @llvm.sin.f32(float %y)
-  store float %sin, float addrspace(1)* %out
+  store float %sin, ptr addrspace(1) %out
   ret void
 }
@@ -73,10 +73,10 @@ define amdgpu_kernel void @fmf_sin_3x_f32(float addrspace(1)* %out, float %x) #1
 ; GFX9-NOT: v_fract_f32
 ; GCN: v_sin_f32
 ; GCN-NOT: v_sin_f32
-define amdgpu_kernel void @safe_sin_2x_f32(float addrspace(1)* %out, float %x) #1 {
+define amdgpu_kernel void @safe_sin_2x_f32(ptr addrspace(1) %out, float %x) #1 {
   %y = fmul float 2.0, %x
   %sin = call float @llvm.sin.f32(float %y)
-  store float %sin, float addrspace(1)* %out
+  store float %sin, ptr addrspace(1) %out
   ret void
 }
@@ -88,10 +88,10 @@ define amdgpu_kernel void @safe_sin_2x_f32(float addrspace(1)* %out, float %x) #
 ; GFX9-NOT: v_fract_f32
 ; GCN: v_sin_f32
 ; GCN-NOT: v_sin_f32
-define amdgpu_kernel void @unsafe_sin_2x_f32(float addrspace(1)* %out, float %x) #2 {
+define amdgpu_kernel void @unsafe_sin_2x_f32(ptr addrspace(1) %out, float %x) #2 {
   %y = fmul float 2.0, %x
   %sin = call float @llvm.sin.f32(float %y)
-  store float %sin, float addrspace(1)* %out
+  store float %sin, ptr addrspace(1) %out
   ret void
 }
@@ -103,10 +103,10 @@ define amdgpu_kernel void @unsafe_sin_2x_f32(float addrspace(1)* %out, float %x)
 ; GFX9-NOT: v_fract_f32
 ; GCN: v_sin_f32
 ; GCN-NOT: v_sin_f32
-define amdgpu_kernel void @fmf_sin_2x_f32(float addrspace(1)* %out, float %x) #1 {
+define amdgpu_kernel void @fmf_sin_2x_f32(ptr addrspace(1) %out, float %x) #1 {
   %y = fmul reassoc float 2.0, %x
   %sin = call reassoc float @llvm.sin.f32(float %y)
-  store float %sin, float addrspace(1)* %out
+  store float %sin, ptr addrspace(1) %out
   ret void
 }
@@ -117,10 +117,10 @@ define amdgpu_kernel void @fmf_sin_2x_f32(float addrspace(1)* %out, float %x) #1
 ; GFX9-NOT: v_fract_f32
 ; GCN: v_sin_f32
 ; GCN-NOT: v_sin_f32
-define amdgpu_kernel void @safe_sin_cancel_f32(float addrspace(1)* %out, float %x) #1 {
+define amdgpu_kernel void @safe_sin_cancel_f32(ptr addrspace(1) %out, float %x) #1 {
   %y = fmul float 0x401921FB60000000, %x
   %sin = call float @llvm.sin.f32(float %y)
-  store float %sin, float addrspace(1)* %out
+  store float %sin, ptr addrspace(1) %out
   ret void
 }
@@ -131,10 +131,10 @@ define amdgpu_kernel void @safe_sin_cancel_f32(float addrspace(1)* %out, float %
 ; GFX9-NOT: v_fract_f32
 ; GCN: v_sin_f32
 ; GCN-NOT: v_sin_f32
-define amdgpu_kernel void @unsafe_sin_cancel_f32(float addrspace(1)* %out, float %x) #2 {
+define amdgpu_kernel void @unsafe_sin_cancel_f32(ptr addrspace(1) %out, float %x) #2 {
   %y = fmul float 0x401921FB60000000, %x
   %sin = call float @llvm.sin.f32(float %y)
-  store float %sin, float addrspace(1)* %out
+  store float %sin, ptr addrspace(1) %out
   ret void
 }
@@ -145,10 +145,10 @@ define amdgpu_kernel void @unsafe_sin_cancel_f32(float addrspace(1)* %out, float
 ; GFX9-NOT: v_fract_f32
 ; GCN: v_sin_f32
 ; GCN-NOT: v_sin_f32
-define amdgpu_kernel void @fmf_sin_cancel_f32(float addrspace(1)* %out, float %x) #1 {
+define amdgpu_kernel void @fmf_sin_cancel_f32(ptr addrspace(1) %out, float %x) #1 {
   %y = fmul reassoc float 0x401921FB60000000, %x
   %sin = call reassoc float @llvm.sin.f32(float %y)
-  store float %sin, float addrspace(1)* %out
+  store float %sin, ptr addrspace(1) %out
   ret void
 }
@@ -164,9 +164,9 @@ define amdgpu_kernel void @fmf_sin_cancel_f32(float addrspace(1)* %out, float %x
 ; GCN: v_sin_f32
 ; GCN: v_sin_f32
 ; GCN-NOT: v_sin_f32
-define amdgpu_kernel void @sin_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %vx) #1 {
+define amdgpu_kernel void @sin_v4f32(ptr addrspace(1) %out, <4 x float> %vx) #1 {
   %sin = call <4 x float> @llvm.sin.v4f32( <4 x float> %vx)
-  store <4 x float> %sin, <4 x float> addrspace(1)* %out
+  store <4 x float> %sin, ptr addrspace(1) %out
   ret void
 }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.sqrt.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.sqrt.f16.ll
index 1e00e09..f90176b 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.sqrt.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.sqrt.f16.ll
@@ -13,12 +13,12 @@ declare <2 x half> @llvm.sqrt.v2f16(<2 x half> %a)
 ; GCN: buffer_store_short v[[R_F16]]
 ; GCN: s_endpgm
 define amdgpu_kernel void @sqrt_f16(
-    half addrspace(1)* %r,
-    half addrspace(1)* %a) {
+    ptr addrspace(1) %r,
+    ptr addrspace(1) %a) {
 entry:
-  %a.val = load half, half addrspace(1)* %a
+  %a.val = load half, ptr addrspace(1) %a
   %r.val = call half @llvm.sqrt.f16(half %a.val)
-  store half %r.val, half addrspace(1)* %r
+  store half %r.val, ptr addrspace(1) %r
   ret void
 }
@@ -43,11 +43,11 @@ entry:
 ; GCN: buffer_store_dword v[[R_V2_F16]]
 ; GCN: s_endpgm
 define amdgpu_kernel void @sqrt_v2f16(
-    <2 x half> addrspace(1)* %r,
-    <2 x half> addrspace(1)* %a) {
+    ptr addrspace(1) %r,
+    ptr addrspace(1) %a) {
 entry:
-  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
+  %a.val = load <2 x half>, ptr addrspace(1) %a
   %r.val = call <2 x half> @llvm.sqrt.v2f16(<2 x half> %a.val)
-  store <2 x half> %r.val, <2 x half> addrspace(1)* %r
+  store <2 x half> %r.val, ptr addrspace(1) %r
   ret void
 }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.trunc.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.trunc.f16.ll
index 1e9212e..c36d337 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.trunc.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.trunc.f16.ll
@@ -13,12 +13,12 @@ declare <2 x half> @llvm.trunc.v2f16(<2 x half> %a)
 ; GCN: buffer_store_short v[[R_F16]]
 ; GCN: s_endpgm
 define amdgpu_kernel void @trunc_f16(
-    half addrspace(1)* %r,
-    half addrspace(1)* %a) {
+    ptr addrspace(1) %r,
+    ptr addrspace(1) %a) {
 entry:
-  %a.val = load half, half addrspace(1)* %a
+  %a.val = load half, ptr addrspace(1) %a
   %r.val = call half @llvm.trunc.f16(half %a.val)
-  store half %r.val, half addrspace(1)* %r
+  store half %r.val, ptr addrspace(1) %r
   ret void
 }
@@ -43,11 +43,11 @@ entry:
 ; GCN: buffer_store_dword v[[R_V2_F16]]
 ; GCN: s_endpgm
 define amdgpu_kernel void @trunc_v2f16(
-    <2 x half> addrspace(1)* %r,
-    <2 x half> addrspace(1)* %a) {
+    ptr addrspace(1) %r,
+    ptr addrspace(1) %a) {
 entry:
-  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
+  %a.val = load <2 x half>, ptr addrspace(1) %a
   %r.val = call <2 x half> @llvm.trunc.v2f16(<2 x half> %a.val)
-  store <2 x half> %r.val, <2 x half> addrspace(1)* %r
+  store <2 x half> %r.val, ptr addrspace(1) %r
   ret void
 }