From: Matt Arsenault Date: Fri, 25 Nov 2022 03:53:12 +0000 (-0500) Subject: AMDGPU: Bulk update some generic intrinsic tests to opaque pointers X-Git-Tag: upstream/17.0.6~25944 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=fb1d166e1d05272c569ed0502befd929fb78dffc;p=platform%2Fupstream%2Fllvm.git AMDGPU: Bulk update some generic intrinsic tests to opaque pointers Done purely with the script. --- diff --git a/llvm/test/CodeGen/AMDGPU/llvm.ceil.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.ceil.f16.ll index 49f100f..ac28735 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.ceil.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.ceil.f16.ll @@ -13,12 +13,12 @@ declare <2 x half> @llvm.ceil.v2f16(<2 x half> %a) ; GCN: buffer_store_short v[[R_F16]] ; GCN: s_endpgm define amdgpu_kernel void @ceil_f16( - half addrspace(1)* %r, - half addrspace(1)* %a) { + ptr addrspace(1) %r, + ptr addrspace(1) %a) { entry: - %a.val = load half, half addrspace(1)* %a + %a.val = load half, ptr addrspace(1) %a %r.val = call half @llvm.ceil.f16(half %a.val) - store half %r.val, half addrspace(1)* %r + store half %r.val, ptr addrspace(1) %r ret void } @@ -43,11 +43,11 @@ entry: ; GCN: buffer_store_dword v[[R_V2_F16]] ; GCN: s_endpgm define amdgpu_kernel void @ceil_v2f16( - <2 x half> addrspace(1)* %r, - <2 x half> addrspace(1)* %a) { + ptr addrspace(1) %r, + ptr addrspace(1) %a) { entry: - %a.val = load <2 x half>, <2 x half> addrspace(1)* %a + %a.val = load <2 x half>, ptr addrspace(1) %a %r.val = call <2 x half> @llvm.ceil.v2f16(<2 x half> %a.val) - store <2 x half> %r.val, <2 x half> addrspace(1)* %r + store <2 x half> %r.val, ptr addrspace(1) %r ret void } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.cos.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.cos.f16.ll index eaf632d..23e11c5 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.cos.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.cos.f16.ll @@ -5,7 +5,7 @@ ; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefix=GFX10 %s ; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefix=GFX11 %s -define amdgpu_kernel void @cos_f16(half addrspace(1)* %r, half addrspace(1)* %a) { +define amdgpu_kernel void @cos_f16(ptr addrspace(1) %r, ptr addrspace(1) %a) { ; GFX6-LABEL: cos_f16: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -81,13 +81,13 @@ define amdgpu_kernel void @cos_f16(half addrspace(1)* %r, half addrspace(1)* %a) ; GFX11-NEXT: global_store_b16 v0, v1, s[0:1] ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %a.val = load half, half addrspace(1)* %a + %a.val = load half, ptr addrspace(1) %a %r.val = call half @llvm.cos.f16(half %a.val) - store half %r.val, half addrspace(1)* %r + store half %r.val, ptr addrspace(1) %r ret void } -define amdgpu_kernel void @cos_v2f16(<2 x half> addrspace(1)* %r, <2 x half> addrspace(1)* %a) { +define amdgpu_kernel void @cos_v2f16(ptr addrspace(1) %r, ptr addrspace(1) %a) { ; GFX6-LABEL: cos_v2f16: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -190,9 +190,9 @@ define amdgpu_kernel void @cos_v2f16(<2 x half> addrspace(1)* %r, <2 x half> add ; GFX11-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %a.val = load <2 x half>, <2 x half> addrspace(1)* %a + %a.val = load <2 x half>, ptr addrspace(1) %a %r.val = call <2 x half> @llvm.cos.v2f16(<2 x half> 
%a.val) - store <2 x half> %r.val, <2 x half> addrspace(1)* %r + store <2 x half> %r.val, ptr addrspace(1) %r ret void } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.cos.ll b/llvm/test/CodeGen/AMDGPU/llvm.cos.ll index bd89502..c5dc863 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.cos.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.cos.ll @@ -11,9 +11,9 @@ ;SI: v_cos_f32 ;SI-NOT: v_cos_f32 -define amdgpu_kernel void @test(float addrspace(1)* %out, float %x) #1 { +define amdgpu_kernel void @test(ptr addrspace(1) %out, float %x) #1 { %cos = call float @llvm.cos.f32(float %x) - store float %cos, float addrspace(1)* %out + store float %cos, ptr addrspace(1) %out ret void } @@ -29,9 +29,9 @@ define amdgpu_kernel void @test(float addrspace(1)* %out, float %x) #1 { ;SI: v_cos_f32 ;SI-NOT: v_cos_f32 -define amdgpu_kernel void @testv(<4 x float> addrspace(1)* %out, <4 x float> inreg %vx) #1 { +define amdgpu_kernel void @testv(ptr addrspace(1) %out, <4 x float> inreg %vx) #1 { %cos = call <4 x float> @llvm.cos.v4f32(<4 x float> %vx) - store <4 x float> %cos, <4 x float> addrspace(1)* %out + store <4 x float> %cos, ptr addrspace(1) %out ret void } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.dbg.value.ll b/llvm/test/CodeGen/AMDGPU/llvm.dbg.value.ll index 697fdfb..d8358db 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.dbg.value.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.dbg.value.ll @@ -9,10 +9,10 @@ ; GCN: flat_store_dword ; GCN: s_endpgm -define amdgpu_kernel void @test_debug_value(i32 addrspace(1)* nocapture %globalptr_arg) #0 !dbg !4 { +define amdgpu_kernel void @test_debug_value(ptr addrspace(1) nocapture %globalptr_arg) #0 !dbg !4 { entry: - tail call void @llvm.dbg.value(metadata i32 addrspace(1)* %globalptr_arg, metadata !10, metadata !13), !dbg !14 - store i32 123, i32 addrspace(1)* %globalptr_arg, align 4 + tail call void @llvm.dbg.value(metadata ptr addrspace(1) %globalptr_arg, metadata !10, metadata !13), !dbg !14 + store i32 123, ptr addrspace(1) %globalptr_arg, align 4 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.exp2.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.exp2.f16.ll index 8691d21..01e58ec 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.exp2.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.exp2.f16.ll @@ -13,12 +13,12 @@ declare <2 x half> @llvm.exp2.v2f16(<2 x half> %a) ; GCN: buffer_store_short v[[R_F16]] ; GCN: s_endpgm define amdgpu_kernel void @exp2_f16( - half addrspace(1)* %r, - half addrspace(1)* %a) { + ptr addrspace(1) %r, + ptr addrspace(1) %a) { entry: - %a.val = load half, half addrspace(1)* %a + %a.val = load half, ptr addrspace(1) %a %r.val = call half @llvm.exp2.f16(half %a.val) - store half %r.val, half addrspace(1)* %r + store half %r.val, ptr addrspace(1) %r ret void } @@ -43,11 +43,11 @@ entry: ; GCN: buffer_store_dword v[[R_V2_F16]] ; GCN: s_endpgm define amdgpu_kernel void @exp2_v2f16( - <2 x half> addrspace(1)* %r, - <2 x half> addrspace(1)* %a) { + ptr addrspace(1) %r, + ptr addrspace(1) %a) { entry: - %a.val = load <2 x half>, <2 x half> addrspace(1)* %a + %a.val = load <2 x half>, ptr addrspace(1) %a %r.val = call <2 x half> @llvm.exp2.v2f16(<2 x half> %a.val) - store <2 x half> %r.val, <2 x half> addrspace(1)* %r + store <2 x half> %r.val, ptr addrspace(1) %r ret void } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.exp2.ll b/llvm/test/CodeGen/AMDGPU/llvm.exp2.ll index aa0e1e3..6a4fefb 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.exp2.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.exp2.ll @@ -11,10 +11,10 @@ ;CM-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} ;SI: v_exp_f32 -define amdgpu_kernel void @test(float 
addrspace(1)* %out, float %in) { +define amdgpu_kernel void @test(ptr addrspace(1) %out, float %in) { entry: %0 = call float @llvm.exp2.f32(float %in) - store float %0, float addrspace(1)* %out + store float %0, ptr addrspace(1) %out ret void } @@ -34,10 +34,10 @@ entry: ;SI: v_exp_f32 ;SI: v_exp_f32 -define amdgpu_kernel void @testv2(<2 x float> addrspace(1)* %out, <2 x float> %in) { +define amdgpu_kernel void @testv2(ptr addrspace(1) %out, <2 x float> %in) { entry: %0 = call <2 x float> @llvm.exp2.v2f32(<2 x float> %in) - store <2 x float> %0, <2 x float> addrspace(1)* %out + store <2 x float> %0, ptr addrspace(1) %out ret void } @@ -68,10 +68,10 @@ entry: ;SI: v_exp_f32 ;SI: v_exp_f32 ;SI: v_exp_f32 -define amdgpu_kernel void @testv4(<4 x float> addrspace(1)* %out, <4 x float> %in) { +define amdgpu_kernel void @testv4(ptr addrspace(1) %out, <4 x float> %in) { entry: %0 = call <4 x float> @llvm.exp2.v4f32(<4 x float> %in) - store <4 x float> %0, <4 x float> addrspace(1)* %out + store <4 x float> %0, ptr addrspace(1) %out ret void } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.floor.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.floor.f16.ll index 081dc61..e5c927b 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.floor.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.floor.f16.ll @@ -13,12 +13,12 @@ declare <2 x half> @llvm.floor.v2f16(<2 x half> %a) ; GCN: buffer_store_short v[[R_F16]] ; GCN: s_endpgm define amdgpu_kernel void @floor_f16( - half addrspace(1)* %r, - half addrspace(1)* %a) { + ptr addrspace(1) %r, + ptr addrspace(1) %a) { entry: - %a.val = load half, half addrspace(1)* %a + %a.val = load half, ptr addrspace(1) %a %r.val = call half @llvm.floor.f16(half %a.val) - store half %r.val, half addrspace(1)* %r + store half %r.val, ptr addrspace(1) %r ret void } @@ -43,11 +43,11 @@ entry: ; GCN: buffer_store_dword v[[R_V2_F16]] ; GCN: s_endpgm define amdgpu_kernel void @floor_v2f16( - <2 x half> addrspace(1)* %r, - <2 x half> addrspace(1)* %a) { + ptr addrspace(1) %r, + ptr addrspace(1) %a) { entry: - %a.val = load <2 x half>, <2 x half> addrspace(1)* %a + %a.val = load <2 x half>, ptr addrspace(1) %a %r.val = call <2 x half> @llvm.floor.v2f16(<2 x half> %a.val) - store <2 x half> %r.val, <2 x half> addrspace(1)* %r + store <2 x half> %r.val, ptr addrspace(1) %r ret void } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.fma.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.fma.f16.ll index d4b2d11..d51b4ff 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.fma.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.fma.f16.ll @@ -19,15 +19,15 @@ declare <4 x half> @llvm.fma.v4f16(<4 x half> %a, <4 x half> %b, <4 x half> %c) ; GCN: buffer_store_short v[[R_F16]] ; GCN: s_endpgm define amdgpu_kernel void @fma_f16( - half addrspace(1)* %r, - half addrspace(1)* %a, - half addrspace(1)* %b, - half addrspace(1)* %c) { - %a.val = load half, half addrspace(1)* %a - %b.val = load half, half addrspace(1)* %b - %c.val = load half, half addrspace(1)* %c + ptr addrspace(1) %r, + ptr addrspace(1) %a, + ptr addrspace(1) %b, + ptr addrspace(1) %c) { + %a.val = load half, ptr addrspace(1) %a + %b.val = load half, ptr addrspace(1) %b + %c.val = load half, ptr addrspace(1) %c %r.val = call half @llvm.fma.f16(half %a.val, half %b.val, half %c.val) - store half %r.val, half addrspace(1)* %r + store half %r.val, ptr addrspace(1) %r ret void } @@ -45,13 +45,13 @@ define amdgpu_kernel void @fma_f16( ; GCN: buffer_store_short v[[R_F16]] ; GCN: s_endpgm define amdgpu_kernel void @fma_f16_imm_a( - half addrspace(1)* %r, - half addrspace(1)* %b, - half addrspace(1)* 
%c) { - %b.val = load half, half addrspace(1)* %b - %c.val = load half, half addrspace(1)* %c + ptr addrspace(1) %r, + ptr addrspace(1) %b, + ptr addrspace(1) %c) { + %b.val = load half, ptr addrspace(1) %b + %c.val = load half, ptr addrspace(1) %c %r.val = call half @llvm.fma.f16(half 3.0, half %b.val, half %c.val) - store half %r.val, half addrspace(1)* %r + store half %r.val, ptr addrspace(1) %r ret void } @@ -68,13 +68,13 @@ define amdgpu_kernel void @fma_f16_imm_a( ; GCN: buffer_store_short v[[R_F16]] ; GCN: s_endpgm define amdgpu_kernel void @fma_f16_imm_b( - half addrspace(1)* %r, - half addrspace(1)* %a, - half addrspace(1)* %c) { - %a.val = load half, half addrspace(1)* %a - %c.val = load half, half addrspace(1)* %c + ptr addrspace(1) %r, + ptr addrspace(1) %a, + ptr addrspace(1) %c) { + %a.val = load half, ptr addrspace(1) %a + %c.val = load half, ptr addrspace(1) %c %r.val = call half @llvm.fma.f16(half %a.val, half 3.0, half %c.val) - store half %r.val, half addrspace(1)* %r + store half %r.val, ptr addrspace(1) %r ret void } @@ -91,13 +91,13 @@ define amdgpu_kernel void @fma_f16_imm_b( ; GCN: buffer_store_short v[[R_F16]] ; GCN: s_endpgm define amdgpu_kernel void @fma_f16_imm_c( - half addrspace(1)* %r, - half addrspace(1)* %a, - half addrspace(1)* %b) { - %a.val = load half, half addrspace(1)* %a - %b.val = load half, half addrspace(1)* %b + ptr addrspace(1) %r, + ptr addrspace(1) %a, + ptr addrspace(1) %b) { + %a.val = load half, ptr addrspace(1) %a + %b.val = load half, ptr addrspace(1) %b %r.val = call half @llvm.fma.f16(half %a.val, half %b.val, half 3.0) - store half %r.val, half addrspace(1)* %r + store half %r.val, ptr addrspace(1) %r ret void } @@ -137,15 +137,15 @@ define amdgpu_kernel void @fma_f16_imm_c( ; GCN: buffer_store_dword v[[R_V2_F16]] ; GCN: s_endpgm define amdgpu_kernel void @fma_v2f16( - <2 x half> addrspace(1)* %r, - <2 x half> addrspace(1)* %a, - <2 x half> addrspace(1)* %b, - <2 x half> addrspace(1)* %c) { - %a.val = load <2 x half>, <2 x half> addrspace(1)* %a - %b.val = load <2 x half>, <2 x half> addrspace(1)* %b - %c.val = load <2 x half>, <2 x half> addrspace(1)* %c + ptr addrspace(1) %r, + ptr addrspace(1) %a, + ptr addrspace(1) %b, + ptr addrspace(1) %c) { + %a.val = load <2 x half>, ptr addrspace(1) %a + %b.val = load <2 x half>, ptr addrspace(1) %b + %c.val = load <2 x half>, ptr addrspace(1) %c %r.val = call <2 x half> @llvm.fma.v2f16(<2 x half> %a.val, <2 x half> %b.val, <2 x half> %c.val) - store <2 x half> %r.val, <2 x half> addrspace(1)* %r + store <2 x half> %r.val, ptr addrspace(1) %r ret void } @@ -184,13 +184,13 @@ define amdgpu_kernel void @fma_v2f16( ; GCN: buffer_store_dword v[[R_V2_F16]] ; GCN: s_endpgm define amdgpu_kernel void @fma_v2f16_imm_a( - <2 x half> addrspace(1)* %r, - <2 x half> addrspace(1)* %b, - <2 x half> addrspace(1)* %c) { - %b.val = load <2 x half>, <2 x half> addrspace(1)* %b - %c.val = load <2 x half>, <2 x half> addrspace(1)* %c + ptr addrspace(1) %r, + ptr addrspace(1) %b, + ptr addrspace(1) %c) { + %b.val = load <2 x half>, ptr addrspace(1) %b + %c.val = load <2 x half>, ptr addrspace(1) %c %r.val = call <2 x half> @llvm.fma.v2f16(<2 x half> , <2 x half> %b.val, <2 x half> %c.val) - store <2 x half> %r.val, <2 x half> addrspace(1)* %r + store <2 x half> %r.val, ptr addrspace(1) %r ret void } @@ -230,13 +230,13 @@ define amdgpu_kernel void @fma_v2f16_imm_a( ; GCN: buffer_store_dword v[[R_V2_F16]] ; GCN: s_endpgm define amdgpu_kernel void @fma_v2f16_imm_b( - <2 x half> addrspace(1)* %r, - <2 x half> 
addrspace(1)* %a, - <2 x half> addrspace(1)* %c) { - %a.val = load <2 x half>, <2 x half> addrspace(1)* %a - %c.val = load <2 x half>, <2 x half> addrspace(1)* %c + ptr addrspace(1) %r, + ptr addrspace(1) %a, + ptr addrspace(1) %c) { + %a.val = load <2 x half>, ptr addrspace(1) %a + %c.val = load <2 x half>, ptr addrspace(1) %c %r.val = call <2 x half> @llvm.fma.v2f16(<2 x half> %a.val, <2 x half> , <2 x half> %c.val) - store <2 x half> %r.val, <2 x half> addrspace(1)* %r + store <2 x half> %r.val, ptr addrspace(1) %r ret void } @@ -280,13 +280,13 @@ define amdgpu_kernel void @fma_v2f16_imm_b( ; GCN: buffer_store_dword v[[R_V2_F16]] ; GCN: s_endpgm define amdgpu_kernel void @fma_v2f16_imm_c( - <2 x half> addrspace(1)* %r, - <2 x half> addrspace(1)* %a, - <2 x half> addrspace(1)* %b) { - %a.val = load <2 x half>, <2 x half> addrspace(1)* %a - %b.val = load <2 x half>, <2 x half> addrspace(1)* %b + ptr addrspace(1) %r, + ptr addrspace(1) %a, + ptr addrspace(1) %b) { + %a.val = load <2 x half>, ptr addrspace(1) %a + %b.val = load <2 x half>, ptr addrspace(1) %b %r.val = call <2 x half> @llvm.fma.v2f16(<2 x half> %a.val, <2 x half> %b.val, <2 x half> ) - store <2 x half> %r.val, <2 x half> addrspace(1)* %r + store <2 x half> %r.val, ptr addrspace(1) %r ret void } @@ -349,14 +349,14 @@ define amdgpu_kernel void @fma_v2f16_imm_c( ; GCN: s_endpgm define amdgpu_kernel void @fma_v4f16( - <4 x half> addrspace(1)* %r, - <4 x half> addrspace(1)* %a, - <4 x half> addrspace(1)* %b, - <4 x half> addrspace(1)* %c) { - %a.val = load <4 x half>, <4 x half> addrspace(1)* %a - %b.val = load <4 x half>, <4 x half> addrspace(1)* %b - %c.val = load <4 x half>, <4 x half> addrspace(1)* %c + ptr addrspace(1) %r, + ptr addrspace(1) %a, + ptr addrspace(1) %b, + ptr addrspace(1) %c) { + %a.val = load <4 x half>, ptr addrspace(1) %a + %b.val = load <4 x half>, ptr addrspace(1) %b + %c.val = load <4 x half>, ptr addrspace(1) %c %r.val = call <4 x half> @llvm.fma.v4f16(<4 x half> %a.val, <4 x half> %b.val, <4 x half> %c.val) - store <4 x half> %r.val, <4 x half> addrspace(1)* %r + store <4 x half> %r.val, ptr addrspace(1) %r ret void } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.fmuladd.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.fmuladd.f16.ll index 183b26b..017bc0c 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.fmuladd.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.fmuladd.f16.ll @@ -36,15 +36,15 @@ declare <2 x half> @llvm.fmuladd.v2f16(<2 x half> %a, <2 x half> %b, <2 x half> ; GCN: s_endpgm define amdgpu_kernel void @fmuladd_f16( - half addrspace(1)* %r, - half addrspace(1)* %a, - half addrspace(1)* %b, - half addrspace(1)* %c) { - %a.val = load half, half addrspace(1)* %a - %b.val = load half, half addrspace(1)* %b - %c.val = load half, half addrspace(1)* %c + ptr addrspace(1) %r, + ptr addrspace(1) %a, + ptr addrspace(1) %b, + ptr addrspace(1) %c) { + %a.val = load half, ptr addrspace(1) %a + %b.val = load half, ptr addrspace(1) %b + %c.val = load half, ptr addrspace(1) %c %r.val = call half @llvm.fmuladd.f16(half %a.val, half %b.val, half %c.val) - store half %r.val, half addrspace(1)* %r + store half %r.val, ptr addrspace(1) %r ret void } @@ -73,13 +73,13 @@ define amdgpu_kernel void @fmuladd_f16( ; GCN: s_endpgm define amdgpu_kernel void @fmuladd_f16_imm_a( - half addrspace(1)* %r, - half addrspace(1)* %b, - half addrspace(1)* %c) { - %b.val = load volatile half, half addrspace(1)* %b - %c.val = load volatile half, half addrspace(1)* %c + ptr addrspace(1) %r, + ptr addrspace(1) %b, + ptr addrspace(1) %c) { + %b.val = load 
volatile half, ptr addrspace(1) %b + %c.val = load volatile half, ptr addrspace(1) %c %r.val = call half @llvm.fmuladd.f16(half 3.0, half %b.val, half %c.val) - store half %r.val, half addrspace(1)* %r + store half %r.val, ptr addrspace(1) %r ret void } @@ -108,13 +108,13 @@ define amdgpu_kernel void @fmuladd_f16_imm_a( ; GCN: s_endpgm define amdgpu_kernel void @fmuladd_f16_imm_b( - half addrspace(1)* %r, - half addrspace(1)* %a, - half addrspace(1)* %c) { - %a.val = load volatile half, half addrspace(1)* %a - %c.val = load volatile half, half addrspace(1)* %c + ptr addrspace(1) %r, + ptr addrspace(1) %a, + ptr addrspace(1) %c) { + %a.val = load volatile half, ptr addrspace(1) %a + %c.val = load volatile half, ptr addrspace(1) %c %r.val = call half @llvm.fmuladd.f16(half %a.val, half 3.0, half %c.val) - store half %r.val, half addrspace(1)* %r + store half %r.val, ptr addrspace(1) %r ret void } @@ -176,14 +176,14 @@ define amdgpu_kernel void @fmuladd_f16_imm_b( ; GCN: buffer_store_{{dword|b32}} v[[R_V2_F16]] define amdgpu_kernel void @fmuladd_v2f16( - <2 x half> addrspace(1)* %r, - <2 x half> addrspace(1)* %a, - <2 x half> addrspace(1)* %b, - <2 x half> addrspace(1)* %c) { - %a.val = load <2 x half>, <2 x half> addrspace(1)* %a - %b.val = load <2 x half>, <2 x half> addrspace(1)* %b - %c.val = load <2 x half>, <2 x half> addrspace(1)* %c + ptr addrspace(1) %r, + ptr addrspace(1) %a, + ptr addrspace(1) %b, + ptr addrspace(1) %c) { + %a.val = load <2 x half>, ptr addrspace(1) %a + %b.val = load <2 x half>, ptr addrspace(1) %b + %c.val = load <2 x half>, ptr addrspace(1) %c %r.val = call <2 x half> @llvm.fmuladd.v2f16(<2 x half> %a.val, <2 x half> %b.val, <2 x half> %c.val) - store <2 x half> %r.val, <2 x half> addrspace(1)* %r + store <2 x half> %r.val, ptr addrspace(1) %r ret void } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.fptrunc.round.ll b/llvm/test/CodeGen/AMDGPU/llvm.fptrunc.round.ll index b4787f3..6fdfc99 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.fptrunc.round.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.fptrunc.round.ll @@ -3,7 +3,7 @@ ; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck %s ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1030 -verify-machineinstrs < %s | FileCheck %s -define amdgpu_gs void @test_fptrunc_round_upward(float %a, i32 %data0, <4 x i32> %data1, half addrspace(1)* %out) { +define amdgpu_gs void @test_fptrunc_round_upward(float %a, i32 %data0, <4 x i32> %data1, ptr addrspace(1) %out) { ; CHECK-LABEL: test_fptrunc_round_upward: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 1 @@ -11,11 +11,11 @@ define amdgpu_gs void @test_fptrunc_round_upward(float %a, i32 %data0, <4 x i32> ; CHECK-NEXT: global_store_short v[6:7], v0, off ; CHECK-NEXT: s_endpgm %res = call half @llvm.fptrunc.round(float %a, metadata !"round.upward") - store half %res, half addrspace(1)* %out, align 4 + store half %res, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_gs void @test_fptrunc_round_downward(float %a, i32 %data0, <4 x i32> %data1, half addrspace(1)* %out) { +define amdgpu_gs void @test_fptrunc_round_downward(float %a, i32 %data0, <4 x i32> %data1, ptr addrspace(1) %out) { ; CHECK-LABEL: test_fptrunc_round_downward: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 3, 1), 1 @@ -23,11 +23,11 @@ define amdgpu_gs void @test_fptrunc_round_downward(float %a, i32 %data0, <4 x i3 ; CHECK-NEXT: global_store_short v[6:7], v0, off ; CHECK-NEXT: s_endpgm %res = call half @llvm.fptrunc.round(float %a, metadata 
!"round.downward") - store half %res, half addrspace(1)* %out, align 4 + store half %res, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_gs void @test_fptrunc_round_upward_multiple_calls(float %a, float %b, i32 %data0, <4 x i32> %data1, half addrspace(1)* %out) { +define amdgpu_gs void @test_fptrunc_round_upward_multiple_calls(float %a, float %b, i32 %data0, <4 x i32> %data1, ptr addrspace(1) %out) { ; CHECK-LABEL: test_fptrunc_round_upward_multiple_calls: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 1 @@ -45,7 +45,7 @@ define amdgpu_gs void @test_fptrunc_round_upward_multiple_calls(float %a, float %res3 = call half @llvm.fptrunc.round(float %b, metadata !"round.downward") %res4 = fadd half %res1, %res2 %res5 = fadd half %res3, %res4 - store half %res5, half addrspace(1)* %out, align 4 + store half %res5, ptr addrspace(1) %out, align 4 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.log.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.log.f16.ll index 4a14e55..0a8faad 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.log.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.log.f16.ll @@ -19,12 +19,12 @@ declare <2 x half> @llvm.log.v2f16(<2 x half> %a) ; VI: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, v[[R_F16_0]] ; GFX9: global_store_short v{{\[[0-9]+:[0-9]+\]}}, v[[R_F16_0]] define void @log_f16( - half addrspace(1)* %r, - half addrspace(1)* %a) { + ptr addrspace(1) %r, + ptr addrspace(1) %a) { entry: - %a.val = load half, half addrspace(1)* %a + %a.val = load half, ptr addrspace(1) %a %r.val = call half @llvm.log.f16(half %a.val) - store half %r.val, half addrspace(1)* %r + store half %r.val, ptr addrspace(1) %r ret void } @@ -58,11 +58,11 @@ entry: ; VI: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v[[R_F32_5]] ; GFX9: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v[[R_F32_5]] define void @log_v2f16( - <2 x half> addrspace(1)* %r, - <2 x half> addrspace(1)* %a) { + ptr addrspace(1) %r, + ptr addrspace(1) %a) { entry: - %a.val = load <2 x half>, <2 x half> addrspace(1)* %a + %a.val = load <2 x half>, ptr addrspace(1) %a %r.val = call <2 x half> @llvm.log.v2f16(<2 x half> %a.val) - store <2 x half> %r.val, <2 x half> addrspace(1)* %r + store <2 x half> %r.val, ptr addrspace(1) %r ret void } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.log.ll b/llvm/test/CodeGen/AMDGPU/llvm.log.ll index 7c6ef1c..0867b7e 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.log.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.log.ll @@ -11,10 +11,10 @@ ; CM-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} ; GCN: v_log_f32_e32 v{{[0-9]+}}, v{{[0-9]+}} ; GCN: v_mul_f32_e32 v{{[0-9]+}}, 0x3f317218, v{{[0-9]+}} -define void @test(float addrspace(1)* %out, float %in) { +define void @test(ptr addrspace(1) %out, float %in) { entry: %res = call float @llvm.log.f32(float %in) - store float %res, float addrspace(1)* %out + store float %res, ptr addrspace(1) %out ret void } @@ -35,10 +35,10 @@ entry: ; GCN-DAG: v_log_f32_e32 v{{[0-9]+}}, v{{[0-9]+}} ; GCN: v_mul_f32_e32 v{{[0-9]+}}, 0x3f317218, v{{[0-9]+}} ; GCN: v_mul_f32_e32 v{{[0-9]+}}, 0x3f317218, v{{[0-9]+}} -define void @testv2(<2 x float> addrspace(1)* %out, <2 x float> %in) { +define void @testv2(ptr addrspace(1) %out, <2 x float> %in) { entry: %res = call <2 x float> @llvm.log.v2f32(<2 x float> %in) - store <2 x float> %res, <2 x float> addrspace(1)* %out + store <2 x float> %res, ptr addrspace(1) %out ret void } @@ -73,10 +73,10 @@ entry: ; GCN: v_mul_f32_e32 v{{[0-9]+}}, 0x3f317218, v{{[0-9]+}} ; GCN: v_mul_f32_e32 v{{[0-9]+}}, 0x3f317218, v{{[0-9]+}} ; GCN: v_mul_f32_e32 
v{{[0-9]+}}, 0x3f317218, v{{[0-9]+}} -define void @testv4(<4 x float> addrspace(1)* %out, <4 x float> %in) { +define void @testv4(ptr addrspace(1) %out, <4 x float> %in) { entry: %res = call <4 x float> @llvm.log.v4f32(<4 x float> %in) - store <4 x float> %res, <4 x float> addrspace(1)* %out + store <4 x float> %res, ptr addrspace(1) %out ret void } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.log10.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.log10.f16.ll index c50552a..a4083d2 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.log10.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.log10.f16.ll @@ -19,12 +19,12 @@ declare <2 x half> @llvm.log10.v2f16(<2 x half> %a) ; VI: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, v[[R_F16_0]] ; GFX9: global_store_short v{{\[[0-9]+:[0-9]+\]}}, v[[R_F16_0]] define void @log10_f16( - half addrspace(1)* %r, - half addrspace(1)* %a) { + ptr addrspace(1) %r, + ptr addrspace(1) %a) { entry: - %a.val = load half, half addrspace(1)* %a + %a.val = load half, ptr addrspace(1) %a %r.val = call half @llvm.log10.f16(half %a.val) - store half %r.val, half addrspace(1)* %r + store half %r.val, ptr addrspace(1) %r ret void } @@ -59,11 +59,11 @@ entry: ; VI: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v[[R_F32_5]] ; GFX9: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v[[R_F32_5]] define void @log10_v2f16( - <2 x half> addrspace(1)* %r, - <2 x half> addrspace(1)* %a) { + ptr addrspace(1) %r, + ptr addrspace(1) %a) { entry: - %a.val = load <2 x half>, <2 x half> addrspace(1)* %a + %a.val = load <2 x half>, ptr addrspace(1) %a %r.val = call <2 x half> @llvm.log10.v2f16(<2 x half> %a.val) - store <2 x half> %r.val, <2 x half> addrspace(1)* %r + store <2 x half> %r.val, ptr addrspace(1) %r ret void } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.log10.ll b/llvm/test/CodeGen/AMDGPU/llvm.log10.ll index e2e55a4..d67a71b 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.log10.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.log10.ll @@ -11,10 +11,10 @@ ; CM-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} ; GCN: v_log_f32_e32 v{{[0-9]+}}, v{{[0-9]+}} ; GCN: v_mul_f32_e32 v{{[0-9]+}}, 0x3e9a209a, v{{[0-9]+}} -define void @test(float addrspace(1)* %out, float %in) { +define void @test(ptr addrspace(1) %out, float %in) { entry: %res = call float @llvm.log10.f32(float %in) - store float %res, float addrspace(1)* %out + store float %res, ptr addrspace(1) %out ret void } @@ -35,10 +35,10 @@ entry: ; GCN-DAG: v_log_f32_e32 v{{[0-9]+}}, v{{[0-9]+}} ; GCN: v_mul_f32_e32 v{{[0-9]+}}, 0x3e9a209a, v{{[0-9]+}} ; GCN: v_mul_f32_e32 v{{[0-9]+}}, 0x3e9a209a, v{{[0-9]+}} -define void @testv2(<2 x float> addrspace(1)* %out, <2 x float> %in) { +define void @testv2(ptr addrspace(1) %out, <2 x float> %in) { entry: %res = call <2 x float> @llvm.log10.v2f32(<2 x float> %in) - store <2 x float> %res, <2 x float> addrspace(1)* %out + store <2 x float> %res, ptr addrspace(1) %out ret void } @@ -73,10 +73,10 @@ entry: ; GCN: v_mul_f32_e32 v{{[0-9]+}}, 0x3e9a209a, v{{[0-9]+}} ; GCN: v_mul_f32_e32 v{{[0-9]+}}, 0x3e9a209a, v{{[0-9]+}} ; GCN: v_mul_f32_e32 v{{[0-9]+}}, 0x3e9a209a, v{{[0-9]+}} -define void @testv4(<4 x float> addrspace(1)* %out, <4 x float> %in) { +define void @testv4(ptr addrspace(1) %out, <4 x float> %in) { entry: %res = call <4 x float> @llvm.log10.v4f32(<4 x float> %in) - store <4 x float> %res, <4 x float> addrspace(1)* %out + store <4 x float> %res, ptr addrspace(1) %out ret void } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.log2.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.log2.f16.ll index 7228c40..85b5e3c 100644 --- 
a/llvm/test/CodeGen/AMDGPU/llvm.log2.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.log2.f16.ll @@ -13,12 +13,12 @@ declare <2 x half> @llvm.log2.v2f16(<2 x half> %a) ; GCN: buffer_store_short v[[R_F16]] ; GCN: s_endpgm define amdgpu_kernel void @log2_f16( - half addrspace(1)* %r, - half addrspace(1)* %a) { + ptr addrspace(1) %r, + ptr addrspace(1) %a) { entry: - %a.val = load half, half addrspace(1)* %a + %a.val = load half, ptr addrspace(1) %a %r.val = call half @llvm.log2.f16(half %a.val) - store half %r.val, half addrspace(1)* %r + store half %r.val, ptr addrspace(1) %r ret void } @@ -43,11 +43,11 @@ entry: ; GCN: buffer_store_dword v[[R_V2_F16]] ; GCN: s_endpgm define amdgpu_kernel void @log2_v2f16( - <2 x half> addrspace(1)* %r, - <2 x half> addrspace(1)* %a) { + ptr addrspace(1) %r, + ptr addrspace(1) %a) { entry: - %a.val = load <2 x half>, <2 x half> addrspace(1)* %a + %a.val = load <2 x half>, ptr addrspace(1) %a %r.val = call <2 x half> @llvm.log2.v2f16(<2 x half> %a.val) - store <2 x half> %r.val, <2 x half> addrspace(1)* %r + store <2 x half> %r.val, ptr addrspace(1) %r ret void } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.log2.ll b/llvm/test/CodeGen/AMDGPU/llvm.log2.ll index bb97a2b..8702299 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.log2.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.log2.ll @@ -11,10 +11,10 @@ ;CM-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} ;SI: v_log_f32 -define amdgpu_kernel void @test(float addrspace(1)* %out, float %in) { +define amdgpu_kernel void @test(ptr addrspace(1) %out, float %in) { entry: %0 = call float @llvm.log2.f32(float %in) - store float %0, float addrspace(1)* %out + store float %0, ptr addrspace(1) %out ret void } @@ -34,10 +34,10 @@ entry: ;SI: v_log_f32 ;SI: v_log_f32 -define amdgpu_kernel void @testv2(<2 x float> addrspace(1)* %out, <2 x float> %in) { +define amdgpu_kernel void @testv2(ptr addrspace(1) %out, <2 x float> %in) { entry: %0 = call <2 x float> @llvm.log2.v2f32(<2 x float> %in) - store <2 x float> %0, <2 x float> addrspace(1)* %out + store <2 x float> %0, ptr addrspace(1) %out ret void } @@ -68,10 +68,10 @@ entry: ;SI: v_log_f32 ;SI: v_log_f32 ;SI: v_log_f32 -define amdgpu_kernel void @testv4(<4 x float> addrspace(1)* %out, <4 x float> %in) { +define amdgpu_kernel void @testv4(ptr addrspace(1) %out, <4 x float> %in) { entry: %0 = call <4 x float> @llvm.log2.v4f32(<4 x float> %in) - store <4 x float> %0, <4 x float> addrspace(1)* %out + store <4 x float> %0, ptr addrspace(1) %out ret void } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.maxnum.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.maxnum.f16.ll index 0271fa4..1699147 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.maxnum.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.maxnum.f16.ll @@ -142,14 +142,14 @@ define amdgpu_kernel void @maxnum_f16( ; GFX11-NEXT: buffer_store_b16 v0, off, s[8:11], 0 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - half addrspace(1)* %r, - half addrspace(1)* %a, - half addrspace(1)* %b) #0 { + ptr addrspace(1) %r, + ptr addrspace(1) %a, + ptr addrspace(1) %b) #0 { entry: - %a.val = load volatile half, half addrspace(1)* %a - %b.val = load volatile half, half addrspace(1)* %b + %a.val = load volatile half, ptr addrspace(1) %a + %b.val = load volatile half, ptr addrspace(1) %b %r.val = call half @llvm.maxnum.f16(half %a.val, half %b.val) - store half %r.val, half addrspace(1)* %r + store half %r.val, ptr addrspace(1) %r ret void } @@ -252,12 +252,12 @@ define amdgpu_kernel void @maxnum_f16_imm_a( ; GFX11-NEXT: buffer_store_b16 v0, off, s[4:7], 0 ; GFX11-NEXT: 
s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - half addrspace(1)* %r, - half addrspace(1)* %b) #0 { + ptr addrspace(1) %r, + ptr addrspace(1) %b) #0 { entry: - %b.val = load half, half addrspace(1)* %b + %b.val = load half, ptr addrspace(1) %b %r.val = call half @llvm.maxnum.f16(half 3.0, half %b.val) - store half %r.val, half addrspace(1)* %r + store half %r.val, ptr addrspace(1) %r ret void } @@ -360,12 +360,12 @@ define amdgpu_kernel void @maxnum_f16_imm_b( ; GFX11-NEXT: buffer_store_b16 v0, off, s[4:7], 0 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - half addrspace(1)* %r, - half addrspace(1)* %a) #0 { + ptr addrspace(1) %r, + ptr addrspace(1) %a) #0 { entry: - %a.val = load half, half addrspace(1)* %a + %a.val = load half, ptr addrspace(1) %a %r.val = call half @llvm.maxnum.f16(half %a.val, half 4.0) - store half %r.val, half addrspace(1)* %r + store half %r.val, ptr addrspace(1) %r ret void } @@ -476,14 +476,14 @@ define amdgpu_kernel void @maxnum_v2f16( ; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - <2 x half> addrspace(1)* %r, - <2 x half> addrspace(1)* %a, - <2 x half> addrspace(1)* %b) #0 { + ptr addrspace(1) %r, + ptr addrspace(1) %a, + ptr addrspace(1) %b) #0 { entry: - %a.val = load <2 x half>, <2 x half> addrspace(1)* %a - %b.val = load <2 x half>, <2 x half> addrspace(1)* %b + %a.val = load <2 x half>, ptr addrspace(1) %a + %b.val = load <2 x half>, ptr addrspace(1) %b %r.val = call <2 x half> @llvm.maxnum.v2f16(<2 x half> %a.val, <2 x half> %b.val) - store <2 x half> %r.val, <2 x half> addrspace(1)* %r + store <2 x half> %r.val, ptr addrspace(1) %r ret void } @@ -569,12 +569,12 @@ define amdgpu_kernel void @maxnum_v2f16_imm_a( ; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - <2 x half> addrspace(1)* %r, - <2 x half> addrspace(1)* %b) #0 { + ptr addrspace(1) %r, + ptr addrspace(1) %b) #0 { entry: - %b.val = load <2 x half>, <2 x half> addrspace(1)* %b + %b.val = load <2 x half>, ptr addrspace(1) %b %r.val = call <2 x half> @llvm.maxnum.v2f16(<2 x half> , <2 x half> %b.val) - store <2 x half> %r.val, <2 x half> addrspace(1)* %r + store <2 x half> %r.val, ptr addrspace(1) %r ret void } @@ -660,12 +660,12 @@ define amdgpu_kernel void @maxnum_v2f16_imm_b( ; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - <2 x half> addrspace(1)* %r, - <2 x half> addrspace(1)* %a) #0 { + ptr addrspace(1) %r, + ptr addrspace(1) %a) #0 { entry: - %a.val = load <2 x half>, <2 x half> addrspace(1)* %a + %a.val = load <2 x half>, ptr addrspace(1) %a %r.val = call <2 x half> @llvm.maxnum.v2f16(<2 x half> %a.val, <2 x half> ) - store <2 x half> %r.val, <2 x half> addrspace(1)* %r + store <2 x half> %r.val, ptr addrspace(1) %r ret void } @@ -801,14 +801,14 @@ define amdgpu_kernel void @maxnum_v3f16( ; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - <3 x half> addrspace(1)* %r, - <3 x half> addrspace(1)* %a, - <3 x half> addrspace(1)* %b) #0 { + ptr addrspace(1) %r, + ptr addrspace(1) %a, + ptr addrspace(1) %b) #0 { entry: - %a.val = load <3 x half>, <3 x half> addrspace(1)* %a - %b.val = load <3 x half>, <3 x half> addrspace(1)* %b + %a.val = load <3 x half>, ptr addrspace(1) %a + %b.val = load <3 x half>, ptr addrspace(1) %b %r.val = call <3 x half> 
@llvm.maxnum.v3f16(<3 x half> %a.val, <3 x half> %b.val) - store <3 x half> %r.val, <3 x half> addrspace(1)* %r + store <3 x half> %r.val, ptr addrspace(1) %r ret void } @@ -955,14 +955,14 @@ define amdgpu_kernel void @maxnum_v4f16( ; GFX11-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - <4 x half> addrspace(1)* %r, - <4 x half> addrspace(1)* %a, - <4 x half> addrspace(1)* %b) #0 { + ptr addrspace(1) %r, + ptr addrspace(1) %a, + ptr addrspace(1) %b) #0 { entry: - %a.val = load <4 x half>, <4 x half> addrspace(1)* %a - %b.val = load <4 x half>, <4 x half> addrspace(1)* %b + %a.val = load <4 x half>, ptr addrspace(1) %a + %b.val = load <4 x half>, ptr addrspace(1) %b %r.val = call <4 x half> @llvm.maxnum.v4f16(<4 x half> %a.val, <4 x half> %b.val) - store <4 x half> %r.val, <4 x half> addrspace(1)* %r + store <4 x half> %r.val, ptr addrspace(1) %r ret void } @@ -1077,12 +1077,12 @@ define amdgpu_kernel void @fmax_v4f16_imm_a( ; GFX11-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - <4 x half> addrspace(1)* %r, - <4 x half> addrspace(1)* %b) #0 { + ptr addrspace(1) %r, + ptr addrspace(1) %b) #0 { entry: - %b.val = load <4 x half>, <4 x half> addrspace(1)* %b + %b.val = load <4 x half>, ptr addrspace(1) %b %r.val = call <4 x half> @llvm.maxnum.v4f16(<4 x half> , <4 x half> %b.val) - store <4 x half> %r.val, <4 x half> addrspace(1)* %r + store <4 x half> %r.val, ptr addrspace(1) %r ret void } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.memcpy.ll b/llvm/test/CodeGen/AMDGPU/llvm.memcpy.ll index f8b331c..25194c7 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.memcpy.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.memcpy.ll @@ -1,9 +1,9 @@ ; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -declare void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* nocapture, i8 addrspace(3)* nocapture, i32, i1) nounwind -declare void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* nocapture, i8 addrspace(1)* nocapture, i64, i1) nounwind -declare void @llvm.memcpy.p1i8.p2i8.i64(i8 addrspace(1)* nocapture, i8 addrspace(4)* nocapture, i64, i1) nounwind +declare void @llvm.memcpy.p3.p3.i32(ptr addrspace(3) nocapture, ptr addrspace(3) nocapture, i32, i1) nounwind +declare void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) nocapture, ptr addrspace(1) nocapture, i64, i1) nounwind +declare void @llvm.memcpy.p1.p2.i64(ptr addrspace(1) nocapture, ptr addrspace(4) nocapture, i64, i1) nounwind ; FUNC-LABEL: {{^}}test_small_memcpy_i64_lds_to_lds_align1: @@ -80,10 +80,8 @@ declare void @llvm.memcpy.p1i8.p2i8.i64(i8 addrspace(1)* nocapture, i8 addrspace ; SI-DAG: ds_write_b8 ; SI: s_endpgm -define amdgpu_kernel void @test_small_memcpy_i64_lds_to_lds_align1(i64 addrspace(3)* noalias %out, i64 addrspace(3)* noalias %in) nounwind { - %bcin = bitcast i64 addrspace(3)* %in to i8 addrspace(3)* - %bcout = bitcast i64 addrspace(3)* %out to i8 addrspace(3)* - call void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* %bcout, i8 addrspace(3)* %bcin, i32 32, i1 false) nounwind +define amdgpu_kernel void @test_small_memcpy_i64_lds_to_lds_align1(ptr addrspace(3) noalias %out, ptr addrspace(3) noalias %in) nounwind { + call void @llvm.memcpy.p3.p3.i32(ptr addrspace(3) %out, ptr addrspace(3) %in, i32 32, i1 false) nounwind ret 
void } @@ -125,10 +123,8 @@ define amdgpu_kernel void @test_small_memcpy_i64_lds_to_lds_align1(i64 addrspace ; SI-DAG: ds_write_b16 ; SI: s_endpgm -define amdgpu_kernel void @test_small_memcpy_i64_lds_to_lds_align2(i64 addrspace(3)* noalias %out, i64 addrspace(3)* noalias %in) nounwind { - %bcin = bitcast i64 addrspace(3)* %in to i8 addrspace(3)* - %bcout = bitcast i64 addrspace(3)* %out to i8 addrspace(3)* - call void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* align 2 %bcout, i8 addrspace(3)* align 2 %bcin, i32 32, i1 false) nounwind +define amdgpu_kernel void @test_small_memcpy_i64_lds_to_lds_align2(ptr addrspace(3) noalias %out, ptr addrspace(3) noalias %in) nounwind { + call void @llvm.memcpy.p3.p3.i32(ptr addrspace(3) align 2 %out, ptr addrspace(3) align 2 %in, i32 32, i1 false) nounwind ret void } @@ -144,10 +140,8 @@ define amdgpu_kernel void @test_small_memcpy_i64_lds_to_lds_align2(i64 addrspace ; SI: ds_write2_b32 ; SI: s_endpgm -define amdgpu_kernel void @test_small_memcpy_i64_lds_to_lds_align4(i64 addrspace(3)* noalias %out, i64 addrspace(3)* noalias %in) nounwind { - %bcin = bitcast i64 addrspace(3)* %in to i8 addrspace(3)* - %bcout = bitcast i64 addrspace(3)* %out to i8 addrspace(3)* - call void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* align 4 %bcout, i8 addrspace(3)* align 4 %bcin, i32 32, i1 false) nounwind +define amdgpu_kernel void @test_small_memcpy_i64_lds_to_lds_align4(ptr addrspace(3) noalias %out, ptr addrspace(3) noalias %in) nounwind { + call void @llvm.memcpy.p3.p3.i32(ptr addrspace(3) align 4 %out, ptr addrspace(3) align 4 %in, i32 32, i1 false) nounwind ret void } @@ -161,10 +155,8 @@ define amdgpu_kernel void @test_small_memcpy_i64_lds_to_lds_align4(i64 addrspace ; SI: ds_write2_b64 ; SI-DAG: s_endpgm -define amdgpu_kernel void @test_small_memcpy_i64_lds_to_lds_align8(i64 addrspace(3)* noalias %out, i64 addrspace(3)* noalias %in) nounwind { - %bcin = bitcast i64 addrspace(3)* %in to i8 addrspace(3)* - %bcout = bitcast i64 addrspace(3)* %out to i8 addrspace(3)* - call void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* align 8 %bcout, i8 addrspace(3)* align 8 %bcin, i32 32, i1 false) nounwind +define amdgpu_kernel void @test_small_memcpy_i64_lds_to_lds_align8(ptr addrspace(3) noalias %out, ptr addrspace(3) noalias %in) nounwind { + call void @llvm.memcpy.p3.p3.i32(ptr addrspace(3) align 8 %out, ptr addrspace(3) align 8 %in, i32 32, i1 false) nounwind ret void } @@ -238,10 +230,8 @@ define amdgpu_kernel void @test_small_memcpy_i64_lds_to_lds_align8(i64 addrspace ; SI-DAG: buffer_store_byte ; SI: s_endpgm -define amdgpu_kernel void @test_small_memcpy_i64_global_to_global_align1(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind { - %bcin = bitcast i64 addrspace(1)* %in to i8 addrspace(1)* - %bcout = bitcast i64 addrspace(1)* %out to i8 addrspace(1)* - call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %bcout, i8 addrspace(1)* %bcin, i64 32, i1 false) nounwind +define amdgpu_kernel void @test_small_memcpy_i64_global_to_global_align1(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind { + call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) %out, ptr addrspace(1) %in, i64 32, i1 false) nounwind ret void } @@ -281,10 +271,8 @@ define amdgpu_kernel void @test_small_memcpy_i64_global_to_global_align1(i64 add ; SI-DAG: buffer_store_short ; SI: s_endpgm -define amdgpu_kernel void @test_small_memcpy_i64_global_to_global_align2(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind { - %bcin = bitcast i64 
addrspace(1)* %in to i8 addrspace(1)* - %bcout = bitcast i64 addrspace(1)* %out to i8 addrspace(1)* - call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* align 2 %bcout, i8 addrspace(1)* align 2 %bcin, i64 32, i1 false) nounwind +define amdgpu_kernel void @test_small_memcpy_i64_global_to_global_align2(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind { + call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) align 2 %out, ptr addrspace(1) align 2 %in, i64 32, i1 false) nounwind ret void } @@ -294,10 +282,8 @@ define amdgpu_kernel void @test_small_memcpy_i64_global_to_global_align2(i64 add ; SI: buffer_store_dwordx4 ; SI: buffer_store_dwordx4 ; SI: s_endpgm -define amdgpu_kernel void @test_small_memcpy_i64_global_to_global_align4(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind { - %bcin = bitcast i64 addrspace(1)* %in to i8 addrspace(1)* - %bcout = bitcast i64 addrspace(1)* %out to i8 addrspace(1)* - call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* align 4 %bcout, i8 addrspace(1)* align 4 %bcin, i64 32, i1 false) nounwind +define amdgpu_kernel void @test_small_memcpy_i64_global_to_global_align4(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind { + call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) align 4 %out, ptr addrspace(1) align 4 %in, i64 32, i1 false) nounwind ret void } @@ -307,10 +293,8 @@ define amdgpu_kernel void @test_small_memcpy_i64_global_to_global_align4(i64 add ; SI: buffer_store_dwordx4 ; SI: buffer_store_dwordx4 ; SI: s_endpgm -define amdgpu_kernel void @test_small_memcpy_i64_global_to_global_align8(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind { - %bcin = bitcast i64 addrspace(1)* %in to i8 addrspace(1)* - %bcout = bitcast i64 addrspace(1)* %out to i8 addrspace(1)* - call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* align 8 %bcout, i8 addrspace(1)* align 8 %bcin, i64 32, i1 false) nounwind +define amdgpu_kernel void @test_small_memcpy_i64_global_to_global_align8(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind { + call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) align 8 %out, ptr addrspace(1) align 8 %in, i64 32, i1 false) nounwind ret void } @@ -320,10 +304,8 @@ define amdgpu_kernel void @test_small_memcpy_i64_global_to_global_align8(i64 add ; SI: buffer_store_dwordx4 ; SI: buffer_store_dwordx4 ; SI: s_endpgm -define amdgpu_kernel void @test_small_memcpy_i64_global_to_global_align16(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind { - %bcin = bitcast i64 addrspace(1)* %in to i8 addrspace(1)* - %bcout = bitcast i64 addrspace(1)* %out to i8 addrspace(1)* - call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* align 16 %bcout, i8 addrspace(1)* align 16 %bcin, i64 32, i1 false) nounwind +define amdgpu_kernel void @test_small_memcpy_i64_global_to_global_align16(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind { + call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) align 16 %out, ptr addrspace(1) align 16 %in, i64 32, i1 false) nounwind ret void } @@ -340,9 +322,8 @@ define amdgpu_kernel void @test_small_memcpy_i64_global_to_global_align16(i64 ad ; SI-DAG: s_load_dwordx2 ; SI-DAG: buffer_store_dwordx4 ; SI-DAG: buffer_store_dwordx4 -define amdgpu_kernel void @test_memcpy_const_string_align4(i8 addrspace(1)* noalias %out) nounwind { - %str = bitcast [16 x i8] addrspace(4)* @hello.align4 to i8 addrspace(4)* - call void @llvm.memcpy.p1i8.p2i8.i64(i8 addrspace(1)* align 4 %out, i8 addrspace(4)* align 4 %str, i64 32, i1 
false) +define amdgpu_kernel void @test_memcpy_const_string_align4(ptr addrspace(1) noalias %out) nounwind { + call void @llvm.memcpy.p1.p2.i64(ptr addrspace(1) align 4 %out, ptr addrspace(4) align 4 @hello.align4, i64 32, i1 false) ret void } @@ -365,8 +346,7 @@ define amdgpu_kernel void @test_memcpy_const_string_align4(i8 addrspace(1)* noal ; SI: buffer_store_byte ; SI: buffer_store_byte ; SI: buffer_store_byte -define amdgpu_kernel void @test_memcpy_const_string_align1(i8 addrspace(1)* noalias %out) nounwind { - %str = bitcast [16 x i8] addrspace(4)* @hello.align1 to i8 addrspace(4)* - call void @llvm.memcpy.p1i8.p2i8.i64(i8 addrspace(1)* %out, i8 addrspace(4)* %str, i64 32, i1 false) +define amdgpu_kernel void @test_memcpy_const_string_align1(ptr addrspace(1) noalias %out) nounwind { + call void @llvm.memcpy.p1.p2.i64(ptr addrspace(1) %out, ptr addrspace(4) @hello.align1, i64 32, i1 false) ret void } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.minnum.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.minnum.f16.ll index 4de4268..699ac77 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.minnum.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.minnum.f16.ll @@ -141,14 +141,14 @@ define amdgpu_kernel void @minnum_f16_ieee( ; GFX11-NEXT: buffer_store_b16 v0, off, s[8:11], 0 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - half addrspace(1)* %r, - half addrspace(1)* %a, - half addrspace(1)* %b) #0 { + ptr addrspace(1) %r, + ptr addrspace(1) %a, + ptr addrspace(1) %b) #0 { entry: - %a.val = load volatile half, half addrspace(1)* %a - %b.val = load volatile half, half addrspace(1)* %b + %a.val = load volatile half, ptr addrspace(1) %a + %b.val = load volatile half, ptr addrspace(1) %b %r.val = call half @llvm.minnum.f16(half %a.val, half %b.val) - store half %r.val, half addrspace(1)* %r + store half %r.val, ptr addrspace(1) %r ret void } @@ -278,12 +278,12 @@ define amdgpu_kernel void @minnum_f16_imm_a( ; GFX11-NEXT: buffer_store_b16 v0, off, s[4:7], 0 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - half addrspace(1)* %r, - half addrspace(1)* %b) #0 { + ptr addrspace(1) %r, + ptr addrspace(1) %b) #0 { entry: - %b.val = load half, half addrspace(1)* %b + %b.val = load half, ptr addrspace(1) %b %r.val = call half @llvm.minnum.f16(half 3.0, half %b.val) - store half %r.val, half addrspace(1)* %r + store half %r.val, ptr addrspace(1) %r ret void } @@ -385,12 +385,12 @@ define amdgpu_kernel void @minnum_f16_imm_b( ; GFX11-NEXT: buffer_store_b16 v0, off, s[4:7], 0 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - half addrspace(1)* %r, - half addrspace(1)* %a) #0 { + ptr addrspace(1) %r, + ptr addrspace(1) %a) #0 { entry: - %a.val = load half, half addrspace(1)* %a + %a.val = load half, ptr addrspace(1) %a %r.val = call half @llvm.minnum.f16(half %a.val, half 4.0) - store half %r.val, half addrspace(1)* %r + store half %r.val, ptr addrspace(1) %r ret void } @@ -500,14 +500,14 @@ define amdgpu_kernel void @minnum_v2f16_ieee( ; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - <2 x half> addrspace(1)* %r, - <2 x half> addrspace(1)* %a, - <2 x half> addrspace(1)* %b) #0 { + ptr addrspace(1) %r, + ptr addrspace(1) %a, + ptr addrspace(1) %b) #0 { entry: - %a.val = load <2 x half>, <2 x half> addrspace(1)* %a - %b.val = load <2 x half>, <2 x half> addrspace(1)* %b + %a.val = load <2 x half>, ptr addrspace(1) %a + %b.val = load <2 x half>, ptr addrspace(1) %b %r.val = call <2 x 
half> @llvm.minnum.v2f16(<2 x half> %a.val, <2 x half> %b.val) - store <2 x half> %r.val, <2 x half> addrspace(1)* %r + store <2 x half> %r.val, ptr addrspace(1) %r ret void } @@ -627,12 +627,12 @@ define amdgpu_kernel void @minnum_v2f16_imm_a( ; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - <2 x half> addrspace(1)* %r, - <2 x half> addrspace(1)* %b) #0 { + ptr addrspace(1) %r, + ptr addrspace(1) %b) #0 { entry: - %b.val = load <2 x half>, <2 x half> addrspace(1)* %b + %b.val = load <2 x half>, ptr addrspace(1) %b %r.val = call <2 x half> @llvm.minnum.v2f16(<2 x half> , <2 x half> %b.val) - store <2 x half> %r.val, <2 x half> addrspace(1)* %r + store <2 x half> %r.val, ptr addrspace(1) %r ret void } @@ -717,12 +717,12 @@ define amdgpu_kernel void @minnum_v2f16_imm_b( ; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - <2 x half> addrspace(1)* %r, - <2 x half> addrspace(1)* %a) #0 { + ptr addrspace(1) %r, + ptr addrspace(1) %a) #0 { entry: - %a.val = load <2 x half>, <2 x half> addrspace(1)* %a + %a.val = load <2 x half>, ptr addrspace(1) %a %r.val = call <2 x half> @llvm.minnum.v2f16(<2 x half> %a.val, <2 x half> ) - store <2 x half> %r.val, <2 x half> addrspace(1)* %r + store <2 x half> %r.val, ptr addrspace(1) %r ret void } @@ -857,14 +857,14 @@ define amdgpu_kernel void @minnum_v3f16( ; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - <3 x half> addrspace(1)* %r, - <3 x half> addrspace(1)* %a, - <3 x half> addrspace(1)* %b) #0 { + ptr addrspace(1) %r, + ptr addrspace(1) %a, + ptr addrspace(1) %b) #0 { entry: - %a.val = load <3 x half>, <3 x half> addrspace(1)* %a - %b.val = load <3 x half>, <3 x half> addrspace(1)* %b + %a.val = load <3 x half>, ptr addrspace(1) %a + %b.val = load <3 x half>, ptr addrspace(1) %b %r.val = call <3 x half> @llvm.minnum.v3f16(<3 x half> %a.val, <3 x half> %b.val) - store <3 x half> %r.val, <3 x half> addrspace(1)* %r + store <3 x half> %r.val, ptr addrspace(1) %r ret void } @@ -1010,14 +1010,14 @@ define amdgpu_kernel void @minnum_v4f16( ; GFX11-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - <4 x half> addrspace(1)* %r, - <4 x half> addrspace(1)* %a, - <4 x half> addrspace(1)* %b) #0 { + ptr addrspace(1) %r, + ptr addrspace(1) %a, + ptr addrspace(1) %b) #0 { entry: - %a.val = load <4 x half>, <4 x half> addrspace(1)* %a - %b.val = load <4 x half>, <4 x half> addrspace(1)* %b + %a.val = load <4 x half>, ptr addrspace(1) %a + %b.val = load <4 x half>, ptr addrspace(1) %b %r.val = call <4 x half> @llvm.minnum.v4f16(<4 x half> %a.val, <4 x half> %b.val) - store <4 x half> %r.val, <4 x half> addrspace(1)* %r + store <4 x half> %r.val, ptr addrspace(1) %r ret void } @@ -1131,12 +1131,12 @@ define amdgpu_kernel void @fmin_v4f16_imm_a( ; GFX11-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - <4 x half> addrspace(1)* %r, - <4 x half> addrspace(1)* %b) #0 { + ptr addrspace(1) %r, + ptr addrspace(1) %b) #0 { entry: - %b.val = load <4 x half>, <4 x half> addrspace(1)* %b + %b.val = load <4 x half>, ptr addrspace(1) %b %r.val = call <4 x half> @llvm.minnum.v4f16(<4 x half> , <4 x half> %b.val) - store <4 x half> %r.val, <4 x half> addrspace(1)* %r + store <4 x half> %r.val, ptr addrspace(1) %r ret void 
 }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.mulo.ll b/llvm/test/CodeGen/AMDGPU/llvm.mulo.ll
index fdcceea..8d8a525 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.mulo.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.mulo.ll
@@ -383,7 +383,7 @@ bb:
   %mul = extractvalue { i64, i1 } %umulo, 0
   %overflow = extractvalue { i64, i1 } %umulo, 1
   %res = select i1 %overflow, i64 0, i64 %mul
-  store i64 %res, i64 addrspace(1)* undef
+  store i64 %res, ptr addrspace(1) undef
   ret void
 }
@@ -567,7 +567,7 @@ bb:
   %mul = extractvalue { i64, i1 } %umulo, 0
   %overflow = extractvalue { i64, i1 } %umulo, 1
   %res = select i1 %overflow, i64 0, i64 %mul
-  store i64 %res, i64 addrspace(1)* undef
+  store i64 %res, ptr addrspace(1) undef
   ret void
 }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.pow-gfx9.ll b/llvm/test/CodeGen/AMDGPU/llvm.pow-gfx9.ll
index 657b14d..9f2c193 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.pow-gfx9.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.pow-gfx9.ll
@@ -5,14 +5,14 @@
 ; GFX908: v_mul_legacy_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}{{$}}
 ; GFX90A: v_mul_legacy_f32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}{{$}}
 define amdgpu_kernel void @mul_legacy(
-    float addrspace(1)* %r,
-    float addrspace(1)* %a,
-    float addrspace(1)* %b) {
+    ptr addrspace(1) %r,
+    ptr addrspace(1) %a,
+    ptr addrspace(1) %b) {
 entry:
-  %a.val = load volatile float, float addrspace(1)* %a
-  %b.val = load volatile float, float addrspace(1)* %b
+  %a.val = load volatile float, ptr addrspace(1) %a
+  %b.val = load volatile float, ptr addrspace(1) %b
   %r.val = call float @llvm.pow.f32(float %a.val, float %b.val)
-  store float %r.val, float addrspace(1)* %r
+  store float %r.val, ptr addrspace(1) %r
   ret void
 }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.rint.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.rint.f16.ll
index 5b3d513..8c82f53 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.rint.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.rint.f16.ll
@@ -14,12 +14,12 @@ declare <2 x half> @llvm.rint.v2f16(<2 x half> %a)
 ; GCN: buffer_store_short v[[R_F16]]
 ; GCN: s_endpgm
 define amdgpu_kernel void @rint_f16(
-    half addrspace(1)* %r,
-    half addrspace(1)* %a) {
+    ptr addrspace(1) %r,
+    ptr addrspace(1) %a) {
 entry:
-  %a.val = load half, half addrspace(1)* %a
+  %a.val = load half, ptr addrspace(1) %a
   %r.val = call half @llvm.rint.f16(half %a.val)
-  store half %r.val, half addrspace(1)* %r
+  store half %r.val, ptr addrspace(1) %r
   ret void
 }
@@ -49,11 +49,11 @@ entry:
 ; GCN: s_endpgm
 define amdgpu_kernel void @rint_v2f16(
-    <2 x half> addrspace(1)* %r,
-    <2 x half> addrspace(1)* %a) {
+    ptr addrspace(1) %r,
+    ptr addrspace(1) %a) {
 entry:
-  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
+  %a.val = load <2 x half>, ptr addrspace(1) %a
   %r.val = call <2 x half> @llvm.rint.v2f16(<2 x half> %a.val)
-  store <2 x half> %r.val, <2 x half> addrspace(1)* %r
+  store <2 x half> %r.val, ptr addrspace(1) %r
   ret void
 }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.rint.f64.ll b/llvm/test/CodeGen/AMDGPU/llvm.rint.f64.ll
index cfffaa0..87ed956 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.rint.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.rint.f64.ll
@@ -11,20 +11,20 @@
 ; SI: v_cndmask_b32
 ; SI: v_cndmask_b32
 ; SI: s_endpgm
-define amdgpu_kernel void @rint_f64(double addrspace(1)* %out, double %in) {
+define amdgpu_kernel void @rint_f64(ptr addrspace(1) %out, double %in) {
 entry:
   %0 = call double @llvm.rint.f64(double %in)
-  store double %0, double addrspace(1)* %out
+  store double %0, ptr addrspace(1) %out
   ret void
 }
 ; FUNC-LABEL: {{^}}rint_v2f64:
 ; CI: v_rndne_f64_e32
 ; CI: v_rndne_f64_e32
-define amdgpu_kernel void @rint_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %in) {
+define amdgpu_kernel void @rint_v2f64(ptr addrspace(1) %out, <2 x double> %in) {
 entry:
   %0 = call <2 x double> @llvm.rint.v2f64(<2 x double> %in)
-  store <2 x double> %0, <2 x double> addrspace(1)* %out
+  store <2 x double> %0, ptr addrspace(1) %out
   ret void
 }
@@ -33,10 +33,10 @@ entry:
 ; CI: v_rndne_f64_e32
 ; CI: v_rndne_f64_e32
 ; CI: v_rndne_f64_e32
-define amdgpu_kernel void @rint_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %in) {
+define amdgpu_kernel void @rint_v4f64(ptr addrspace(1) %out, <4 x double> %in) {
 entry:
   %0 = call <4 x double> @llvm.rint.v4f64(<4 x double> %in)
-  store <4 x double> %0, <4 x double> addrspace(1)* %out
+  store <4 x double> %0, ptr addrspace(1) %out
   ret void
 }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.rint.ll b/llvm/test/CodeGen/AMDGPU/llvm.rint.ll
index 4056bc3..cabb9dc 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.rint.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.rint.ll
@@ -6,10 +6,10 @@
 ; R600: RNDNE
 ; SI: v_rndne_f32_e32
-define amdgpu_kernel void @rint_f32(float addrspace(1)* %out, float %in) {
+define amdgpu_kernel void @rint_f32(ptr addrspace(1) %out, float %in) {
 entry:
   %0 = call float @llvm.rint.f32(float %in) #0
-  store float %0, float addrspace(1)* %out
+  store float %0, ptr addrspace(1) %out
   ret void
 }
@@ -19,10 +19,10 @@ entry:
 ; SI: v_rndne_f32_e32
 ; SI: v_rndne_f32_e32
-define amdgpu_kernel void @rint_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %in) {
+define amdgpu_kernel void @rint_v2f32(ptr addrspace(1) %out, <2 x float> %in) {
 entry:
   %0 = call <2 x float> @llvm.rint.v2f32(<2 x float> %in) #0
-  store <2 x float> %0, <2 x float> addrspace(1)* %out
+  store <2 x float> %0, ptr addrspace(1) %out
   ret void
 }
@@ -36,10 +36,10 @@ entry:
 ; SI: v_rndne_f32_e32
 ; SI: v_rndne_f32_e32
 ; SI: v_rndne_f32_e32
-define amdgpu_kernel void @rint_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %in) {
+define amdgpu_kernel void @rint_v4f32(ptr addrspace(1) %out, <4 x float> %in) {
 entry:
   %0 = call <4 x float> @llvm.rint.v4f32(<4 x float> %in) #0
-  store <4 x float> %0, <4 x float> addrspace(1)* %out
+  store <4 x float> %0, ptr addrspace(1) %out
   ret void
 }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.round.f64.ll b/llvm/test/CodeGen/AMDGPU/llvm.round.f64.ll
index 8b21896..4ed575d 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.round.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.round.f64.ll
@@ -2,7 +2,7 @@
 ; RUN: llc -march=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefix=SI %s
 ; RUN: llc -march=amdgcn -mcpu=hawaii < %s | FileCheck -check-prefix=CI %s
-define amdgpu_kernel void @round_f64(double addrspace(1)* %out, double %x) #0 {
+define amdgpu_kernel void @round_f64(ptr addrspace(1) %out, double %x) #0 {
 ; SI-LABEL: round_f64:
 ; SI: ; %bb.0:
 ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -59,11 +59,11 @@ define amdgpu_kernel void @round_f64(double addrspace(1)* %out, double %x) #0 {
 ; CI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
 ; CI-NEXT: s_endpgm
   %result = call double @llvm.round.f64(double %x) #1
-  store double %result, double addrspace(1)* %out
+  store double %result, ptr addrspace(1) %out
   ret void
 }
-define amdgpu_kernel void @v_round_f64(double addrspace(1)* %out, double addrspace(1)* %in) #0 {
+define amdgpu_kernel void @v_round_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
 ; SI-LABEL: v_round_f64:
 ; SI: ; %bb.0:
 ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -128,15 +128,15 @@ define amdgpu_kernel void @v_round_f64(double addrspace(1)* %out, double addrspa
 ; CI-NEXT: buffer_store_dwordx2 v[2:3], v[0:1], s[0:3], 0 addr64
 ; CI-NEXT: s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
-  %gep = getelementptr double, double addrspace(1)* %in, i32 %tid
-  %out.gep = getelementptr double, double addrspace(1)* %out, i32 %tid
-  %x = load double, double addrspace(1)* %gep
+  %gep = getelementptr double, ptr addrspace(1) %in, i32 %tid
+  %out.gep = getelementptr double, ptr addrspace(1) %out, i32 %tid
+  %x = load double, ptr addrspace(1) %gep
   %result = call double @llvm.round.f64(double %x) #1
-  store double %result, double addrspace(1)* %out.gep
+  store double %result, ptr addrspace(1) %out.gep
   ret void
 }
-define amdgpu_kernel void @round_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %in) #0 {
+define amdgpu_kernel void @round_v2f64(ptr addrspace(1) %out, <2 x double> %in) #0 {
 ; SI-LABEL: round_v2f64:
 ; SI: ; %bb.0:
 ; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0xd
@@ -219,11 +219,11 @@ define amdgpu_kernel void @round_v2f64(<2 x double> addrspace(1)* %out, <2 x dou
 ; CI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
 ; CI-NEXT: s_endpgm
   %result = call <2 x double> @llvm.round.v2f64(<2 x double> %in) #1
-  store <2 x double> %result, <2 x double> addrspace(1)* %out
+  store <2 x double> %result, ptr addrspace(1) %out
   ret void
 }
-define amdgpu_kernel void @round_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %in) #0 {
+define amdgpu_kernel void @round_v4f64(ptr addrspace(1) %out, <4 x double> %in) #0 {
 ; SI-LABEL: round_v4f64:
 ; SI: ; %bb.0:
 ; SI-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x11
@@ -364,11 +364,11 @@ define amdgpu_kernel void @round_v4f64(<4 x double> addrspace(1)* %out, <4 x dou
 ; CI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
 ; CI-NEXT: s_endpgm
   %result = call <4 x double> @llvm.round.v4f64(<4 x double> %in) #1
-  store <4 x double> %result, <4 x double> addrspace(1)* %out
+  store <4 x double> %result, ptr addrspace(1) %out
   ret void
 }
-define amdgpu_kernel void @round_v8f64(<8 x double> addrspace(1)* %out, <8 x double> %in) #0 {
+define amdgpu_kernel void @round_v8f64(ptr addrspace(1) %out, <8 x double> %in) #0 {
 ; SI-LABEL: round_v8f64:
 ; SI: ; %bb.0:
 ; SI-NEXT: s_load_dwordx16 s[4:19], s[0:1], 0x19
@@ -625,7 +625,7 @@ define amdgpu_kernel void @round_v8f64(<8 x double> addrspace(1)* %out, <8 x dou
 ; CI-NEXT: buffer_store_dwordx4 v[0:3], off, s[20:23], 0
 ; CI-NEXT: s_endpgm
   %result = call <8 x double> @llvm.round.v8f64(<8 x double> %in) #1
-  store <8 x double> %result, <8 x double> addrspace(1)* %out
+  store <8 x double> %result, ptr addrspace(1) %out
   ret void
 }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.round.ll b/llvm/test/CodeGen/AMDGPU/llvm.round.ll
index ae029f5..6525c02 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.round.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.round.ll
@@ -21,9 +21,9 @@
 ; R600-DAG: SETGE
 ; R600-DAG: CNDE
 ; R600-DAG: ADD
-define amdgpu_kernel void @round_f32(float addrspace(1)* %out, float %x) #0 {
+define amdgpu_kernel void @round_f32(ptr addrspace(1) %out, float %x) #0 {
   %result = call float @llvm.round.f32(float %x) #1
-  store float %result, float addrspace(1)* %out
+  store float %result, ptr addrspace(1) %out
   ret void
 }
@@ -35,27 +35,27 @@ define amdgpu_kernel void @round_f32(float addrspace(1)* %out, float %x) #0 {
 ; FUNC-LABEL: {{^}}round_v2f32:
 ; GCN: s_endpgm
 ; R600: CF_END
-define amdgpu_kernel void @round_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %in) #0 {
+define amdgpu_kernel void @round_v2f32(ptr addrspace(1) %out, <2 x float> %in) #0 {
   %result = call <2 x float> @llvm.round.v2f32(<2 x float> %in) #1
-  store <2 x float> %result, <2 x float> addrspace(1)* %out
+  store <2 x float> %result, ptr addrspace(1) %out
   ret void
 }
 ; FUNC-LABEL: {{^}}round_v4f32:
 ; GCN: s_endpgm
 ; R600: CF_END
-define amdgpu_kernel void @round_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %in) #0 {
+define amdgpu_kernel void @round_v4f32(ptr addrspace(1) %out, <4 x float> %in) #0 {
   %result = call <4 x float> @llvm.round.v4f32(<4 x float> %in) #1
-  store <4 x float> %result, <4 x float> addrspace(1)* %out
+  store <4 x float> %result, ptr addrspace(1) %out
   ret void
 }
 ; FUNC-LABEL: {{^}}round_v8f32:
 ; GCN: s_endpgm
 ; R600: CF_END
-define amdgpu_kernel void @round_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %in) #0 {
+define amdgpu_kernel void @round_v8f32(ptr addrspace(1) %out, <8 x float> %in) #0 {
   %result = call <8 x float> @llvm.round.v8f32(<8 x float> %in) #1
-  store <8 x float> %result, <8 x float> addrspace(1)* %out
+  store <8 x float> %result, ptr addrspace(1) %out
   ret void
 }
@@ -72,11 +72,11 @@ define amdgpu_kernel void @round_v8f32(<8 x float> addrspace(1)* %out, <8 x floa
 ; GFX89: v_cndmask_b32_e32 [[SEL:v[0-9]+]], 0, [[COPYSIGN]]
 ; GFX89: v_add_f16_e32 [[RESULT:v[0-9]+]], [[TRUNC]], [[SEL]]
 ; GFX89: buffer_store_short [[RESULT]]
-define amdgpu_kernel void @round_f16(half addrspace(1)* %out, i32 %x.arg) #0 {
+define amdgpu_kernel void @round_f16(ptr addrspace(1) %out, i32 %x.arg) #0 {
   %x.arg.trunc = trunc i32 %x.arg to i16
   %x = bitcast i16 %x.arg.trunc to half
   %result = call half @llvm.round.f16(half %x) #1
-  store half %result, half addrspace(1)* %out
+  store half %result, ptr addrspace(1) %out
   ret void
 }
@@ -88,10 +88,10 @@ define amdgpu_kernel void @round_f16(half addrspace(1)* %out, i32 %x.arg) #0 {
 ; GFX89: v_bfi_b32 [[COPYSIGN1:v[0-9]+]], [[K]], [[BFI_K]],
 ; GFX9: v_pack_b32_f16
-define amdgpu_kernel void @round_v2f16(<2 x half> addrspace(1)* %out, i32 %in.arg) #0 {
+define amdgpu_kernel void @round_v2f16(ptr addrspace(1) %out, i32 %in.arg) #0 {
   %in = bitcast i32 %in.arg to <2 x half>
   %result = call <2 x half> @llvm.round.v2f16(<2 x half> %in)
-  store <2 x half> %result, <2 x half> addrspace(1)* %out
+  store <2 x half> %result, ptr addrspace(1) %out
   ret void
 }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.sin.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.sin.f16.ll
index 983fcca..2ad122c 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.sin.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.sin.f16.ll
@@ -5,7 +5,7 @@
 ; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefix=GFX10 %s
 ; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefix=GFX11 %s
-define amdgpu_kernel void @sin_f16(half addrspace(1)* %r, half addrspace(1)* %a) {
+define amdgpu_kernel void @sin_f16(ptr addrspace(1) %r, ptr addrspace(1) %a) {
 ; GFX6-LABEL: sin_f16:
 ; GFX6: ; %bb.0:
 ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -81,13 +81,13 @@ define amdgpu_kernel void @sin_f16(half addrspace(1)* %r, half addrspace(1)* %a)
 ; GFX11-NEXT: global_store_b16 v0, v1, s[0:1]
 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
 ; GFX11-NEXT: s_endpgm
-  %a.val = load half, half addrspace(1)* %a
+  %a.val = load half, ptr addrspace(1) %a
   %r.val = call half @llvm.sin.f16(half %a.val)
-  store half %r.val, half addrspace(1)* %r
+  store half %r.val, ptr addrspace(1) %r
   ret void
 }
-define amdgpu_kernel void @sin_v2f16(<2 x half> addrspace(1)* %r, <2 x half> addrspace(1)* %a) {
+define amdgpu_kernel void @sin_v2f16(ptr addrspace(1) %r, ptr addrspace(1) %a) {
 ; GFX6-LABEL: sin_v2f16:
 ; GFX6: ; %bb.0:
 ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -190,9 +190,9 @@ define amdgpu_kernel void @sin_v2f16(<2 x half> addrspace(1)* %r, <2 x half> add
 ; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
 ; GFX11-NEXT: s_endpgm
-  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
+  %a.val = load <2 x half>, ptr addrspace(1) %a
   %r.val = call <2 x half> @llvm.sin.v2f16(<2 x half> %a.val)
-  store <2 x half> %r.val, <2 x half> addrspace(1)* %r
+  store <2 x half> %r.val, ptr addrspace(1) %r
   ret void
 }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.sin.ll b/llvm/test/CodeGen/AMDGPU/llvm.sin.ll
index 7f033a6..6431c39 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.sin.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.sin.ll
@@ -16,9 +16,9 @@
 ; GFX9-NOT: v_fract_f32
 ; GCN: v_sin_f32
 ; GCN-NOT: v_sin_f32
-define amdgpu_kernel void @sin_f32(float addrspace(1)* %out, float %x) #1 {
+define amdgpu_kernel void @sin_f32(ptr addrspace(1) %out, float %x) #1 {
   %sin = call float @llvm.sin.f32(float %x)
-  store float %sin, float addrspace(1)* %out
+  store float %sin, ptr addrspace(1) %out
   ret void
 }
@@ -29,10 +29,10 @@ define amdgpu_kernel void @sin_f32(float addrspace(1)* %out, float %x) #1 {
 ; GFX9-NOT: v_fract_f32
 ; GCN: v_sin_f32
 ; GCN-NOT: v_sin_f32
-define amdgpu_kernel void @safe_sin_3x_f32(float addrspace(1)* %out, float %x) #1 {
+define amdgpu_kernel void @safe_sin_3x_f32(ptr addrspace(1) %out, float %x) #1 {
   %y = fmul float 3.0, %x
   %sin = call float @llvm.sin.f32(float %y)
-  store float %sin, float addrspace(1)* %out
+  store float %sin, ptr addrspace(1) %out
   ret void
 }
@@ -44,10 +44,10 @@ define amdgpu_kernel void @safe_sin_3x_f32(float addrspace(1)* %out, float %x) #
 ; GFX9-NOT: v_fract_f32
 ; GCN: v_sin_f32
 ; GCN-NOT: v_sin_f32
-define amdgpu_kernel void @unsafe_sin_3x_f32(float addrspace(1)* %out, float %x) #2 {
+define amdgpu_kernel void @unsafe_sin_3x_f32(ptr addrspace(1) %out, float %x) #2 {
   %y = fmul float 3.0, %x
   %sin = call float @llvm.sin.f32(float %y)
-  store float %sin, float addrspace(1)* %out
+  store float %sin, ptr addrspace(1) %out
   ret void
 }
@@ -59,10 +59,10 @@ define amdgpu_kernel void @unsafe_sin_3x_f32(float addrspace(1)* %out, float %x)
 ; GFX9-NOT: v_fract_f32
 ; GCN: v_sin_f32
 ; GCN-NOT: v_sin_f32
-define amdgpu_kernel void @fmf_sin_3x_f32(float addrspace(1)* %out, float %x) #1 {
+define amdgpu_kernel void @fmf_sin_3x_f32(ptr addrspace(1) %out, float %x) #1 {
   %y = fmul reassoc float 3.0, %x
   %sin = call reassoc float @llvm.sin.f32(float %y)
-  store float %sin, float addrspace(1)* %out
+  store float %sin, ptr addrspace(1) %out
   ret void
 }
@@ -73,10 +73,10 @@ define amdgpu_kernel void @fmf_sin_3x_f32(float addrspace(1)* %out, float %x) #1
 ; GFX9-NOT: v_fract_f32
 ; GCN: v_sin_f32
 ; GCN-NOT: v_sin_f32
-define amdgpu_kernel void @safe_sin_2x_f32(float addrspace(1)* %out, float %x) #1 {
+define amdgpu_kernel void @safe_sin_2x_f32(ptr addrspace(1) %out, float %x) #1 {
   %y = fmul float 2.0, %x
   %sin = call float @llvm.sin.f32(float %y)
-  store float %sin, float addrspace(1)* %out
+  store float %sin, ptr addrspace(1) %out
   ret void
 }
@@ -88,10 +88,10 @@ define amdgpu_kernel void @safe_sin_2x_f32(float addrspace(1)* %out, float %x) #
 ; GFX9-NOT: v_fract_f32
 ; GCN: v_sin_f32
 ; GCN-NOT: v_sin_f32
-define amdgpu_kernel void @unsafe_sin_2x_f32(float addrspace(1)* %out, float %x) #2 {
+define amdgpu_kernel void @unsafe_sin_2x_f32(ptr addrspace(1) %out, float %x) #2 {
   %y = fmul float 2.0, %x
   %sin = call float @llvm.sin.f32(float %y)
-  store float %sin, float addrspace(1)* %out
+  store float %sin, ptr addrspace(1) %out
   ret void
 }
@@ -103,10 +103,10 @@ define amdgpu_kernel void @unsafe_sin_2x_f32(float addrspace(1)* %out, float %x)
 ; GFX9-NOT: v_fract_f32
 ; GCN: v_sin_f32
 ; GCN-NOT: v_sin_f32
-define amdgpu_kernel void @fmf_sin_2x_f32(float addrspace(1)* %out, float %x) #1 {
+define amdgpu_kernel void @fmf_sin_2x_f32(ptr addrspace(1) %out, float %x) #1 {
   %y = fmul reassoc float 2.0, %x
   %sin = call reassoc float @llvm.sin.f32(float %y)
-  store float %sin, float addrspace(1)* %out
+  store float %sin, ptr addrspace(1) %out
   ret void
 }
@@ -117,10 +117,10 @@ define amdgpu_kernel void @fmf_sin_2x_f32(float addrspace(1)* %out, float %x) #1
 ; GFX9-NOT: v_fract_f32
 ; GCN: v_sin_f32
 ; GCN-NOT: v_sin_f32
-define amdgpu_kernel void @safe_sin_cancel_f32(float addrspace(1)* %out, float %x) #1 {
+define amdgpu_kernel void @safe_sin_cancel_f32(ptr addrspace(1) %out, float %x) #1 {
   %y = fmul float 0x401921FB60000000, %x
   %sin = call float @llvm.sin.f32(float %y)
-  store float %sin, float addrspace(1)* %out
+  store float %sin, ptr addrspace(1) %out
   ret void
 }
@@ -131,10 +131,10 @@ define amdgpu_kernel void @safe_sin_cancel_f32(float addrspace(1)* %out, float %
 ; GFX9-NOT: v_fract_f32
 ; GCN: v_sin_f32
 ; GCN-NOT: v_sin_f32
-define amdgpu_kernel void @unsafe_sin_cancel_f32(float addrspace(1)* %out, float %x) #2 {
+define amdgpu_kernel void @unsafe_sin_cancel_f32(ptr addrspace(1) %out, float %x) #2 {
   %y = fmul float 0x401921FB60000000, %x
   %sin = call float @llvm.sin.f32(float %y)
-  store float %sin, float addrspace(1)* %out
+  store float %sin, ptr addrspace(1) %out
   ret void
 }
@@ -145,10 +145,10 @@ define amdgpu_kernel void @unsafe_sin_cancel_f32(float addrspace(1)* %out, float
 ; GFX9-NOT: v_fract_f32
 ; GCN: v_sin_f32
 ; GCN-NOT: v_sin_f32
-define amdgpu_kernel void @fmf_sin_cancel_f32(float addrspace(1)* %out, float %x) #1 {
+define amdgpu_kernel void @fmf_sin_cancel_f32(ptr addrspace(1) %out, float %x) #1 {
   %y = fmul reassoc float 0x401921FB60000000, %x
   %sin = call reassoc float @llvm.sin.f32(float %y)
-  store float %sin, float addrspace(1)* %out
+  store float %sin, ptr addrspace(1) %out
   ret void
 }
@@ -164,9 +164,9 @@ define amdgpu_kernel void @fmf_sin_cancel_f32(float addrspace(1)* %out, float %x
 ; GCN: v_sin_f32
 ; GCN: v_sin_f32
 ; GCN-NOT: v_sin_f32
-define amdgpu_kernel void @sin_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %vx) #1 {
+define amdgpu_kernel void @sin_v4f32(ptr addrspace(1) %out, <4 x float> %vx) #1 {
   %sin = call <4 x float> @llvm.sin.v4f32( <4 x float> %vx)
-  store <4 x float> %sin, <4 x float> addrspace(1)* %out
+  store <4 x float> %sin, ptr addrspace(1) %out
   ret void
 }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.sqrt.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.sqrt.f16.ll
index 1e00e09..f90176b 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.sqrt.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.sqrt.f16.ll
@@ -13,12 +13,12 @@ declare <2 x half> @llvm.sqrt.v2f16(<2 x half> %a)
 ; GCN: buffer_store_short v[[R_F16]]
 ; GCN: s_endpgm
 define amdgpu_kernel void @sqrt_f16(
-    half addrspace(1)* %r,
-    half addrspace(1)* %a) {
+    ptr addrspace(1) %r,
+    ptr addrspace(1) %a) {
 entry:
-  %a.val = load half, half addrspace(1)* %a
+  %a.val = load half, ptr addrspace(1) %a
   %r.val = call half @llvm.sqrt.f16(half %a.val)
-  store half %r.val, half addrspace(1)* %r
+  store half %r.val, ptr addrspace(1) %r
   ret void
 }
@@ -43,11 +43,11 @@ entry:
 ; GCN: buffer_store_dword v[[R_V2_F16]]
 ; GCN: s_endpgm
 define amdgpu_kernel void @sqrt_v2f16(
-    <2 x half> addrspace(1)* %r,
-    <2 x half> addrspace(1)* %a) {
+    ptr addrspace(1) %r,
+    ptr addrspace(1) %a) {
 entry:
-  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
+  %a.val = load <2 x half>, ptr addrspace(1) %a
   %r.val = call <2 x half> @llvm.sqrt.v2f16(<2 x half> %a.val)
-  store <2 x half> %r.val, <2 x half> addrspace(1)* %r
+  store <2 x half> %r.val, ptr addrspace(1) %r
   ret void
 }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.trunc.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.trunc.f16.ll
index 1e9212e..c36d337 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.trunc.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.trunc.f16.ll
@@ -13,12 +13,12 @@ declare <2 x half> @llvm.trunc.v2f16(<2 x half> %a)
 ; GCN: buffer_store_short v[[R_F16]]
 ; GCN: s_endpgm
 define amdgpu_kernel void @trunc_f16(
-    half addrspace(1)* %r,
-    half addrspace(1)* %a) {
+    ptr addrspace(1) %r,
+    ptr addrspace(1) %a) {
 entry:
-  %a.val = load half, half addrspace(1)* %a
+  %a.val = load half, ptr addrspace(1) %a
   %r.val = call half @llvm.trunc.f16(half %a.val)
-  store half %r.val, half addrspace(1)* %r
+  store half %r.val, ptr addrspace(1) %r
   ret void
 }
@@ -43,11 +43,11 @@ entry:
 ; GCN: buffer_store_dword v[[R_V2_F16]]
 ; GCN: s_endpgm
 define amdgpu_kernel void @trunc_v2f16(
-    <2 x half> addrspace(1)* %r,
-    <2 x half> addrspace(1)* %a) {
+    ptr addrspace(1) %r,
+    ptr addrspace(1) %a) {
 entry:
-  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
+  %a.val = load <2 x half>, ptr addrspace(1) %a
   %r.val = call <2 x half> @llvm.trunc.v2f16(<2 x half> %a.val)
-  store <2 x half> %r.val, <2 x half> addrspace(1)* %r
+  store <2 x half> %r.val, ptr addrspace(1) %r
   ret void
 }