From 6a91a5e82647f206a31706586d201ecc638e9365 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Tue, 29 Nov 2022 18:41:50 -0500 Subject: [PATCH] AMDGPU: Convert some cast tests to opaque pointers --- llvm/test/CodeGen/AMDGPU/addrspacecast-captured.ll | 32 +-- .../CodeGen/AMDGPU/addrspacecast-constantexpr.ll | 136 ++++++------ .../addrspacecast-initializer-unsupported.ll | 4 +- .../CodeGen/AMDGPU/addrspacecast-initializer.ll | 6 +- .../CodeGen/AMDGPU/addrspacecast-known-non-null.ll | 20 +- llvm/test/CodeGen/AMDGPU/addrspacecast.ll | 152 +++++++------ .../test/CodeGen/AMDGPU/any_extend_vector_inreg.ll | 17 +- llvm/test/CodeGen/AMDGPU/anyext.ll | 20 +- .../AMDGPU/codegen-prepare-addrmode-sext.ll | 6 +- llvm/test/CodeGen/AMDGPU/combine-and-sext-bool.ll | 8 +- llvm/test/CodeGen/AMDGPU/fp16_to_fp32.ll | 6 +- llvm/test/CodeGen/AMDGPU/fp16_to_fp64.ll | 6 +- llvm/test/CodeGen/AMDGPU/fp32_to_fp16.ll | 6 +- llvm/test/CodeGen/AMDGPU/fp_to_sint.f64.ll | 28 +-- llvm/test/CodeGen/AMDGPU/fp_to_sint.ll | 42 ++-- llvm/test/CodeGen/AMDGPU/fp_to_uint.f64.ll | 36 +-- llvm/test/CodeGen/AMDGPU/fp_to_uint.ll | 38 ++-- llvm/test/CodeGen/AMDGPU/fpext.f16.ll | 120 +++++----- llvm/test/CodeGen/AMDGPU/fpext.ll | 20 +- llvm/test/CodeGen/AMDGPU/fptosi.f16.ll | 52 ++--- llvm/test/CodeGen/AMDGPU/fptoui.f16.ll | 52 ++--- llvm/test/CodeGen/AMDGPU/i8-to-double-to-float.ll | 6 +- llvm/test/CodeGen/AMDGPU/setcc-sext.ll | 52 ++--- llvm/test/CodeGen/AMDGPU/sext-eliminate.ll | 8 +- llvm/test/CodeGen/AMDGPU/sext-in-reg.ll | 246 ++++++++++----------- llvm/test/CodeGen/AMDGPU/sign_extend.ll | 82 +++---- llvm/test/CodeGen/AMDGPU/sint_to_fp.f64.ll | 52 ++--- llvm/test/CodeGen/AMDGPU/sint_to_fp.i64.ll | 56 ++--- llvm/test/CodeGen/AMDGPU/sint_to_fp.ll | 52 ++--- llvm/test/CodeGen/AMDGPU/sitofp.f16.ll | 40 ++-- llvm/test/CodeGen/AMDGPU/uint_to_fp.f64.ll | 68 +++--- llvm/test/CodeGen/AMDGPU/uint_to_fp.i64.ll | 56 ++--- llvm/test/CodeGen/AMDGPU/uint_to_fp.ll | 56 ++--- llvm/test/CodeGen/AMDGPU/uitofp.f16.ll | 40 ++-- 34 files changed, 808 insertions(+), 813 deletions(-) diff --git a/llvm/test/CodeGen/AMDGPU/addrspacecast-captured.ll b/llvm/test/CodeGen/AMDGPU/addrspacecast-captured.ll index 6e3550b..db5d39e 100644 --- a/llvm/test/CodeGen/AMDGPU/addrspacecast-captured.ll +++ b/llvm/test/CodeGen/AMDGPU/addrspacecast-captured.ll @@ -7,40 +7,40 @@ declare void @consume_ptr2int(i32) #0 ; CHECK-LABEL: @addrspacecast_captured( ; CHECK: %data = alloca i32, align 4, addrspace(5) -; CHECK: %cast = addrspacecast i32 addrspace(5)* %data to i32* -; CHECK: %ptr2int = ptrtoint i32* %cast to i32 -; CHECK: store i32 %ptr2int, i32 addrspace(1)* %out -define amdgpu_kernel void @addrspacecast_captured(i32 addrspace(1)* %out) #0 { +; CHECK: %cast = addrspacecast ptr addrspace(5) %data to ptr +; CHECK: %ptr2int = ptrtoint ptr %cast to i32 +; CHECK: store i32 %ptr2int, ptr addrspace(1) %out +define amdgpu_kernel void @addrspacecast_captured(ptr addrspace(1) %out) #0 { entry: %data = alloca i32, align 4, addrspace(5) - %cast = addrspacecast i32 addrspace(5)* %data to i32* - %ptr2int = ptrtoint i32* %cast to i32 - store i32 %ptr2int, i32 addrspace(1)* %out + %cast = addrspacecast ptr addrspace(5) %data to ptr + %ptr2int = ptrtoint ptr %cast to i32 + store i32 %ptr2int, ptr addrspace(1) %out ret void } ; CHECK-LABEL: @addrspacecast_captured_store( ; CHECK: %data = alloca i32, align 4, addrspace(5) -; CHECK: %cast = addrspacecast i32 addrspace(5)* %data to i32* -; CHECK: store i32* %cast, i32* addrspace(1)* %out -define amdgpu_kernel void 
@addrspacecast_captured_store(i32* addrspace(1)* %out) #0 { +; CHECK: %cast = addrspacecast ptr addrspace(5) %data to ptr +; CHECK: store ptr %cast, ptr addrspace(1) %out +define amdgpu_kernel void @addrspacecast_captured_store(ptr addrspace(1) %out) #0 { entry: %data = alloca i32, align 4, addrspace(5) - %cast = addrspacecast i32 addrspace(5)* %data to i32* - store i32* %cast, i32* addrspace(1)* %out + %cast = addrspacecast ptr addrspace(5) %data to ptr + store ptr %cast, ptr addrspace(1) %out ret void } ; CHECK-LABEL: @addrspacecast_captured_call( ; CHECK: %data = alloca i32, align 4, addrspace(5) -; CHECK: %cast = addrspacecast i32 addrspace(5)* %data to i32* -; CHECK: %ptr2int = ptrtoint i32* %cast to i32 +; CHECK: %cast = addrspacecast ptr addrspace(5) %data to ptr +; CHECK: %ptr2int = ptrtoint ptr %cast to i32 ; CHECK: call void @consume_ptr2int(i32 %ptr2int) define amdgpu_kernel void @addrspacecast_captured_call() #0 { entry: %data = alloca i32, align 4, addrspace(5) - %cast = addrspacecast i32 addrspace(5)* %data to i32* - %ptr2int = ptrtoint i32* %cast to i32 + %cast = addrspacecast ptr addrspace(5) %data to ptr + %ptr2int = ptrtoint ptr %cast to i32 call void @consume_ptr2int(i32 %ptr2int) ret void } diff --git a/llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll b/llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll index b3be370..66f249f 100644 --- a/llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll +++ b/llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll @@ -2,7 +2,7 @@ ; RUN: opt -mtriple=amdgcn-unknown-amdhsa -S -amdgpu-annotate-kernel-features < %s | FileCheck -check-prefixes=HSA,AKF_HSA %s ; RUN: opt -mtriple=amdgcn-unknown-amdhsa -S -amdgpu-attributor < %s | FileCheck -check-prefixes=HSA,ATTRIBUTOR_HSA %s -declare void @llvm.memcpy.p1i32.p4i32.i32(i32 addrspace(1)* nocapture, i32 addrspace(4)* nocapture, i32, i1) #0 +declare void @llvm.memcpy.p1.p4.i32(ptr addrspace(1) nocapture, ptr addrspace(4) nocapture, i32, i1) #0 @lds.i32 = unnamed_addr addrspace(3) global i32 undef, align 4 @lds.arr = unnamed_addr addrspace(3) global [256 x i32] undef, align 4 @@ -19,179 +19,179 @@ declare void @llvm.memcpy.p1i32.p4i32.i32(i32 addrspace(1)* nocapture, i32 addrs define amdgpu_kernel void @store_cast_0_flat_to_group_addrspacecast() #1 { ; HSA-LABEL: define {{[^@]+}}@store_cast_0_flat_to_group_addrspacecast ; HSA-SAME: () #[[ATTR1:[0-9]+]] { -; HSA-NEXT: store i32 7, i32 addrspace(3)* addrspacecast (i32 addrspace(4)* null to i32 addrspace(3)*), align 4 +; HSA-NEXT: store i32 7, ptr addrspace(3) addrspacecast (ptr addrspace(4) null to ptr addrspace(3)), align 4 ; HSA-NEXT: ret void ; - store i32 7, i32 addrspace(3)* addrspacecast (i32 addrspace(4)* null to i32 addrspace(3)*) + store i32 7, ptr addrspace(3) addrspacecast (ptr addrspace(4) null to ptr addrspace(3)) ret void } define amdgpu_kernel void @store_cast_0_group_to_flat_addrspacecast() #1 { ; AKF_HSA-LABEL: define {{[^@]+}}@store_cast_0_group_to_flat_addrspacecast ; AKF_HSA-SAME: () #[[ATTR1]] { -; AKF_HSA-NEXT: store i32 7, i32 addrspace(4)* addrspacecast (i32 addrspace(3)* null to i32 addrspace(4)*), align 4 +; AKF_HSA-NEXT: store i32 7, ptr addrspace(4) addrspacecast (ptr addrspace(3) null to ptr addrspace(4)), align 4 ; AKF_HSA-NEXT: ret void ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@store_cast_0_group_to_flat_addrspacecast ; ATTRIBUTOR_HSA-SAME: () #[[ATTR2:[0-9]+]] { -; ATTRIBUTOR_HSA-NEXT: store i32 7, i32 addrspace(4)* addrspacecast (i32 addrspace(3)* null to i32 addrspace(4)*), align 4 +; 
ATTRIBUTOR_HSA-NEXT: store i32 7, ptr addrspace(4) addrspacecast (ptr addrspace(3) null to ptr addrspace(4)), align 4 ; ATTRIBUTOR_HSA-NEXT: ret void ; - store i32 7, i32 addrspace(4)* addrspacecast (i32 addrspace(3)* null to i32 addrspace(4)*) + store i32 7, ptr addrspace(4) addrspacecast (ptr addrspace(3) null to ptr addrspace(4)) ret void } define amdgpu_kernel void @store_constant_cast_group_gv_to_flat() #1 { ; AKF_HSA-LABEL: define {{[^@]+}}@store_constant_cast_group_gv_to_flat ; AKF_HSA-SAME: () #[[ATTR1]] { -; AKF_HSA-NEXT: store i32 7, i32 addrspace(4)* addrspacecast (i32 addrspace(3)* @lds.i32 to i32 addrspace(4)*), align 4 +; AKF_HSA-NEXT: store i32 7, ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.i32 to ptr addrspace(4)), align 4 ; AKF_HSA-NEXT: ret void ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@store_constant_cast_group_gv_to_flat ; ATTRIBUTOR_HSA-SAME: () #[[ATTR2]] { -; ATTRIBUTOR_HSA-NEXT: store i32 7, i32 addrspace(4)* addrspacecast (i32 addrspace(3)* @lds.i32 to i32 addrspace(4)*), align 4 +; ATTRIBUTOR_HSA-NEXT: store i32 7, ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.i32 to ptr addrspace(4)), align 4 ; ATTRIBUTOR_HSA-NEXT: ret void ; - store i32 7, i32 addrspace(4)* addrspacecast (i32 addrspace(3)* @lds.i32 to i32 addrspace(4)*) + store i32 7, ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.i32 to ptr addrspace(4)) ret void } define amdgpu_kernel void @store_constant_cast_group_gv_gep_to_flat() #1 { ; AKF_HSA-LABEL: define {{[^@]+}}@store_constant_cast_group_gv_gep_to_flat ; AKF_HSA-SAME: () #[[ATTR1]] { -; AKF_HSA-NEXT: store i32 7, i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8), align 4 +; AKF_HSA-NEXT: store i32 7, ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8), align 4 ; AKF_HSA-NEXT: ret void ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@store_constant_cast_group_gv_gep_to_flat ; ATTRIBUTOR_HSA-SAME: () #[[ATTR2]] { -; ATTRIBUTOR_HSA-NEXT: store i32 7, i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8), align 4 +; ATTRIBUTOR_HSA-NEXT: store i32 7, ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8), align 4 ; ATTRIBUTOR_HSA-NEXT: ret void ; - store i32 7, i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8) + store i32 7, ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8) ret void } define amdgpu_kernel void @store_constant_cast_global_gv_to_flat() #1 { ; HSA-LABEL: define {{[^@]+}}@store_constant_cast_global_gv_to_flat ; HSA-SAME: () #[[ATTR1]] { -; HSA-NEXT: store i32 7, i32 addrspace(4)* addrspacecast (i32 addrspace(1)* @global.i32 to i32 addrspace(4)*), align 4 +; HSA-NEXT: store i32 7, ptr addrspace(4) addrspacecast (ptr addrspace(1) @global.i32 to ptr addrspace(4)), align 4 ; HSA-NEXT: ret void ; - store i32 7, i32 addrspace(4)* addrspacecast (i32 addrspace(1)* @global.i32 to i32 addrspace(4)*) + store i32 7, ptr addrspace(4) addrspacecast (ptr addrspace(1) @global.i32 to ptr addrspace(4)) ret void } define amdgpu_kernel void 
@store_constant_cast_global_gv_gep_to_flat() #1 { ; HSA-LABEL: define {{[^@]+}}@store_constant_cast_global_gv_gep_to_flat ; HSA-SAME: () #[[ATTR1]] { -; HSA-NEXT: store i32 7, i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(1)* @global.arr to [256 x i32] addrspace(4)*), i64 0, i64 8), align 4 +; HSA-NEXT: store i32 7, ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(1) @global.arr to ptr addrspace(4)), i64 0, i64 8), align 4 ; HSA-NEXT: ret void ; - store i32 7, i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(1)* @global.arr to [256 x i32] addrspace(4)*), i64 0, i64 8) + store i32 7, ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(1) @global.arr to ptr addrspace(4)), i64 0, i64 8) ret void } -define amdgpu_kernel void @load_constant_cast_group_gv_gep_to_flat(i32 addrspace(1)* %out) #1 { +define amdgpu_kernel void @load_constant_cast_group_gv_gep_to_flat(ptr addrspace(1) %out) #1 { ; AKF_HSA-LABEL: define {{[^@]+}}@load_constant_cast_group_gv_gep_to_flat -; AKF_HSA-SAME: (i32 addrspace(1)* [[OUT:%.*]]) #[[ATTR1]] { -; AKF_HSA-NEXT: [[VAL:%.*]] = load i32, i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8), align 4 -; AKF_HSA-NEXT: store i32 [[VAL]], i32 addrspace(1)* [[OUT]], align 4 +; AKF_HSA-SAME: (ptr addrspace(1) [[OUT:%.*]]) #[[ATTR1]] { +; AKF_HSA-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8), align 4 +; AKF_HSA-NEXT: store i32 [[VAL]], ptr addrspace(1) [[OUT]], align 4 ; AKF_HSA-NEXT: ret void ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@load_constant_cast_group_gv_gep_to_flat -; ATTRIBUTOR_HSA-SAME: (i32 addrspace(1)* [[OUT:%.*]]) #[[ATTR2]] { -; ATTRIBUTOR_HSA-NEXT: [[VAL:%.*]] = load i32, i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8), align 4 -; ATTRIBUTOR_HSA-NEXT: store i32 [[VAL]], i32 addrspace(1)* [[OUT]], align 4 +; ATTRIBUTOR_HSA-SAME: (ptr addrspace(1) [[OUT:%.*]]) #[[ATTR2]] { +; ATTRIBUTOR_HSA-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8), align 4 +; ATTRIBUTOR_HSA-NEXT: store i32 [[VAL]], ptr addrspace(1) [[OUT]], align 4 ; ATTRIBUTOR_HSA-NEXT: ret void ; - %val = load i32, i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8) - store i32 %val, i32 addrspace(1)* %out + %val = load i32, ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8) + store i32 %val, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @atomicrmw_constant_cast_group_gv_gep_to_flat(i32 addrspace(1)* %out) #1 { +define amdgpu_kernel void @atomicrmw_constant_cast_group_gv_gep_to_flat(ptr addrspace(1) %out) #1 { ; AKF_HSA-LABEL: define {{[^@]+}}@atomicrmw_constant_cast_group_gv_gep_to_flat -; AKF_HSA-SAME: (i32 addrspace(1)* [[OUT:%.*]]) #[[ATTR1]] { -; AKF_HSA-NEXT: [[VAL:%.*]] = atomicrmw add i32 addrspace(4)* 
getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8), i32 1 seq_cst, align 4 -; AKF_HSA-NEXT: store i32 [[VAL]], i32 addrspace(1)* [[OUT]], align 4 +; AKF_HSA-SAME: (ptr addrspace(1) [[OUT:%.*]]) #[[ATTR1]] { +; AKF_HSA-NEXT: [[VAL:%.*]] = atomicrmw add ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8), i32 1 seq_cst, align 4 +; AKF_HSA-NEXT: store i32 [[VAL]], ptr addrspace(1) [[OUT]], align 4 ; AKF_HSA-NEXT: ret void ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@atomicrmw_constant_cast_group_gv_gep_to_flat -; ATTRIBUTOR_HSA-SAME: (i32 addrspace(1)* [[OUT:%.*]]) #[[ATTR2]] { -; ATTRIBUTOR_HSA-NEXT: [[VAL:%.*]] = atomicrmw add i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8), i32 1 seq_cst, align 4 -; ATTRIBUTOR_HSA-NEXT: store i32 [[VAL]], i32 addrspace(1)* [[OUT]], align 4 +; ATTRIBUTOR_HSA-SAME: (ptr addrspace(1) [[OUT:%.*]]) #[[ATTR2]] { +; ATTRIBUTOR_HSA-NEXT: [[VAL:%.*]] = atomicrmw add ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8), i32 1 seq_cst, align 4 +; ATTRIBUTOR_HSA-NEXT: store i32 [[VAL]], ptr addrspace(1) [[OUT]], align 4 ; ATTRIBUTOR_HSA-NEXT: ret void ; - %val = atomicrmw add i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8), i32 1 seq_cst - store i32 %val, i32 addrspace(1)* %out + %val = atomicrmw add ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8), i32 1 seq_cst + store i32 %val, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @cmpxchg_constant_cast_group_gv_gep_to_flat(i32 addrspace(1)* %out) #1 { +define amdgpu_kernel void @cmpxchg_constant_cast_group_gv_gep_to_flat(ptr addrspace(1) %out) #1 { ; AKF_HSA-LABEL: define {{[^@]+}}@cmpxchg_constant_cast_group_gv_gep_to_flat -; AKF_HSA-SAME: (i32 addrspace(1)* [[OUT:%.*]]) #[[ATTR1]] { -; AKF_HSA-NEXT: [[VAL:%.*]] = cmpxchg i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8), i32 0, i32 1 seq_cst seq_cst, align 4 +; AKF_HSA-SAME: (ptr addrspace(1) [[OUT:%.*]]) #[[ATTR1]] { +; AKF_HSA-NEXT: [[VAL:%.*]] = cmpxchg ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8), i32 0, i32 1 seq_cst seq_cst, align 4 ; AKF_HSA-NEXT: [[VAL0:%.*]] = extractvalue { i32, i1 } [[VAL]], 0 -; AKF_HSA-NEXT: store i32 [[VAL0]], i32 addrspace(1)* [[OUT]], align 4 +; AKF_HSA-NEXT: store i32 [[VAL0]], ptr addrspace(1) [[OUT]], align 4 ; AKF_HSA-NEXT: ret void ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@cmpxchg_constant_cast_group_gv_gep_to_flat -; ATTRIBUTOR_HSA-SAME: (i32 addrspace(1)* [[OUT:%.*]]) #[[ATTR2]] { -; ATTRIBUTOR_HSA-NEXT: [[VAL:%.*]] = cmpxchg i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8), i32 0, i32 1 seq_cst seq_cst, align 4 +; ATTRIBUTOR_HSA-SAME: (ptr addrspace(1) [[OUT:%.*]]) #[[ATTR2]] { +; ATTRIBUTOR_HSA-NEXT: 
[[VAL:%.*]] = cmpxchg ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8), i32 0, i32 1 seq_cst seq_cst, align 4 ; ATTRIBUTOR_HSA-NEXT: [[VAL0:%.*]] = extractvalue { i32, i1 } [[VAL]], 0 -; ATTRIBUTOR_HSA-NEXT: store i32 [[VAL0]], i32 addrspace(1)* [[OUT]], align 4 +; ATTRIBUTOR_HSA-NEXT: store i32 [[VAL0]], ptr addrspace(1) [[OUT]], align 4 ; ATTRIBUTOR_HSA-NEXT: ret void ; - %val = cmpxchg i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8), i32 0, i32 1 seq_cst seq_cst + %val = cmpxchg ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8), i32 0, i32 1 seq_cst seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(1)* %out + store i32 %val0, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @memcpy_constant_cast_group_gv_gep_to_flat(i32 addrspace(1)* %out) #1 { +define amdgpu_kernel void @memcpy_constant_cast_group_gv_gep_to_flat(ptr addrspace(1) %out) #1 { ; AKF_HSA-LABEL: define {{[^@]+}}@memcpy_constant_cast_group_gv_gep_to_flat -; AKF_HSA-SAME: (i32 addrspace(1)* [[OUT:%.*]]) #[[ATTR1]] { -; AKF_HSA-NEXT: call void @llvm.memcpy.p1i32.p4i32.i32(i32 addrspace(1)* align 4 [[OUT]], i32 addrspace(4)* align 4 getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8), i32 32, i1 false) +; AKF_HSA-SAME: (ptr addrspace(1) [[OUT:%.*]]) #[[ATTR1]] { +; AKF_HSA-NEXT: call void @llvm.memcpy.p1.p4.i32(ptr addrspace(1) align 4 [[OUT]], ptr addrspace(4) align 4 getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8), i32 32, i1 false) ; AKF_HSA-NEXT: ret void ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@memcpy_constant_cast_group_gv_gep_to_flat -; ATTRIBUTOR_HSA-SAME: (i32 addrspace(1)* [[OUT:%.*]]) #[[ATTR2]] { -; ATTRIBUTOR_HSA-NEXT: call void @llvm.memcpy.p1i32.p4i32.i32(i32 addrspace(1)* align 4 [[OUT]], i32 addrspace(4)* align 4 getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8), i32 32, i1 false) +; ATTRIBUTOR_HSA-SAME: (ptr addrspace(1) [[OUT:%.*]]) #[[ATTR2]] { +; ATTRIBUTOR_HSA-NEXT: call void @llvm.memcpy.p1.p4.i32(ptr addrspace(1) align 4 [[OUT]], ptr addrspace(4) align 4 getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8), i32 32, i1 false) ; ATTRIBUTOR_HSA-NEXT: ret void ; - call void @llvm.memcpy.p1i32.p4i32.i32(i32 addrspace(1)* align 4 %out, i32 addrspace(4)* align 4 getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8), i32 32, i1 false) + call void @llvm.memcpy.p1.p4.i32(ptr addrspace(1) align 4 %out, ptr addrspace(4) align 4 getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8), i32 32, i1 false) ret void } ; Can't just search the pointer value -define amdgpu_kernel void @store_value_constant_cast_lds_gv_gep_to_flat(i32 addrspace(4)* addrspace(1)* %out) #1 { +define amdgpu_kernel void @store_value_constant_cast_lds_gv_gep_to_flat(ptr addrspace(1) %out) #1 { ; AKF_HSA-LABEL: define 
{{[^@]+}}@store_value_constant_cast_lds_gv_gep_to_flat -; AKF_HSA-SAME: (i32 addrspace(4)* addrspace(1)* [[OUT:%.*]]) #[[ATTR1]] { -; AKF_HSA-NEXT: store i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8), i32 addrspace(4)* addrspace(1)* [[OUT]], align 8 +; AKF_HSA-SAME: (ptr addrspace(1) [[OUT:%.*]]) #[[ATTR1]] { +; AKF_HSA-NEXT: store ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8), ptr addrspace(1) [[OUT]], align 8 ; AKF_HSA-NEXT: ret void ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@store_value_constant_cast_lds_gv_gep_to_flat -; ATTRIBUTOR_HSA-SAME: (i32 addrspace(4)* addrspace(1)* [[OUT:%.*]]) #[[ATTR2]] { -; ATTRIBUTOR_HSA-NEXT: store i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8), i32 addrspace(4)* addrspace(1)* [[OUT]], align 8 +; ATTRIBUTOR_HSA-SAME: (ptr addrspace(1) [[OUT:%.*]]) #[[ATTR2]] { +; ATTRIBUTOR_HSA-NEXT: store ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8), ptr addrspace(1) [[OUT]], align 8 ; ATTRIBUTOR_HSA-NEXT: ret void ; - store i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8), i32 addrspace(4)* addrspace(1)* %out + store ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8), ptr addrspace(1) %out ret void } ; Can't just search pointer types -define amdgpu_kernel void @store_ptrtoint_value_constant_cast_lds_gv_gep_to_flat(i64 addrspace(1)* %out) #1 { +define amdgpu_kernel void @store_ptrtoint_value_constant_cast_lds_gv_gep_to_flat(ptr addrspace(1) %out) #1 { ; AKF_HSA-LABEL: define {{[^@]+}}@store_ptrtoint_value_constant_cast_lds_gv_gep_to_flat -; AKF_HSA-SAME: (i64 addrspace(1)* [[OUT:%.*]]) #[[ATTR1]] { -; AKF_HSA-NEXT: store i64 ptrtoint (i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8) to i64), i64 addrspace(1)* [[OUT]], align 4 +; AKF_HSA-SAME: (ptr addrspace(1) [[OUT:%.*]]) #[[ATTR1]] { +; AKF_HSA-NEXT: store i64 ptrtoint (ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8) to i64), ptr addrspace(1) [[OUT]], align 4 ; AKF_HSA-NEXT: ret void ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@store_ptrtoint_value_constant_cast_lds_gv_gep_to_flat -; ATTRIBUTOR_HSA-SAME: (i64 addrspace(1)* [[OUT:%.*]]) #[[ATTR2]] { -; ATTRIBUTOR_HSA-NEXT: store i64 ptrtoint (i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8) to i64), i64 addrspace(1)* [[OUT]], align 4 +; ATTRIBUTOR_HSA-SAME: (ptr addrspace(1) [[OUT:%.*]]) #[[ATTR2]] { +; ATTRIBUTOR_HSA-NEXT: store i64 ptrtoint (ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8) to i64), ptr addrspace(1) [[OUT]], align 4 ; ATTRIBUTOR_HSA-NEXT: ret void ; - store i64 ptrtoint (i32 addrspace(4)* getelementptr ([256 x i32], [256 x 
i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8) to i64), i64 addrspace(1)* %out + store i64 ptrtoint (ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8) to i64), ptr addrspace(1) %out ret void } @@ -199,28 +199,28 @@ define amdgpu_kernel void @store_ptrtoint_value_constant_cast_lds_gv_gep_to_flat define amdgpu_kernel void @store_constant_cast_group_gv_gep_to_flat_to_group() #1 { ; AKF_HSA-LABEL: define {{[^@]+}}@store_constant_cast_group_gv_gep_to_flat_to_group ; AKF_HSA-SAME: () #[[ATTR1]] { -; AKF_HSA-NEXT: store i32 7, i32 addrspace(3)* addrspacecast (i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8) to i32 addrspace(3)*), align 4 +; AKF_HSA-NEXT: store i32 7, ptr addrspace(3) addrspacecast (ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8) to ptr addrspace(3)), align 4 ; AKF_HSA-NEXT: ret void ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@store_constant_cast_group_gv_gep_to_flat_to_group ; ATTRIBUTOR_HSA-SAME: () #[[ATTR2]] { -; ATTRIBUTOR_HSA-NEXT: store i32 7, i32 addrspace(3)* addrspacecast (i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8) to i32 addrspace(3)*), align 4 +; ATTRIBUTOR_HSA-NEXT: store i32 7, ptr addrspace(3) addrspacecast (ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8) to ptr addrspace(3)), align 4 ; ATTRIBUTOR_HSA-NEXT: ret void ; - store i32 7, i32 addrspace(3)* addrspacecast (i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8) to i32 addrspace(3)*) + store i32 7, ptr addrspace(3) addrspacecast (ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8) to ptr addrspace(3)) ret void } -define i32 addrspace(3)* @ret_constant_cast_group_gv_gep_to_flat_to_group() #1 { +define ptr addrspace(3) @ret_constant_cast_group_gv_gep_to_flat_to_group() #1 { ; AKF_HSA-LABEL: define {{[^@]+}}@ret_constant_cast_group_gv_gep_to_flat_to_group ; AKF_HSA-SAME: () #[[ATTR1]] { -; AKF_HSA-NEXT: ret i32 addrspace(3)* addrspacecast (i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8) to i32 addrspace(3)*) +; AKF_HSA-NEXT: ret ptr addrspace(3) addrspacecast (ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8) to ptr addrspace(3)) ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@ret_constant_cast_group_gv_gep_to_flat_to_group ; ATTRIBUTOR_HSA-SAME: () #[[ATTR2]] { -; ATTRIBUTOR_HSA-NEXT: ret i32 addrspace(3)* addrspacecast (i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8) to i32 addrspace(3)*) +; ATTRIBUTOR_HSA-NEXT: ret ptr addrspace(3) addrspacecast (ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr 
addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8) to ptr addrspace(3)) ; - ret i32 addrspace(3)* addrspacecast (i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8) to i32 addrspace(3)*) + ret ptr addrspace(3) addrspacecast (ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8) to ptr addrspace(3)) } attributes #0 = { argmemonly nounwind } diff --git a/llvm/test/CodeGen/AMDGPU/addrspacecast-initializer-unsupported.ll b/llvm/test/CodeGen/AMDGPU/addrspacecast-initializer-unsupported.ll index 593e37f..cbd3e68 100644 --- a/llvm/test/CodeGen/AMDGPU/addrspacecast-initializer-unsupported.ll +++ b/llvm/test/CodeGen/AMDGPU/addrspacecast-initializer-unsupported.ll @@ -1,7 +1,7 @@ ; RUN: not --crash llc -march=amdgcn -verify-machineinstrs -amdgpu-enable-lower-module-lds=false < %s 2>&1 | FileCheck -check-prefix=ERROR %s -; ERROR: LLVM ERROR: Unsupported expression in static initializer: addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*) +; ERROR: LLVM ERROR: Unsupported expression in static initializer: addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)) @lds.arr = unnamed_addr addrspace(3) global [256 x i32] undef, align 4 -@gv_flatptr_from_lds = unnamed_addr addrspace(2) global i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8), align 4 +@gv_flatptr_from_lds = unnamed_addr addrspace(2) global ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8), align 4 diff --git a/llvm/test/CodeGen/AMDGPU/addrspacecast-initializer.ll b/llvm/test/CodeGen/AMDGPU/addrspacecast-initializer.ll index 4f5082f..e22fcd8 100644 --- a/llvm/test/CodeGen/AMDGPU/addrspacecast-initializer.ll +++ b/llvm/test/CodeGen/AMDGPU/addrspacecast-initializer.ll @@ -19,9 +19,9 @@ @global.arr = unnamed_addr addrspace(1) global [256 x i32] undef, align 4 @constant.arr = external unnamed_addr addrspace(4) global [256 x i32], align 4 -@gv_flatptr_from_global = unnamed_addr addrspace(4) global i32 addrspace(0)* getelementptr ([256 x i32], [256 x i32] addrspace(0)* addrspacecast ([256 x i32] addrspace(1)* @global.arr to [256 x i32] addrspace(0)*), i64 0, i64 8), align 4 +@gv_flatptr_from_global = unnamed_addr addrspace(4) global ptr addrspace(0) getelementptr ([256 x i32], ptr addrspace(0) addrspacecast (ptr addrspace(1) @global.arr to ptr addrspace(0)), i64 0, i64 8), align 4 -@gv_global_ptr = unnamed_addr addrspace(4) global i32 addrspace(1)* getelementptr ([256 x i32], [256 x i32] addrspace(1)* @global.arr, i64 0, i64 8), align 4 +@gv_global_ptr = unnamed_addr addrspace(4) global ptr addrspace(1) getelementptr ([256 x i32], ptr addrspace(1) @global.arr, i64 0, i64 8), align 4 -@gv_flatptr_from_constant = unnamed_addr addrspace(4) global i32 addrspace(0)* getelementptr ([256 x i32], [256 x i32] addrspace(0)* addrspacecast ([256 x i32] addrspace(4)* @constant.arr to [256 x i32] addrspace(0)*), i64 0, i64 8), align 4 +@gv_flatptr_from_constant = unnamed_addr addrspace(4) global ptr addrspace(0) getelementptr ([256 x i32], ptr addrspace(0) addrspacecast (ptr addrspace(4) @constant.arr to ptr addrspace(0)), i64 0, i64 8), align 4 diff --git a/llvm/test/CodeGen/AMDGPU/addrspacecast-known-non-null.ll 
b/llvm/test/CodeGen/AMDGPU/addrspacecast-known-non-null.ll index 2edaa88..5de8a6f 100644 --- a/llvm/test/CodeGen/AMDGPU/addrspacecast-known-non-null.ll +++ b/llvm/test/CodeGen/AMDGPU/addrspacecast-known-non-null.ll @@ -4,8 +4,8 @@ ; Test that a null check is not emitted for lowered addrspacecast -define void @flat_user(i8* %ptr) { - store i8 0, i8* %ptr +define void @flat_user(ptr %ptr) { + store i8 0, ptr %ptr ret void } @@ -18,8 +18,8 @@ define void @flat_user(i8* %ptr) { ; CHECK-NOT: v1 define void @cast_alloca() { %alloca = alloca i8, addrspace(5) - %cast = addrspacecast i8 addrspace(5)* %alloca to i8* - call void @flat_user(i8* %cast) + %cast = addrspacecast ptr addrspace(5) %alloca to ptr + call void @flat_user(ptr %cast) ret void } @@ -33,8 +33,8 @@ define void @cast_alloca() { ; CHECK-NOT: v0 ; CHECK-NOT: v1 define void @cast_lds_gv() { - %cast = addrspacecast i8 addrspace(3)* @lds to i8* - call void @flat_user(i8* %cast) + %cast = addrspacecast ptr addrspace(3) @lds to ptr + call void @flat_user(ptr %cast) ret void } @@ -42,7 +42,7 @@ define void @cast_lds_gv() { ; CHECK: v_mov_b32_e32 v0, 0 ; CHECK: v_mov_b32_e32 v1, 0 define void @cast_constant_lds_neg1_gv() { - call void @flat_user(i8* addrspacecast (i8 addrspace(3)* inttoptr (i32 -1 to i8 addrspace(3)*) to i8*)) + call void @flat_user(ptr addrspacecast (ptr addrspace(3) inttoptr (i32 -1 to ptr addrspace(3)) to ptr)) ret void } @@ -50,7 +50,7 @@ define void @cast_constant_lds_neg1_gv() { ; CHECK: v_mov_b32_e32 v0, 0 ; CHECK: v_mov_b32_e32 v1, 0 define void @cast_constant_private_neg1_gv() { - call void @flat_user(i8* addrspacecast (i8 addrspace(5)* inttoptr (i32 -1 to i8 addrspace(5)*) to i8*)) + call void @flat_user(ptr addrspacecast (ptr addrspace(5) inttoptr (i32 -1 to ptr addrspace(5)) to ptr)) ret void } @@ -60,7 +60,7 @@ define void @cast_constant_private_neg1_gv() { ; CHECK: v_mov_b32_e32 v0, 0x7b ; CHECK: v_mov_b32_e32 v1, [[APERTURE]] define void @cast_constant_lds_other_gv() { - call void @flat_user(i8* addrspacecast (i8 addrspace(3)* inttoptr (i32 123 to i8 addrspace(3)*) to i8*)) + call void @flat_user(ptr addrspacecast (ptr addrspace(3) inttoptr (i32 123 to ptr addrspace(3)) to ptr)) ret void } @@ -70,6 +70,6 @@ define void @cast_constant_lds_other_gv() { ; CHECK: v_mov_b32_e32 v0, 0x7b ; CHECK: v_mov_b32_e32 v1, [[APERTURE]] define void @cast_constant_private_other_gv() { - call void @flat_user(i8* addrspacecast (i8 addrspace(5)* inttoptr (i32 123 to i8 addrspace(5)*) to i8*)) + call void @flat_user(ptr addrspacecast (ptr addrspace(5) inttoptr (i32 123 to ptr addrspace(5)) to ptr)) ret void } diff --git a/llvm/test/CodeGen/AMDGPU/addrspacecast.ll b/llvm/test/CodeGen/AMDGPU/addrspacecast.ll index f576079..b446166 100644 --- a/llvm/test/CodeGen/AMDGPU/addrspacecast.ll +++ b/llvm/test/CodeGen/AMDGPU/addrspacecast.ll @@ -29,9 +29,9 @@ ; number SGPR. 
; HSA: NumSgprs: {{[0-9]+}} -define amdgpu_kernel void @use_group_to_flat_addrspacecast(i32 addrspace(3)* %ptr) #0 { - %stof = addrspacecast i32 addrspace(3)* %ptr to i32* - store volatile i32 7, i32* %stof +define amdgpu_kernel void @use_group_to_flat_addrspacecast(ptr addrspace(3) %ptr) #0 { + %stof = addrspacecast ptr addrspace(3) %ptr to ptr + store volatile i32 7, ptr %stof ret void } @@ -54,9 +54,9 @@ define amdgpu_kernel void @use_group_to_flat_addrspacecast(i32 addrspace(3)* %pt ; GFX9-DAG: v_cndmask_b32_e32 v[[HI:[0-9]+]], 0, [[VAPERTURE]], vcc ; HSA: flat_store_dword v[[[LO]]:[[HI]]], [[K]] -define void @use_group_to_flat_addrspacecast_func(i32 addrspace(3)* %ptr) #0 { - %stof = addrspacecast i32 addrspace(3)* %ptr to i32* - store volatile i32 7, i32* %stof +define void @use_group_to_flat_addrspacecast_func(ptr addrspace(3) %ptr) #0 { + %stof = addrspacecast ptr addrspace(3) %ptr to ptr + store volatile i32 7, ptr %stof ret void } @@ -88,9 +88,9 @@ define void @use_group_to_flat_addrspacecast_func(i32 addrspace(3)* %ptr) #0 { ; HSA: flat_store_dword v[[[LO]]:[[HI]]], [[K]] ; HSA: NumSgprs: {{[0-9]+}} -define amdgpu_kernel void @use_private_to_flat_addrspacecast(i32 addrspace(5)* %ptr) #0 { - %stof = addrspacecast i32 addrspace(5)* %ptr to i32* - store volatile i32 7, i32* %stof +define amdgpu_kernel void @use_private_to_flat_addrspacecast(ptr addrspace(5) %ptr) #0 { + %stof = addrspacecast ptr addrspace(5) %ptr to ptr + store volatile i32 7, ptr %stof ret void } @@ -103,9 +103,9 @@ define amdgpu_kernel void @use_private_to_flat_addrspacecast(i32 addrspace(5)* % ; HSA-DAG: v_mov_b32_e32 v[[VPTRHI:[0-9]+]], s[[PTRHI]] ; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7 ; HSA: flat_store_dword v[[[VPTRLO]]:[[VPTRHI]]], [[K]] -define amdgpu_kernel void @use_global_to_flat_addrspacecast(i32 addrspace(1)* %ptr) #0 { - %stof = addrspacecast i32 addrspace(1)* %ptr to i32* - store volatile i32 7, i32* %stof +define amdgpu_kernel void @use_global_to_flat_addrspacecast(ptr addrspace(1) %ptr) #0 { + %stof = addrspacecast ptr addrspace(1) %ptr to ptr + store volatile i32 7, ptr %stof ret void } @@ -115,9 +115,9 @@ define amdgpu_kernel void @use_global_to_flat_addrspacecast(i32 addrspace(1)* %p ; HSA-DAG: v_mov_b32_e32 v[[VPTRLO:[0-9]+]], s[[PTRLO]] ; HSA-DAG: v_mov_b32_e32 v[[VPTRHI:[0-9]+]], s[[PTRHI]] ; HSA: flat_load_dword v{{[0-9]+}}, v[[[VPTRLO]]:[[VPTRHI]]] -define amdgpu_kernel void @use_constant_to_flat_addrspacecast(i32 addrspace(4)* %ptr) #0 { - %stof = addrspacecast i32 addrspace(4)* %ptr to i32* - %ld = load volatile i32, i32* %stof +define amdgpu_kernel void @use_constant_to_flat_addrspacecast(ptr addrspace(4) %ptr) #0 { + %stof = addrspacecast ptr addrspace(4) %ptr to ptr + %ld = load volatile i32, ptr %stof ret void } @@ -129,9 +129,9 @@ define amdgpu_kernel void @use_constant_to_flat_addrspacecast(i32 addrspace(4)* ; GFX9: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}} ; GFX9: global_load_dword v{{[0-9]+}}, [[ZERO:v[0-9]+]], s[[[PTRLO]]:[[PTRHI]]] -define amdgpu_kernel void @use_constant_to_global_addrspacecast(i32 addrspace(4)* %ptr) #0 { - %stof = addrspacecast i32 addrspace(4)* %ptr to i32 addrspace(1)* - %ld = load volatile i32, i32 addrspace(1)* %stof +define amdgpu_kernel void @use_constant_to_global_addrspacecast(ptr addrspace(4) %ptr) #0 { + %stof = addrspacecast ptr addrspace(4) %ptr to ptr addrspace(1) + %ld = load volatile i32, ptr addrspace(1) %stof ret void } @@ -151,9 +151,9 @@ define amdgpu_kernel void @use_constant_to_global_addrspacecast(i32 addrspace(4) ; GFX9-DAG: 
v_mov_b32_e32 [[CASTPTR:v[0-9]+]], s[[PTR_LO]] ; CI-DAG: ds_write_b32 [[VCASTPTR]], v[[K]] ; GFX9-DAG: ds_write_b32 [[CASTPTR]], v[[K]] -define amdgpu_kernel void @use_flat_to_group_addrspacecast(i32* %ptr) #0 { - %ftos = addrspacecast i32* %ptr to i32 addrspace(3)* - store volatile i32 0, i32 addrspace(3)* %ftos +define amdgpu_kernel void @use_flat_to_group_addrspacecast(ptr %ptr) #0 { + %ftos = addrspacecast ptr %ptr to ptr addrspace(3) + store volatile i32 0, ptr addrspace(3) %ftos ret void } @@ -176,9 +176,9 @@ define amdgpu_kernel void @use_flat_to_group_addrspacecast(i32* %ptr) #0 { ; GFX9-DAG: v_mov_b32_e32 [[CASTPTR:v[0-9]+]], s[[PTR_LO]] ; CI: buffer_store_dword v[[K]], [[VCASTPTR]], s{{\[[0-9]+:[0-9]+\]}}, 0 offen{{$}} ; GFX9: buffer_store_dword v[[K]], [[CASTPTR]], s{{\[[0-9]+:[0-9]+\]}}, 0 offen{{$}} -define amdgpu_kernel void @use_flat_to_private_addrspacecast(i32* %ptr) #0 { - %ftos = addrspacecast i32* %ptr to i32 addrspace(5)* - store volatile i32 0, i32 addrspace(5)* %ftos +define amdgpu_kernel void @use_flat_to_private_addrspacecast(ptr %ptr) #0 { + %ftos = addrspacecast ptr %ptr to ptr addrspace(5) + store volatile i32 0, ptr addrspace(5) %ftos ret void } @@ -193,9 +193,9 @@ define amdgpu_kernel void @use_flat_to_private_addrspacecast(i32* %ptr) #0 { ; GFX9: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0 ; GFX9: global_store_dword [[ZERO]], [[ZERO]], s[[[PTRLO]]:[[PTRHI]]{{\]$}} -define amdgpu_kernel void @use_flat_to_global_addrspacecast(i32* %ptr) #0 { - %ftos = addrspacecast i32* %ptr to i32 addrspace(1)* - store volatile i32 0, i32 addrspace(1)* %ftos +define amdgpu_kernel void @use_flat_to_global_addrspacecast(ptr %ptr) #0 { + %ftos = addrspacecast ptr %ptr to ptr addrspace(1) + store volatile i32 0, ptr addrspace(1) %ftos ret void } @@ -204,9 +204,9 @@ define amdgpu_kernel void @use_flat_to_global_addrspacecast(i32* %ptr) #0 { ; HSA: s_load_dwordx2 s[[[PTRLO:[0-9]+]]:[[PTRHI:[0-9]+]]], s[4:5], 0x0 ; HSA: s_load_dword s{{[0-9]+}}, s[[[PTRLO]]:[[PTRHI]]], 0x0 -define amdgpu_kernel void @use_flat_to_constant_addrspacecast(i32* %ptr) #0 { - %ftos = addrspacecast i32* %ptr to i32 addrspace(4)* - load volatile i32, i32 addrspace(4)* %ftos +define amdgpu_kernel void @use_flat_to_constant_addrspacecast(ptr %ptr) #0 { + %ftos = addrspacecast ptr %ptr to ptr addrspace(4) + load volatile i32, ptr addrspace(4) %ftos ret void } @@ -223,8 +223,8 @@ define amdgpu_kernel void @use_flat_to_constant_addrspacecast(i32* %ptr) #0 { ; HSA-DAG: v_mov_b32_e32 v[[K:[0-9]+]], 7{{$}} ; HSA: {{flat|global}}_store_dword v[[[LO]]:[[HI]]], v[[K]] define amdgpu_kernel void @cast_0_group_to_flat_addrspacecast() #0 { - %cast = addrspacecast i32 addrspace(3)* null to i32* - store volatile i32 7, i32* %cast + %cast = addrspacecast ptr addrspace(3) null to ptr + store volatile i32 7, ptr %cast ret void } @@ -233,8 +233,8 @@ define amdgpu_kernel void @cast_0_group_to_flat_addrspacecast() #0 { ; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7{{$}} ; HSA: ds_write_b32 [[PTR]], [[K]] define amdgpu_kernel void @cast_0_flat_to_group_addrspacecast() #0 { - %cast = addrspacecast i32* null to i32 addrspace(3)* - store volatile i32 7, i32 addrspace(3)* %cast + %cast = addrspacecast ptr null to ptr addrspace(3) + store volatile i32 7, ptr addrspace(3) %cast ret void } @@ -244,8 +244,8 @@ define amdgpu_kernel void @cast_0_flat_to_group_addrspacecast() #0 { ; HSA-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}} ; HSA: {{flat|global}}_store_dword v[[[LO]]:[[HI]]], v[[K]] define amdgpu_kernel void @cast_neg1_group_to_flat_addrspacecast() #0 { 
- %cast = addrspacecast i32 addrspace(3)* inttoptr (i32 -1 to i32 addrspace(3)*) to i32* - store volatile i32 7, i32* %cast + %cast = addrspacecast ptr addrspace(3) inttoptr (i32 -1 to ptr addrspace(3)) to ptr + store volatile i32 7, ptr %cast ret void } @@ -254,8 +254,8 @@ define amdgpu_kernel void @cast_neg1_group_to_flat_addrspacecast() #0 { ; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7{{$}} ; HSA: ds_write_b32 [[PTR]], [[K]] define amdgpu_kernel void @cast_neg1_flat_to_group_addrspacecast() #0 { - %cast = addrspacecast i32* inttoptr (i64 -1 to i32*) to i32 addrspace(3)* - store volatile i32 7, i32 addrspace(3)* %cast + %cast = addrspacecast ptr inttoptr (i64 -1 to ptr) to ptr addrspace(3) + store volatile i32 7, ptr addrspace(3) %cast ret void } @@ -273,8 +273,8 @@ define amdgpu_kernel void @cast_neg1_flat_to_group_addrspacecast() #0 { ; HSA-DAG: v_mov_b32_e32 v[[K:[0-9]+]], 7{{$}} ; HSA: {{flat|global}}_store_dword v[[[LO]]:[[HI]]], v[[K]] define amdgpu_kernel void @cast_0_private_to_flat_addrspacecast() #0 { - %cast = addrspacecast i32 addrspace(5)* null to i32* - store volatile i32 7, i32* %cast + %cast = addrspacecast ptr addrspace(5) null to ptr + store volatile i32 7, ptr %cast ret void } @@ -283,8 +283,8 @@ define amdgpu_kernel void @cast_0_private_to_flat_addrspacecast() #0 { ; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7{{$}} ; HSA: buffer_store_dword [[K]], [[PTR]], s{{\[[0-9]+:[0-9]+\]}}, 0 define amdgpu_kernel void @cast_0_flat_to_private_addrspacecast() #0 { - %cast = addrspacecast i32* null to i32 addrspace(5)* - store volatile i32 7, i32 addrspace(5)* %cast + %cast = addrspacecast ptr null to ptr addrspace(5) + store volatile i32 7, ptr addrspace(5) %cast ret void } @@ -298,8 +298,8 @@ define amdgpu_kernel void @cast_0_flat_to_private_addrspacecast() #0 { ; HSA-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}} ; HSA: {{flat|global}}_store_dword v[[[LO]]:[[HI]]], v[[K]] define amdgpu_kernel void @cast_neg1_private_to_flat_addrspacecast() #0 { - %cast = addrspacecast i32 addrspace(5)* inttoptr (i32 -1 to i32 addrspace(5)*) to i32* - store volatile i32 7, i32* %cast + %cast = addrspacecast ptr addrspace(5) inttoptr (i32 -1 to ptr addrspace(5)) to ptr + store volatile i32 7, ptr %cast ret void } @@ -308,8 +308,8 @@ define amdgpu_kernel void @cast_neg1_private_to_flat_addrspacecast() #0 { ; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7{{$}} ; HSA: buffer_store_dword [[K]], [[PTR]], s{{\[[0-9]+:[0-9]+\]}}, 0 define amdgpu_kernel void @cast_neg1_flat_to_private_addrspacecast() #0 { - %cast = addrspacecast i32* inttoptr (i64 -1 to i32*) to i32 addrspace(5)* - store volatile i32 7, i32 addrspace(5)* %cast + %cast = addrspacecast ptr inttoptr (i64 -1 to ptr) to ptr addrspace(5) + store volatile i32 7, ptr addrspace(5) %cast ret void } @@ -320,24 +320,24 @@ define amdgpu_kernel void @cast_neg1_flat_to_private_addrspacecast() #0 { ; HSA-LABEL: {{^}}branch_use_flat_i32: ; HSA: {{flat|global}}_store_dword {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} ; HSA: s_endpgm -define amdgpu_kernel void @branch_use_flat_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* %gptr, i32 addrspace(3)* %lptr, i32 %x, i32 %c) #0 { +define amdgpu_kernel void @branch_use_flat_i32(ptr addrspace(1) noalias %out, ptr addrspace(1) %gptr, ptr addrspace(3) %lptr, i32 %x, i32 %c) #0 { entry: %cmp = icmp ne i32 %c, 0 br i1 %cmp, label %local, label %global local: - %flat_local = addrspacecast i32 addrspace(3)* %lptr to i32* + %flat_local = addrspacecast ptr addrspace(3) %lptr to ptr br label %end global: - %flat_global = addrspacecast i32 
addrspace(1)* %gptr to i32* + %flat_global = addrspacecast ptr addrspace(1) %gptr to ptr br label %end end: - %fptr = phi i32* [ %flat_local, %local ], [ %flat_global, %global ] - store volatile i32 %x, i32* %fptr, align 4 -; %val = load i32, i32* %fptr, align 4 -; store i32 %val, i32 addrspace(1)* %out, align 4 + %fptr = phi ptr [ %flat_local, %local ], [ %flat_global, %global ] + store volatile i32 %x, ptr %fptr, align 4 +; %val = load i32, ptr %fptr, align 4 +; store i32 %val, ptr addrspace(1) %out, align 4 ret void } @@ -353,16 +353,16 @@ end: ; HSA: {{flat|global}}_store_dword ; HSA: s_barrier ; HSA: {{flat|global}}_load_dword -define amdgpu_kernel void @store_flat_scratch(i32 addrspace(1)* noalias %out, i32) #0 { +define amdgpu_kernel void @store_flat_scratch(ptr addrspace(1) noalias %out, i32) #0 { %alloca = alloca i32, i32 9, align 4, addrspace(5) %x = call i32 @llvm.amdgcn.workitem.id.x() #2 - %pptr = getelementptr i32, i32 addrspace(5)* %alloca, i32 %x - %fptr = addrspacecast i32 addrspace(5)* %pptr to i32* - store volatile i32 %x, i32* %fptr + %pptr = getelementptr i32, ptr addrspace(5) %alloca, i32 %x + %fptr = addrspacecast ptr addrspace(5) %pptr to ptr + store volatile i32 %x, ptr %fptr ; Dummy call call void @llvm.amdgcn.s.barrier() #1 - %reload = load volatile i32, i32* %fptr, align 4 - store volatile i32 %reload, i32 addrspace(1)* %out, align 4 + %reload = load volatile i32, ptr %fptr, align 4 + store volatile i32 %reload, ptr addrspace(1) %out, align 4 ret void } @@ -373,12 +373,11 @@ define amdgpu_kernel void @store_flat_scratch(i32 addrspace(1)* noalias %out, i3 ; GFX9: s_mov_b32 s[[PTR_HI]], 0{{$}} ; GFX9: s_add_i32 s[[PTR_LO]], s[[PTR_LO]], [[OFFSET]] ; GFX9: s_load_dword s{{[0-9]+}}, s[[[PTR_LO]]:[[PTR_HI]]], 0x0{{$}} -define amdgpu_kernel void @use_constant_to_constant32_addrspacecast(i8 addrspace(4)* addrspace(4)* %ptr.ptr, i32 %offset) #0 { - %ptr = load volatile i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* %ptr.ptr - %addrspacecast = addrspacecast i8 addrspace(4)* %ptr to i8 addrspace(6)* - %gep = getelementptr i8, i8 addrspace(6)* %addrspacecast, i32 %offset - %ptr.cast = bitcast i8 addrspace(6)* %gep to i32 addrspace(6)* - %load = load volatile i32, i32 addrspace(6)* %ptr.cast, align 4 +define amdgpu_kernel void @use_constant_to_constant32_addrspacecast(ptr addrspace(4) %ptr.ptr, i32 %offset) #0 { + %ptr = load volatile ptr addrspace(4), ptr addrspace(4) %ptr.ptr + %addrspacecast = addrspacecast ptr addrspace(4) %ptr to ptr addrspace(6) + %gep = getelementptr i8, ptr addrspace(6) %addrspacecast, i32 %offset + %load = load volatile i32, ptr addrspace(6) %gep, align 4 ret void } @@ -389,12 +388,11 @@ define amdgpu_kernel void @use_constant_to_constant32_addrspacecast(i8 addrspace ; GFX9: s_mov_b32 s[[PTR_HI]], 0{{$}} ; GFX9: s_add_i32 s[[PTR_LO]], s[[PTR_LO]], [[OFFSET]] ; GFX9: s_load_dword s{{[0-9]+}}, s[[[PTR_LO]]:[[PTR_HI]]], 0x0{{$}} -define amdgpu_kernel void @use_global_to_constant32_addrspacecast(i8 addrspace(1)* addrspace(4)* %ptr.ptr, i32 %offset) #0 { - %ptr = load volatile i8 addrspace(1)*, i8 addrspace(1)* addrspace(4)* %ptr.ptr - %addrspacecast = addrspacecast i8 addrspace(1)* %ptr to i8 addrspace(6)* - %gep = getelementptr i8, i8 addrspace(6)* %addrspacecast, i32 %offset - %ptr.cast = bitcast i8 addrspace(6)* %gep to i32 addrspace(6)* - %load = load volatile i32, i32 addrspace(6)* %ptr.cast, align 4 +define amdgpu_kernel void @use_global_to_constant32_addrspacecast(ptr addrspace(4) %ptr.ptr, i32 %offset) #0 { + %ptr = load volatile ptr 
addrspace(1), ptr addrspace(4) %ptr.ptr + %addrspacecast = addrspacecast ptr addrspace(1) %ptr to ptr addrspace(6) + %gep = getelementptr i8, ptr addrspace(6) %addrspacecast, i32 %offset + %load = load volatile i32, ptr addrspace(6) %gep, align 4 ret void } @@ -403,9 +401,9 @@ define amdgpu_kernel void @use_global_to_constant32_addrspacecast(i8 addrspace(1 ; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], 0 ; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], [[PTR]] ; GCN: flat_load_dword v{{[0-9]+}}, v[[[LO]]:[[HI]]] -define amdgpu_kernel void @use_constant32bit_to_flat_addrspacecast_0(i32 addrspace(6)* %ptr) #0 { - %stof = addrspacecast i32 addrspace(6)* %ptr to i32* - %load = load volatile i32, i32* %stof +define amdgpu_kernel void @use_constant32bit_to_flat_addrspacecast_0(ptr addrspace(6) %ptr) #0 { + %stof = addrspacecast ptr addrspace(6) %ptr to ptr + %load = load volatile i32, ptr %stof ret void } @@ -414,9 +412,9 @@ define amdgpu_kernel void @use_constant32bit_to_flat_addrspacecast_0(i32 addrspa ; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], 0xffff8000 ; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], [[PTR]] ; GCN: flat_load_dword v{{[0-9]+}}, v[[[LO]]:[[HI]]] -define amdgpu_kernel void @use_constant32bit_to_flat_addrspacecast_1(i32 addrspace(6)* %ptr) #3 { - %stof = addrspacecast i32 addrspace(6)* %ptr to i32* - %load = load volatile i32, i32* %stof +define amdgpu_kernel void @use_constant32bit_to_flat_addrspacecast_1(ptr addrspace(6) %ptr) #3 { + %stof = addrspacecast ptr addrspace(6) %ptr to ptr + %load = load volatile i32, ptr %stof ret void } diff --git a/llvm/test/CodeGen/AMDGPU/any_extend_vector_inreg.ll b/llvm/test/CodeGen/AMDGPU/any_extend_vector_inreg.ll index 2e5c32c..dc7fc9f 100644 --- a/llvm/test/CodeGen/AMDGPU/any_extend_vector_inreg.ll +++ b/llvm/test/CodeGen/AMDGPU/any_extend_vector_inreg.ll @@ -24,20 +24,17 @@ ; GCN: {{buffer|flat}}_store_byte ; GCN: {{buffer|flat}}_store_byte ; GCN: {{buffer|flat}}_store_byte -define amdgpu_kernel void @any_extend_vector_inreg_v16i8_to_v4i32(<8 x i8> addrspace(1)* nocapture readonly %arg, <16 x i8> addrspace(1)* %arg1) local_unnamed_addr #0 { +define amdgpu_kernel void @any_extend_vector_inreg_v16i8_to_v4i32(ptr addrspace(1) nocapture readonly %arg, ptr addrspace(1) %arg1) local_unnamed_addr #0 { bb: - %tmp = bitcast <8 x i8> addrspace(1)* %arg to <16 x i8> addrspace(1)* - %tmp2 = load <16 x i8>, <16 x i8> addrspace(1)* %tmp, align 16 + %tmp2 = load <16 x i8>, ptr addrspace(1) %arg, align 16 %tmp3 = extractelement <16 x i8> %tmp2, i64 4 %tmp6 = extractelement <16 x i8> %tmp2, i64 11 - %tmp10 = getelementptr inbounds <8 x i8>, <8 x i8> addrspace(1)* %arg, i64 2 - %tmp11 = bitcast <8 x i8> addrspace(1)* %tmp10 to <16 x i8> addrspace(1)* - %tmp12 = load <16 x i8>, <16 x i8> addrspace(1)* %tmp11, align 16 + %tmp10 = getelementptr inbounds <8 x i8>, ptr addrspace(1) %arg, i64 2 + %tmp12 = load <16 x i8>, ptr addrspace(1) %tmp10, align 16 %tmp13 = extractelement <16 x i8> %tmp12, i64 7 %tmp17 = extractelement <16 x i8> %tmp12, i64 12 - %tmp21 = getelementptr inbounds <8 x i8>, <8 x i8> addrspace(1)* %arg, i64 4 - %tmp22 = bitcast <8 x i8> addrspace(1)* %tmp21 to <16 x i8> addrspace(1)* - %tmp23 = load <16 x i8>, <16 x i8> addrspace(1)* %tmp22, align 16 + %tmp21 = getelementptr inbounds <8 x i8>, ptr addrspace(1) %arg, i64 4 + %tmp23 = load <16 x i8>, ptr addrspace(1) %tmp21, align 16 %tmp24 = extractelement <16 x i8> %tmp23, i64 3 %tmp1 = insertelement <16 x i8> undef, i8 %tmp3, i32 2 %tmp4 = insertelement <16 x i8> %tmp1, i8 0, i32 3 @@ -50,7 +47,7 @@ bb: %tmp16 = insertelement <16 x 
i8> %tmp15, i8 0, i32 10 %tmp18 = insertelement <16 x i8> %tmp16, i8 0, i32 11 %tmp19 = insertelement <16 x i8> %tmp18, i8 %tmp24, i32 12 - store <16 x i8> %tmp19, <16 x i8> addrspace(1)* %arg1, align 1 + store <16 x i8> %tmp19, ptr addrspace(1) %arg1, align 1 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/anyext.ll b/llvm/test/CodeGen/AMDGPU/anyext.ll index 34c35ab..68a8b0a 100644 --- a/llvm/test/CodeGen/AMDGPU/anyext.ll +++ b/llvm/test/CodeGen/AMDGPU/anyext.ll @@ -6,7 +6,7 @@ declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone declare i32 @llvm.amdgcn.workitem.id.y() nounwind readnone -define amdgpu_kernel void @anyext_i1_i32(i32 addrspace(1)* %out, i32 %cond) #0 { +define amdgpu_kernel void @anyext_i1_i32(ptr addrspace(1) %out, i32 %cond) #0 { ; GCN-LABEL: anyext_i1_i32: ; GCN: ; %bb.0: ; %entry ; GCN-NEXT: s_load_dword s4, s[0:1], 0xb @@ -55,11 +55,11 @@ entry: %tmp2 = xor i8 %tmp1, -1 %tmp3 = and i8 %tmp2, 1 %tmp4 = zext i8 %tmp3 to i32 - store i32 %tmp4, i32 addrspace(1)* %out + store i32 %tmp4, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @s_anyext_i16_i32(i32 addrspace(1)* %out, i16 addrspace(1)* %a, i16 addrspace(1)* %b) #0 { +define amdgpu_kernel void @s_anyext_i16_i32(ptr addrspace(1) %out, ptr addrspace(1) %a, ptr addrspace(1) %b) #0 { ; GCN-LABEL: s_anyext_i16_i32: ; GCN: ; %bb.0: ; %entry ; GCN-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 @@ -132,16 +132,16 @@ define amdgpu_kernel void @s_anyext_i16_i32(i32 addrspace(1)* %out, i16 addrspac entry: %tid.x = call i32 @llvm.amdgcn.workitem.id.x() %tid.y = call i32 @llvm.amdgcn.workitem.id.y() - %a.ptr = getelementptr i16, i16 addrspace(1)* %a, i32 %tid.x - %b.ptr = getelementptr i16, i16 addrspace(1)* %b, i32 %tid.y - %a.l = load i16, i16 addrspace(1)* %a.ptr - %b.l = load i16, i16 addrspace(1)* %b.ptr + %a.ptr = getelementptr i16, ptr addrspace(1) %a, i32 %tid.x + %b.ptr = getelementptr i16, ptr addrspace(1) %b, i32 %tid.y + %a.l = load i16, ptr addrspace(1) %a.ptr + %b.l = load i16, ptr addrspace(1) %b.ptr %tmp = add i16 %a.l, %b.l %tmp1 = trunc i16 %tmp to i8 %tmp2 = xor i8 %tmp1, -1 %tmp3 = and i8 %tmp2, 1 %tmp4 = zext i8 %tmp3 to i32 - store i32 %tmp4, i32 addrspace(1)* %out + store i32 %tmp4, ptr addrspace(1) %out ret void } @@ -186,7 +186,7 @@ define amdgpu_kernel void @anyext_v2i16_to_v2i32() #0 { ; GFX9-NEXT: buffer_store_byte v0, off, s[0:3], 0 ; GFX9-NEXT: s_endpgm bb: - %tmp = load i16, i16 addrspace(1)* undef, align 2 + %tmp = load i16, ptr addrspace(1) undef, align 2 %tmp2 = insertelement <2 x i16> undef, i16 %tmp, i32 1 %tmp4 = and <2 x i16> %tmp2, %tmp5 = zext <2 x i16> %tmp4 to <2 x i32> @@ -196,7 +196,7 @@ bb: %tmp10 = fcmp oeq <2 x float> %tmp8, zeroinitializer %tmp11 = zext <2 x i1> %tmp10 to <2 x i8> %tmp12 = extractelement <2 x i8> %tmp11, i32 1 - store i8 %tmp12, i8 addrspace(1)* undef, align 1 + store i8 %tmp12, ptr addrspace(1) undef, align 1 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/codegen-prepare-addrmode-sext.ll b/llvm/test/CodeGen/AMDGPU/codegen-prepare-addrmode-sext.ll index 155de53..31e1ace 100644 --- a/llvm/test/CodeGen/AMDGPU/codegen-prepare-addrmode-sext.ll +++ b/llvm/test/CodeGen/AMDGPU/codegen-prepare-addrmode-sext.ll @@ -8,11 +8,11 @@ ; SI-LLC-LABEL: {{^}}test: ; SI-LLC: s_mul_i32 ; SI-LLC-NOT: mul -define amdgpu_kernel void @test(i8 addrspace(1)* nocapture readonly %in, i32 %a, i8 %b) { +define amdgpu_kernel void @test(ptr addrspace(1) nocapture readonly %in, i32 %a, i8 %b) { entry: %0 = mul nsw i32 %a, 3 %1 = sext i32 %0 to i64 - %2 = getelementptr i8, i8 
addrspace(1)* %in, i64 %1 - store i8 %b, i8 addrspace(1)* %2 + %2 = getelementptr i8, ptr addrspace(1) %in, i64 %1 + store i8 %b, ptr addrspace(1) %2 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/combine-and-sext-bool.ll b/llvm/test/CodeGen/AMDGPU/combine-and-sext-bool.ll index cd4ac4d..e85dfed 100644 --- a/llvm/test/CodeGen/AMDGPU/combine-and-sext-bool.ll +++ b/llvm/test/CodeGen/AMDGPU/combine-and-sext-bool.ll @@ -7,16 +7,16 @@ ; GCN-NOT: v_cndmask_b32_e64 v{{[0-9]+}}, {{0|-1}}, {{0|-1}} ; GCN-NOT: v_and_b32_e32 -define amdgpu_kernel void @and_i1_sext_bool(i32 addrspace(1)* nocapture %arg) { +define amdgpu_kernel void @and_i1_sext_bool(ptr addrspace(1) nocapture %arg) { bb: %x = tail call i32 @llvm.amdgcn.workitem.id.x() %y = tail call i32 @llvm.amdgcn.workitem.id.y() - %gep = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %x - %v = load i32, i32 addrspace(1)* %gep, align 4 + %gep = getelementptr inbounds i32, ptr addrspace(1) %arg, i32 %x + %v = load i32, ptr addrspace(1) %gep, align 4 %cmp = icmp ugt i32 %x, %y %ext = sext i1 %cmp to i32 %and = and i32 %v, %ext - store i32 %and, i32 addrspace(1)* %gep, align 4 + store i32 %and, ptr addrspace(1) %gep, align 4 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/fp16_to_fp32.ll b/llvm/test/CodeGen/AMDGPU/fp16_to_fp32.ll index ce04136..971f2e3 100644 --- a/llvm/test/CodeGen/AMDGPU/fp16_to_fp32.ll +++ b/llvm/test/CodeGen/AMDGPU/fp16_to_fp32.ll @@ -14,9 +14,9 @@ declare float @llvm.convert.from.fp16.f32(i16) nounwind readnone ; CM: MEM_RAT_CACHELESS STORE_DWORD [[RES:T[0-9]+\.[XYZW]]] ; EGCM: VTX_READ_16 [[VAL:T[0-9]+\.[XYZW]]] ; EGCM: FLT16_TO_FLT32{{[ *]*}}[[RES]], [[VAL]] -define amdgpu_kernel void @test_convert_fp16_to_fp32(float addrspace(1)* noalias %out, i16 addrspace(1)* noalias %in) nounwind { - %val = load i16, i16 addrspace(1)* %in, align 2 +define amdgpu_kernel void @test_convert_fp16_to_fp32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind { + %val = load i16, ptr addrspace(1) %in, align 2 %cvt = call float @llvm.convert.from.fp16.f32(i16 %val) nounwind readnone - store float %cvt, float addrspace(1)* %out, align 4 + store float %cvt, ptr addrspace(1) %out, align 4 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/fp16_to_fp64.ll b/llvm/test/CodeGen/AMDGPU/fp16_to_fp64.ll index 70f0c0c..5bb6841 100644 --- a/llvm/test/CodeGen/AMDGPU/fp16_to_fp64.ll +++ b/llvm/test/CodeGen/AMDGPU/fp16_to_fp64.ll @@ -8,9 +8,9 @@ declare double @llvm.convert.from.fp16.f64(i16) nounwind readnone ; GCN: v_cvt_f32_f16_e32 [[RESULT32:v[0-9]+]], [[VAL]] ; GCN: v_cvt_f64_f32_e32 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[RESULT32]] ; GCN: buffer_store_dwordx2 [[RESULT]] -define amdgpu_kernel void @test_convert_fp16_to_fp64(double addrspace(1)* noalias %out, i16 addrspace(1)* noalias %in) nounwind { - %val = load i16, i16 addrspace(1)* %in, align 2 +define amdgpu_kernel void @test_convert_fp16_to_fp64(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind { + %val = load i16, ptr addrspace(1) %in, align 2 %cvt = call double @llvm.convert.from.fp16.f64(i16 %val) nounwind readnone - store double %cvt, double addrspace(1)* %out, align 4 + store double %cvt, ptr addrspace(1) %out, align 4 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/fp32_to_fp16.ll b/llvm/test/CodeGen/AMDGPU/fp32_to_fp16.ll index 579a145..1786fea 100644 --- a/llvm/test/CodeGen/AMDGPU/fp32_to_fp16.ll +++ b/llvm/test/CodeGen/AMDGPU/fp32_to_fp16.ll @@ -12,9 +12,9 @@ declare i16 @llvm.convert.to.fp16.f32(float) nounwind readnone ; EG: MEM_RAT MSKOR ; EG: 
VTX_READ_32 ; EG: FLT32_TO_FLT16 -define amdgpu_kernel void @test_convert_fp32_to_fp16(i16 addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind { - %val = load float, float addrspace(1)* %in, align 4 +define amdgpu_kernel void @test_convert_fp32_to_fp16(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind { + %val = load float, ptr addrspace(1) %in, align 4 %cvt = call i16 @llvm.convert.to.fp16.f32(float %val) nounwind readnone - store i16 %cvt, i16 addrspace(1)* %out, align 2 + store i16 %cvt, ptr addrspace(1) %out, align 2 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/fp_to_sint.f64.ll b/llvm/test/CodeGen/AMDGPU/fp_to_sint.f64.ll index a602a04..2c318d7 100644 --- a/llvm/test/CodeGen/AMDGPU/fp_to_sint.f64.ll +++ b/llvm/test/CodeGen/AMDGPU/fp_to_sint.f64.ll @@ -6,18 +6,18 @@ declare double @llvm.fabs.f64(double) #1 ; FUNC-LABEL: @fp_to_sint_f64_i32 ; SI: v_cvt_i32_f64_e32 -define amdgpu_kernel void @fp_to_sint_f64_i32(i32 addrspace(1)* %out, double %in) { +define amdgpu_kernel void @fp_to_sint_f64_i32(ptr addrspace(1) %out, double %in) { %result = fptosi double %in to i32 - store i32 %result, i32 addrspace(1)* %out + store i32 %result, ptr addrspace(1) %out ret void } ; FUNC-LABEL: @fp_to_sint_v2f64_v2i32 ; SI: v_cvt_i32_f64_e32 ; SI: v_cvt_i32_f64_e32 -define amdgpu_kernel void @fp_to_sint_v2f64_v2i32(<2 x i32> addrspace(1)* %out, <2 x double> %in) { +define amdgpu_kernel void @fp_to_sint_v2f64_v2i32(ptr addrspace(1) %out, <2 x double> %in) { %result = fptosi <2 x double> %in to <2 x i32> - store <2 x i32> %result, <2 x i32> addrspace(1)* %out + store <2 x i32> %result, ptr addrspace(1) %out ret void } @@ -26,9 +26,9 @@ define amdgpu_kernel void @fp_to_sint_v2f64_v2i32(<2 x i32> addrspace(1)* %out, ; SI: v_cvt_i32_f64_e32 ; SI: v_cvt_i32_f64_e32 ; SI: v_cvt_i32_f64_e32 -define amdgpu_kernel void @fp_to_sint_v4f64_v4i32(<4 x i32> addrspace(1)* %out, <4 x double> %in) { +define amdgpu_kernel void @fp_to_sint_v4f64_v4i32(ptr addrspace(1) %out, <4 x double> %in) { %result = fptosi <4 x double> %in to <4 x i32> - store <4 x i32> %result, <4 x i32> addrspace(1)* %out + store <4 x i32> %result, ptr addrspace(1) %out ret void } @@ -47,29 +47,29 @@ define amdgpu_kernel void @fp_to_sint_v4f64_v4i32(<4 x i32> addrspace(1)* %out, ; CI-DAG: v_cvt_u32_f64_e32 v[[LO:[0-9]+]], [[FMA]] ; CI-DAG: v_cvt_i32_f64_e32 v[[HI:[0-9]+]], [[FLOOR]] ; CI: buffer_store_dwordx2 v[[[LO]]:[[HI]]] -define amdgpu_kernel void @fp_to_sint_i64_f64(i64 addrspace(1)* %out, double addrspace(1)* %in) { +define amdgpu_kernel void @fp_to_sint_i64_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) { %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone - %gep = getelementptr double, double addrspace(1)* %in, i32 %tid - %val = load double, double addrspace(1)* %gep, align 8 + %gep = getelementptr double, ptr addrspace(1) %in, i32 %tid + %val = load double, ptr addrspace(1) %gep, align 8 %cast = fptosi double %val to i64 - store i64 %cast, i64 addrspace(1)* %out, align 8 + store i64 %cast, ptr addrspace(1) %out, align 8 ret void } ; FUNC-LABEL: {{^}}fp_to_sint_f64_to_i1: ; SI: v_cmp_eq_f64_e64 s{{\[[0-9]+:[0-9]+\]}}, -1.0, s{{\[[0-9]+:[0-9]+\]}} -define amdgpu_kernel void @fp_to_sint_f64_to_i1(i1 addrspace(1)* %out, double %in) #0 { +define amdgpu_kernel void @fp_to_sint_f64_to_i1(ptr addrspace(1) %out, double %in) #0 { %conv = fptosi double %in to i1 - store i1 %conv, i1 addrspace(1)* %out + store i1 %conv, ptr addrspace(1) %out ret void } ; FUNC-LABEL: {{^}}fp_to_sint_fabs_f64_to_i1: ; 
SI: v_cmp_eq_f64_e64 s{{\[[0-9]+:[0-9]+\]}}, -1.0, |s{{\[[0-9]+:[0-9]+\]}}| -define amdgpu_kernel void @fp_to_sint_fabs_f64_to_i1(i1 addrspace(1)* %out, double %in) #0 { +define amdgpu_kernel void @fp_to_sint_fabs_f64_to_i1(ptr addrspace(1) %out, double %in) #0 { %in.fabs = call double @llvm.fabs.f64(double %in) %conv = fptosi double %in.fabs to i1 - store i1 %conv, i1 addrspace(1)* %out + store i1 %conv, ptr addrspace(1) %out ret void } diff --git a/llvm/test/CodeGen/AMDGPU/fp_to_sint.ll b/llvm/test/CodeGen/AMDGPU/fp_to_sint.ll index 18b0fe3..e32d5d7 100644 --- a/llvm/test/CodeGen/AMDGPU/fp_to_sint.ll +++ b/llvm/test/CodeGen/AMDGPU/fp_to_sint.ll @@ -5,7 +5,7 @@ declare float @llvm.fabs.f32(float) #1 -define amdgpu_kernel void @fp_to_sint_i32(i32 addrspace(1)* %out, float %in) { +define amdgpu_kernel void @fp_to_sint_i32(ptr addrspace(1) %out, float %in) { ; SI-LABEL: fp_to_sint_i32: ; SI: ; %bb.0: ; SI-NEXT: s_load_dword s4, s[0:1], 0xb @@ -40,11 +40,11 @@ define amdgpu_kernel void @fp_to_sint_i32(i32 addrspace(1)* %out, float %in) { ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) %conv = fptosi float %in to i32 - store i32 %conv, i32 addrspace(1)* %out + store i32 %conv, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @fp_to_sint_i32_fabs(i32 addrspace(1)* %out, float %in) { +define amdgpu_kernel void @fp_to_sint_i32_fabs(ptr addrspace(1) %out, float %in) { ; SI-LABEL: fp_to_sint_i32_fabs: ; SI: ; %bb.0: ; SI-NEXT: s_load_dword s4, s[0:1], 0xb @@ -80,11 +80,11 @@ define amdgpu_kernel void @fp_to_sint_i32_fabs(i32 addrspace(1)* %out, float %in ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) %in.fabs = call float @llvm.fabs.f32(float %in) %conv = fptosi float %in.fabs to i32 - store i32 %conv, i32 addrspace(1)* %out + store i32 %conv, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @fp_to_sint_v2i32(<2 x i32> addrspace(1)* %out, <2 x float> %in) { +define amdgpu_kernel void @fp_to_sint_v2i32(ptr addrspace(1) %out, <2 x float> %in) { ; SI-LABEL: fp_to_sint_v2i32: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -125,11 +125,11 @@ define amdgpu_kernel void @fp_to_sint_v2i32(<2 x i32> addrspace(1)* %out, <2 x f ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) %result = fptosi <2 x float> %in to <2 x i32> - store <2 x i32> %result, <2 x i32> addrspace(1)* %out + store <2 x i32> %result, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @fp_to_sint_v4i32(<4 x i32> addrspace(1)* %out, <4 x float> addrspace(1)* %in) { +define amdgpu_kernel void @fp_to_sint_v4i32(ptr addrspace(1) %out, ptr addrspace(1) %in) { ; SI-LABEL: fp_to_sint_v4i32: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -183,14 +183,14 @@ define amdgpu_kernel void @fp_to_sint_v4i32(<4 x i32> addrspace(1)* %out, <4 x f ; EG-NEXT: FLT_TO_INT T0.X, PV.W, ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) - %value = load <4 x float>, <4 x float> addrspace(1) * %in + %value = load <4 x float>, ptr addrspace(1) %in %result = fptosi <4 x float> %value to <4 x i32> - store <4 x i32> %result, <4 x i32> addrspace(1)* %out + store <4 x i32> %result, ptr addrspace(1) %out ret void } ; Check that the compiler doesn't crash with a "cannot select" error -define amdgpu_kernel void @fp_to_sint_i64 (i64 addrspace(1)* %out, float %in) { +define amdgpu_kernel void @fp_to_sint_i64 (ptr addrspace(1) %out, float %in) { ; SI-LABEL: fp_to_sint_i64: ; SI: ; %bb.0: ; %entry ; 
SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 @@ -287,11 +287,11 @@ define amdgpu_kernel void @fp_to_sint_i64 (i64 addrspace(1)* %out, float %in) { ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) entry: %0 = fptosi float %in to i64 - store i64 %0, i64 addrspace(1)* %out + store i64 %0, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @fp_to_sint_v2i64(<2 x i64> addrspace(1)* %out, <2 x float> %x) { +define amdgpu_kernel void @fp_to_sint_v2i64(ptr addrspace(1) %out, <2 x float> %x) { ; SI-LABEL: fp_to_sint_v2i64: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -446,11 +446,11 @@ define amdgpu_kernel void @fp_to_sint_v2i64(<2 x i64> addrspace(1)* %out, <2 x f ; EG-NEXT: LSHR * T0.X, KC0[2].Y, literal.x, ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) %conv = fptosi <2 x float> %x to <2 x i64> - store <2 x i64> %conv, <2 x i64> addrspace(1)* %out + store <2 x i64> %conv, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @fp_to_sint_v4i64(<4 x i64> addrspace(1)* %out, <4 x float> %x) { +define amdgpu_kernel void @fp_to_sint_v4i64(ptr addrspace(1) %out, <4 x float> %x) { ; SI-LABEL: fp_to_sint_v4i64: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 @@ -733,11 +733,11 @@ define amdgpu_kernel void @fp_to_sint_v4i64(<4 x i64> addrspace(1)* %out, <4 x f ; EG-NEXT: LSHR * T0.X, PV.W, literal.x, ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) %conv = fptosi <4 x float> %x to <4 x i64> - store <4 x i64> %conv, <4 x i64> addrspace(1)* %out + store <4 x i64> %conv, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @fp_to_uint_f32_to_i1(i1 addrspace(1)* %out, float %in) #0 { +define amdgpu_kernel void @fp_to_uint_f32_to_i1(ptr addrspace(1) %out, float %in) #0 { ; SI-LABEL: fp_to_uint_f32_to_i1: ; SI: ; %bb.0: ; SI-NEXT: s_load_dword s4, s[0:1], 0xb @@ -783,11 +783,11 @@ define amdgpu_kernel void @fp_to_uint_f32_to_i1(i1 addrspace(1)* %out, float %in ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) %conv = fptosi float %in to i1 - store i1 %conv, i1 addrspace(1)* %out + store i1 %conv, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @fp_to_uint_fabs_f32_to_i1(i1 addrspace(1)* %out, float %in) #0 { +define amdgpu_kernel void @fp_to_uint_fabs_f32_to_i1(ptr addrspace(1) %out, float %in) #0 { ; SI-LABEL: fp_to_uint_fabs_f32_to_i1: ; SI: ; %bb.0: ; SI-NEXT: s_load_dword s4, s[0:1], 0xb @@ -834,11 +834,11 @@ define amdgpu_kernel void @fp_to_uint_fabs_f32_to_i1(i1 addrspace(1)* %out, floa ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) %in.fabs = call float @llvm.fabs.f32(float %in) %conv = fptosi float %in.fabs to i1 - store i1 %conv, i1 addrspace(1)* %out + store i1 %conv, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @fp_to_sint_f32_i16(i16 addrspace(1)* %out, float %in) #0 { +define amdgpu_kernel void @fp_to_sint_f32_i16(ptr addrspace(1) %out, float %in) #0 { ; SI-LABEL: fp_to_sint_f32_i16: ; SI: ; %bb.0: ; SI-NEXT: s_load_dword s4, s[0:1], 0xb @@ -883,7 +883,7 @@ define amdgpu_kernel void @fp_to_sint_f32_i16(i16 addrspace(1)* %out, float %in) ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) %sint = fptosi float %in to i16 - store i16 %sint, i16 addrspace(1)* %out + store i16 %sint, ptr addrspace(1) %out ret void } diff --git a/llvm/test/CodeGen/AMDGPU/fp_to_uint.f64.ll b/llvm/test/CodeGen/AMDGPU/fp_to_uint.f64.ll index 8a86446..ba23dd0 100644 --- a/llvm/test/CodeGen/AMDGPU/fp_to_uint.f64.ll +++ b/llvm/test/CodeGen/AMDGPU/fp_to_uint.f64.ll @@ -6,18 +6,18 @@ 
declare double @llvm.fabs.f64(double) #1 ; SI-LABEL: {{^}}fp_to_uint_i32_f64: ; SI: v_cvt_u32_f64_e32 -define amdgpu_kernel void @fp_to_uint_i32_f64(i32 addrspace(1)* %out, double %in) { +define amdgpu_kernel void @fp_to_uint_i32_f64(ptr addrspace(1) %out, double %in) { %cast = fptoui double %in to i32 - store i32 %cast, i32 addrspace(1)* %out, align 4 + store i32 %cast, ptr addrspace(1) %out, align 4 ret void } ; SI-LABEL: @fp_to_uint_v2i32_v2f64 ; SI: v_cvt_u32_f64_e32 ; SI: v_cvt_u32_f64_e32 -define amdgpu_kernel void @fp_to_uint_v2i32_v2f64(<2 x i32> addrspace(1)* %out, <2 x double> %in) { +define amdgpu_kernel void @fp_to_uint_v2i32_v2f64(ptr addrspace(1) %out, <2 x double> %in) { %cast = fptoui <2 x double> %in to <2 x i32> - store <2 x i32> %cast, <2 x i32> addrspace(1)* %out, align 8 + store <2 x i32> %cast, ptr addrspace(1) %out, align 8 ret void } @@ -26,9 +26,9 @@ define amdgpu_kernel void @fp_to_uint_v2i32_v2f64(<2 x i32> addrspace(1)* %out, ; SI: v_cvt_u32_f64_e32 ; SI: v_cvt_u32_f64_e32 ; SI: v_cvt_u32_f64_e32 -define amdgpu_kernel void @fp_to_uint_v4i32_v4f64(<4 x i32> addrspace(1)* %out, <4 x double> %in) { +define amdgpu_kernel void @fp_to_uint_v4i32_v4f64(ptr addrspace(1) %out, <4 x double> %in) { %cast = fptoui <4 x double> %in to <4 x i32> - store <4 x i32> %cast, <4 x i32> addrspace(1)* %out, align 8 + store <4 x i32> %cast, ptr addrspace(1) %out, align 8 ret void } @@ -47,43 +47,43 @@ define amdgpu_kernel void @fp_to_uint_v4i32_v4f64(<4 x i32> addrspace(1)* %out, ; CI-DAG: v_cvt_u32_f64_e32 v[[LO:[0-9]+]], [[FMA]] ; CI-DAG: v_cvt_u32_f64_e32 v[[HI:[0-9]+]], [[FLOOR]] ; CI: buffer_store_dwordx2 v[[[LO]]:[[HI]]] -define amdgpu_kernel void @fp_to_uint_i64_f64(i64 addrspace(1)* %out, double addrspace(1)* %in) { +define amdgpu_kernel void @fp_to_uint_i64_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) { %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone - %gep = getelementptr double, double addrspace(1)* %in, i32 %tid - %val = load double, double addrspace(1)* %gep, align 8 + %gep = getelementptr double, ptr addrspace(1) %in, i32 %tid + %val = load double, ptr addrspace(1) %gep, align 8 %cast = fptoui double %val to i64 - store i64 %cast, i64 addrspace(1)* %out, align 4 + store i64 %cast, ptr addrspace(1) %out, align 4 ret void } ; SI-LABEL: @fp_to_uint_v2i64_v2f64 -define amdgpu_kernel void @fp_to_uint_v2i64_v2f64(<2 x i64> addrspace(1)* %out, <2 x double> %in) { +define amdgpu_kernel void @fp_to_uint_v2i64_v2f64(ptr addrspace(1) %out, <2 x double> %in) { %cast = fptoui <2 x double> %in to <2 x i64> - store <2 x i64> %cast, <2 x i64> addrspace(1)* %out, align 16 + store <2 x i64> %cast, ptr addrspace(1) %out, align 16 ret void } ; SI-LABEL: @fp_to_uint_v4i64_v4f64 -define amdgpu_kernel void @fp_to_uint_v4i64_v4f64(<4 x i64> addrspace(1)* %out, <4 x double> %in) { +define amdgpu_kernel void @fp_to_uint_v4i64_v4f64(ptr addrspace(1) %out, <4 x double> %in) { %cast = fptoui <4 x double> %in to <4 x i64> - store <4 x i64> %cast, <4 x i64> addrspace(1)* %out, align 32 + store <4 x i64> %cast, ptr addrspace(1) %out, align 32 ret void } ; FUNC-LABEL: {{^}}fp_to_uint_f64_to_i1: ; SI: v_cmp_eq_f64_e64 s{{\[[0-9]+:[0-9]+\]}}, 1.0, s{{\[[0-9]+:[0-9]+\]}} -define amdgpu_kernel void @fp_to_uint_f64_to_i1(i1 addrspace(1)* %out, double %in) #0 { +define amdgpu_kernel void @fp_to_uint_f64_to_i1(ptr addrspace(1) %out, double %in) #0 { %conv = fptoui double %in to i1 - store i1 %conv, i1 addrspace(1)* %out + store i1 %conv, ptr addrspace(1) %out ret void } ; FUNC-LABEL: 
{{^}}fp_to_uint_fabs_f64_to_i1: ; SI: v_cmp_eq_f64_e64 s{{\[[0-9]+:[0-9]+\]}}, 1.0, |s{{\[[0-9]+:[0-9]+\]}}| -define amdgpu_kernel void @fp_to_uint_fabs_f64_to_i1(i1 addrspace(1)* %out, double %in) #0 { +define amdgpu_kernel void @fp_to_uint_fabs_f64_to_i1(ptr addrspace(1) %out, double %in) #0 { %in.fabs = call double @llvm.fabs.f64(double %in) %conv = fptoui double %in.fabs to i1 - store i1 %conv, i1 addrspace(1)* %out + store i1 %conv, ptr addrspace(1) %out ret void } diff --git a/llvm/test/CodeGen/AMDGPU/fp_to_uint.ll b/llvm/test/CodeGen/AMDGPU/fp_to_uint.ll index ab90330..ca64382 100644 --- a/llvm/test/CodeGen/AMDGPU/fp_to_uint.ll +++ b/llvm/test/CodeGen/AMDGPU/fp_to_uint.ll @@ -5,7 +5,7 @@ declare float @llvm.fabs.f32(float) #1 -define amdgpu_kernel void @fp_to_uint_f32_to_i32 (i32 addrspace(1)* %out, float %in) { +define amdgpu_kernel void @fp_to_uint_f32_to_i32 (ptr addrspace(1) %out, float %in) { ; SI-LABEL: fp_to_uint_f32_to_i32: ; SI: ; %bb.0: ; SI-NEXT: s_load_dword s4, s[0:1], 0xb @@ -40,11 +40,11 @@ define amdgpu_kernel void @fp_to_uint_f32_to_i32 (i32 addrspace(1)* %out, float ; EG-NEXT: FLT_TO_UINT * T1.X, PV.W, ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) %conv = fptoui float %in to i32 - store i32 %conv, i32 addrspace(1)* %out + store i32 %conv, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @fp_to_uint_v2f32_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x float> %in) { +define amdgpu_kernel void @fp_to_uint_v2f32_to_v2i32(ptr addrspace(1) %out, <2 x float> %in) { ; SI-LABEL: fp_to_uint_v2f32_to_v2i32: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -85,11 +85,11 @@ define amdgpu_kernel void @fp_to_uint_v2f32_to_v2i32(<2 x i32> addrspace(1)* %ou ; EG-NEXT: FLT_TO_UINT * T0.X, T1.W, ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) %result = fptoui <2 x float> %in to <2 x i32> - store <2 x i32> %result, <2 x i32> addrspace(1)* %out + store <2 x i32> %result, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @fp_to_uint_v4f32_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x float> addrspace(1)* %in) { +define amdgpu_kernel void @fp_to_uint_v4f32_to_v4i32(ptr addrspace(1) %out, ptr addrspace(1) %in) { ; SI-LABEL: fp_to_uint_v4f32_to_v4i32: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -143,13 +143,13 @@ define amdgpu_kernel void @fp_to_uint_v4f32_to_v4i32(<4 x i32> addrspace(1)* %ou ; EG-NEXT: LSHR T1.X, KC0[2].Y, literal.x, ; EG-NEXT: FLT_TO_UINT * T0.X, PV.W, ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) - %value = load <4 x float>, <4 x float> addrspace(1) * %in + %value = load <4 x float>, ptr addrspace(1) %in %result = fptoui <4 x float> %value to <4 x i32> - store <4 x i32> %result, <4 x i32> addrspace(1)* %out + store <4 x i32> %result, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @fp_to_uint_f32_to_i64(i64 addrspace(1)* %out, float %x) { +define amdgpu_kernel void @fp_to_uint_f32_to_i64(ptr addrspace(1) %out, float %x) { ; SI-LABEL: fp_to_uint_f32_to_i64: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 @@ -233,11 +233,11 @@ define amdgpu_kernel void @fp_to_uint_f32_to_i64(i64 addrspace(1)* %out, float % ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) %conv = fptoui float %x to i64 - store i64 %conv, i64 addrspace(1)* %out + store i64 %conv, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @fp_to_uint_v2f32_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x float> %x) { +define amdgpu_kernel void @fp_to_uint_v2f32_to_v2i64(ptr addrspace(1) %out, <2 x float> 
%x) { ; SI-LABEL: fp_to_uint_v2f32_to_v2i64: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -370,11 +370,11 @@ define amdgpu_kernel void @fp_to_uint_v2f32_to_v2i64(<2 x i64> addrspace(1)* %ou ; EG-NEXT: LSHR * T0.X, KC0[2].Y, literal.x, ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) %conv = fptoui <2 x float> %x to <2 x i64> - store <2 x i64> %conv, <2 x i64> addrspace(1)* %out + store <2 x i64> %conv, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @fp_to_uint_v4f32_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x float> %x) { +define amdgpu_kernel void @fp_to_uint_v4f32_to_v4i64(ptr addrspace(1) %out, <4 x float> %x) { ; SI-LABEL: fp_to_uint_v4f32_to_v4i64: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 @@ -615,11 +615,11 @@ define amdgpu_kernel void @fp_to_uint_v4f32_to_v4i64(<4 x i64> addrspace(1)* %ou ; EG-NEXT: LSHR * T0.X, PV.W, literal.x, ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) %conv = fptoui <4 x float> %x to <4 x i64> - store <4 x i64> %conv, <4 x i64> addrspace(1)* %out + store <4 x i64> %conv, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @fp_to_uint_f32_to_i1(i1 addrspace(1)* %out, float %in) #0 { +define amdgpu_kernel void @fp_to_uint_f32_to_i1(ptr addrspace(1) %out, float %in) #0 { ; SI-LABEL: fp_to_uint_f32_to_i1: ; SI: ; %bb.0: ; SI-NEXT: s_load_dword s4, s[0:1], 0xb @@ -665,11 +665,11 @@ define amdgpu_kernel void @fp_to_uint_f32_to_i1(i1 addrspace(1)* %out, float %in ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) %conv = fptoui float %in to i1 - store i1 %conv, i1 addrspace(1)* %out + store i1 %conv, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @fp_to_uint_fabs_f32_to_i1(i1 addrspace(1)* %out, float %in) #0 { +define amdgpu_kernel void @fp_to_uint_fabs_f32_to_i1(ptr addrspace(1) %out, float %in) #0 { ; SI-LABEL: fp_to_uint_fabs_f32_to_i1: ; SI: ; %bb.0: ; SI-NEXT: s_load_dword s4, s[0:1], 0xb @@ -716,11 +716,11 @@ define amdgpu_kernel void @fp_to_uint_fabs_f32_to_i1(i1 addrspace(1)* %out, floa ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) %in.fabs = call float @llvm.fabs.f32(float %in) %conv = fptoui float %in.fabs to i1 - store i1 %conv, i1 addrspace(1)* %out + store i1 %conv, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @fp_to_uint_f32_to_i16(i16 addrspace(1)* %out, float %in) #0 { +define amdgpu_kernel void @fp_to_uint_f32_to_i16(ptr addrspace(1) %out, float %in) #0 { ; SI-LABEL: fp_to_uint_f32_to_i16: ; SI: ; %bb.0: ; SI-NEXT: s_load_dword s4, s[0:1], 0xb @@ -764,7 +764,7 @@ define amdgpu_kernel void @fp_to_uint_f32_to_i16(i16 addrspace(1)* %out, float % ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) %uint = fptoui float %in to i16 - store i16 %uint, i16 addrspace(1)* %out + store i16 %uint, ptr addrspace(1) %out ret void } diff --git a/llvm/test/CodeGen/AMDGPU/fpext.f16.ll b/llvm/test/CodeGen/AMDGPU/fpext.f16.ll index 91dac92..0572f9a 100644 --- a/llvm/test/CodeGen/AMDGPU/fpext.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/fpext.f16.ll @@ -8,12 +8,12 @@ ; GCN: buffer_store_dword v[[R_F32]] ; GCN: s_endpgm define amdgpu_kernel void @fpext_f16_to_f32( - float addrspace(1)* %r, - half addrspace(1)* %a) #0 { + ptr addrspace(1) %r, + ptr addrspace(1) %a) #0 { entry: - %a.val = load half, half addrspace(1)* %a + %a.val = load half, ptr addrspace(1) %a %r.val = fpext half %a.val to float - store float %r.val, float addrspace(1)* %r + store float %r.val, ptr addrspace(1) %r ret void } @@ -24,12 +24,12 @@ entry: ; GCN: 
buffer_store_dwordx2 v[[[R_F64_0]]:[[R_F64_1]]] ; GCN: s_endpgm define amdgpu_kernel void @fpext_f16_to_f64( - double addrspace(1)* %r, - half addrspace(1)* %a) #0 { + ptr addrspace(1) %r, + ptr addrspace(1) %a) #0 { entry: - %a.val = load half, half addrspace(1)* %a + %a.val = load half, ptr addrspace(1) %a %r.val = fpext half %a.val to double - store double %r.val, double addrspace(1)* %r + store double %r.val, ptr addrspace(1) %r ret void } @@ -43,12 +43,12 @@ entry: ; GCN: s_endpgm define amdgpu_kernel void @fpext_v2f16_to_v2f32( - <2 x float> addrspace(1)* %r, - <2 x half> addrspace(1)* %a) #0 { + ptr addrspace(1) %r, + ptr addrspace(1) %a) #0 { entry: - %a.val = load <2 x half>, <2 x half> addrspace(1)* %a + %a.val = load <2 x half>, ptr addrspace(1) %a %r.val = fpext <2 x half> %a.val to <2 x float> - store <2 x float> %r.val, <2 x float> addrspace(1)* %r + store <2 x float> %r.val, ptr addrspace(1) %r ret void } @@ -65,23 +65,23 @@ entry: ; GCN: s_endpgm define amdgpu_kernel void @fpext_v2f16_to_v2f64( - <2 x double> addrspace(1)* %r, - <2 x half> addrspace(1)* %a) { + ptr addrspace(1) %r, + ptr addrspace(1) %a) { entry: - %a.val = load <2 x half>, <2 x half> addrspace(1)* %a + %a.val = load <2 x half>, ptr addrspace(1) %a %r.val = fpext <2 x half> %a.val to <2 x double> - store <2 x double> %r.val, <2 x double> addrspace(1)* %r + store <2 x double> %r.val, ptr addrspace(1) %r ret void } ; GCN-LABEL: {{^}}s_fneg_fpext_f16_to_f32: ; GCN: v_cvt_f32_f16_e32 v{{[0-9]+}}, s{{[0-9]+}} -define amdgpu_kernel void @s_fneg_fpext_f16_to_f32(float addrspace(1)* %r, i32 %a) { +define amdgpu_kernel void @s_fneg_fpext_f16_to_f32(ptr addrspace(1) %r, i32 %a) { entry: %a.trunc = trunc i32 %a to i16 %a.val = bitcast i16 %a.trunc to half %r.val = fpext half %a.val to float - store float %r.val, float addrspace(1)* %r + store float %r.val, ptr addrspace(1) %r ret void } @@ -89,13 +89,13 @@ entry: ; GCN: {{buffer|flat}}_load_ushort [[A:v[0-9]+]] ; GCN: v_cvt_f32_f16_e64 v{{[0-9]+}}, -[[A]] define amdgpu_kernel void @fneg_fpext_f16_to_f32( - float addrspace(1)* %r, - half addrspace(1)* %a) { + ptr addrspace(1) %r, + ptr addrspace(1) %a) { entry: - %a.val = load half, half addrspace(1)* %a + %a.val = load half, ptr addrspace(1) %a %a.neg = fsub half -0.0, %a.val %r.val = fpext half %a.neg to float - store float %r.val, float addrspace(1)* %r + store float %r.val, ptr addrspace(1) %r ret void } @@ -103,13 +103,13 @@ entry: ; GCN: {{buffer|flat}}_load_ushort [[A:v[0-9]+]] ; GCN: v_cvt_f32_f16_e64 v{{[0-9]+}}, |[[A]]| define amdgpu_kernel void @fabs_fpext_f16_to_f32( - float addrspace(1)* %r, - half addrspace(1)* %a) { + ptr addrspace(1) %r, + ptr addrspace(1) %a) { entry: - %a.val = load half, half addrspace(1)* %a + %a.val = load half, ptr addrspace(1) %a %a.fabs = call half @llvm.fabs.f16(half %a.val) %r.val = fpext half %a.fabs to float - store float %r.val, float addrspace(1)* %r + store float %r.val, ptr addrspace(1) %r ret void } @@ -117,14 +117,14 @@ entry: ; GCN: {{buffer|flat}}_load_ushort [[A:v[0-9]+]] ; GCN: v_cvt_f32_f16_e64 v{{[0-9]+}}, -|[[A]]| define amdgpu_kernel void @fneg_fabs_fpext_f16_to_f32( - float addrspace(1)* %r, - half addrspace(1)* %a) { + ptr addrspace(1) %r, + ptr addrspace(1) %a) { entry: - %a.val = load half, half addrspace(1)* %a + %a.val = load half, ptr addrspace(1) %a %a.fabs = call half @llvm.fabs.f16(half %a.val) %a.fneg.fabs = fsub half -0.0, %a.fabs %r.val = fpext half %a.fneg.fabs to float - store float %r.val, float addrspace(1)* %r + store float %r.val, ptr 
addrspace(1) %r ret void } @@ -139,14 +139,14 @@ entry: ; GCN: store_dword [[CVT]] ; GCN: store_short [[XOR]] define amdgpu_kernel void @fneg_multi_use_fpext_f16_to_f32( - float addrspace(1)* %r, - half addrspace(1)* %a) { + ptr addrspace(1) %r, + ptr addrspace(1) %a) { entry: - %a.val = load half, half addrspace(1)* %a + %a.val = load half, ptr addrspace(1) %a %a.neg = fsub half -0.0, %a.val %r.val = fpext half %a.neg to float - store volatile float %r.val, float addrspace(1)* %r - store volatile half %a.neg, half addrspace(1)* undef + store volatile float %r.val, ptr addrspace(1) %r + store volatile half %a.neg, ptr addrspace(1) undef ret void } @@ -163,15 +163,15 @@ entry: ; GCN: buffer_store_dword [[CVTA_NEG]] ; GCN: buffer_store_short [[MUL]] define amdgpu_kernel void @fneg_multi_foldable_use_fpext_f16_to_f32( - float addrspace(1)* %r, - half addrspace(1)* %a) { + ptr addrspace(1) %r, + ptr addrspace(1) %a) { entry: - %a.val = load half, half addrspace(1)* %a + %a.val = load half, ptr addrspace(1) %a %a.neg = fsub half -0.0, %a.val %r.val = fpext half %a.neg to float %mul = fmul half %a.neg, %a.val - store volatile float %r.val, float addrspace(1)* %r - store volatile half %mul, half addrspace(1)* undef + store volatile float %r.val, ptr addrspace(1) %r + store volatile half %mul, ptr addrspace(1) undef ret void } @@ -185,14 +185,14 @@ entry: ; GCN: store_dword [[CVT]] ; GCN: store_short [[XOR]] define amdgpu_kernel void @fabs_multi_use_fpext_f16_to_f32( - float addrspace(1)* %r, - half addrspace(1)* %a) { + ptr addrspace(1) %r, + ptr addrspace(1) %a) { entry: - %a.val = load half, half addrspace(1)* %a + %a.val = load half, ptr addrspace(1) %a %a.fabs = call half @llvm.fabs.f16(half %a.val) %r.val = fpext half %a.fabs to float - store volatile float %r.val, float addrspace(1)* %r - store volatile half %a.fabs, half addrspace(1)* undef + store volatile float %r.val, ptr addrspace(1) %r + store volatile half %a.fabs, ptr addrspace(1) undef ret void } @@ -209,15 +209,15 @@ entry: ; GCN: buffer_store_dword [[ABS_A]] ; GCN: buffer_store_short [[MUL]] define amdgpu_kernel void @fabs_multi_foldable_use_fpext_f16_to_f32( - float addrspace(1)* %r, - half addrspace(1)* %a) { + ptr addrspace(1) %r, + ptr addrspace(1) %a) { entry: - %a.val = load half, half addrspace(1)* %a + %a.val = load half, ptr addrspace(1) %a %a.fabs = call half @llvm.fabs.f16(half %a.val) %r.val = fpext half %a.fabs to float %mul = fmul half %a.fabs, %a.val - store volatile float %r.val, float addrspace(1)* %r - store volatile half %mul, half addrspace(1)* undef + store volatile float %r.val, ptr addrspace(1) %r + store volatile half %mul, ptr addrspace(1) undef ret void } @@ -231,15 +231,15 @@ entry: ; GCN: buffer_store_dword [[CVT]] ; GCN: buffer_store_short [[OR]] define amdgpu_kernel void @fabs_fneg_multi_use_fpext_f16_to_f32( - float addrspace(1)* %r, - half addrspace(1)* %a) { + ptr addrspace(1) %r, + ptr addrspace(1) %a) { entry: - %a.val = load half, half addrspace(1)* %a + %a.val = load half, ptr addrspace(1) %a %a.fabs = call half @llvm.fabs.f16(half %a.val) %a.fneg.fabs = fsub half -0.0, %a.fabs %r.val = fpext half %a.fneg.fabs to float - store volatile float %r.val, float addrspace(1)* %r - store volatile half %a.fneg.fabs, half addrspace(1)* undef + store volatile float %r.val, ptr addrspace(1) %r + store volatile half %a.fneg.fabs, ptr addrspace(1) undef ret void } @@ -256,16 +256,16 @@ entry: ; GCN: buffer_store_dword [[FABS_FNEG]] ; GCN: buffer_store_short [[MUL]] define amdgpu_kernel void 
@fabs_fneg_multi_foldable_use_fpext_f16_to_f32( - float addrspace(1)* %r, - half addrspace(1)* %a) { + ptr addrspace(1) %r, + ptr addrspace(1) %a) { entry: - %a.val = load half, half addrspace(1)* %a + %a.val = load half, ptr addrspace(1) %a %a.fabs = call half @llvm.fabs.f16(half %a.val) %a.fneg.fabs = fsub half -0.0, %a.fabs %r.val = fpext half %a.fneg.fabs to float %mul = fmul half %a.fneg.fabs, %a.val - store volatile float %r.val, float addrspace(1)* %r - store volatile half %mul, half addrspace(1)* undef + store volatile float %r.val, ptr addrspace(1) %r + store volatile half %mul, ptr addrspace(1) undef ret void } diff --git a/llvm/test/CodeGen/AMDGPU/fpext.ll b/llvm/test/CodeGen/AMDGPU/fpext.ll index b11e2ea..cc5291c 100644 --- a/llvm/test/CodeGen/AMDGPU/fpext.ll +++ b/llvm/test/CodeGen/AMDGPU/fpext.ll @@ -3,18 +3,18 @@ ; FUNC-LABEL: {{^}}fpext_f32_to_f64: ; SI: v_cvt_f64_f32_e32 {{v\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} -define amdgpu_kernel void @fpext_f32_to_f64(double addrspace(1)* %out, float %in) { +define amdgpu_kernel void @fpext_f32_to_f64(ptr addrspace(1) %out, float %in) { %result = fpext float %in to double - store double %result, double addrspace(1)* %out + store double %result, ptr addrspace(1) %out ret void } ; FUNC-LABEL: {{^}}fpext_v2f32_to_v2f64: ; SI: v_cvt_f64_f32_e32 ; SI: v_cvt_f64_f32_e32 -define amdgpu_kernel void @fpext_v2f32_to_v2f64(<2 x double> addrspace(1)* %out, <2 x float> %in) { +define amdgpu_kernel void @fpext_v2f32_to_v2f64(ptr addrspace(1) %out, <2 x float> %in) { %result = fpext <2 x float> %in to <2 x double> - store <2 x double> %result, <2 x double> addrspace(1)* %out + store <2 x double> %result, ptr addrspace(1) %out ret void } @@ -22,9 +22,9 @@ define amdgpu_kernel void @fpext_v2f32_to_v2f64(<2 x double> addrspace(1)* %out, ; SI: v_cvt_f64_f32_e32 ; SI: v_cvt_f64_f32_e32 ; SI: v_cvt_f64_f32_e32 -define amdgpu_kernel void @fpext_v3f32_to_v3f64(<3 x double> addrspace(1)* %out, <3 x float> %in) { +define amdgpu_kernel void @fpext_v3f32_to_v3f64(ptr addrspace(1) %out, <3 x float> %in) { %result = fpext <3 x float> %in to <3 x double> - store <3 x double> %result, <3 x double> addrspace(1)* %out + store <3 x double> %result, ptr addrspace(1) %out ret void } @@ -33,9 +33,9 @@ define amdgpu_kernel void @fpext_v3f32_to_v3f64(<3 x double> addrspace(1)* %out, ; SI: v_cvt_f64_f32_e32 ; SI: v_cvt_f64_f32_e32 ; SI: v_cvt_f64_f32_e32 -define amdgpu_kernel void @fpext_v4f32_to_v4f64(<4 x double> addrspace(1)* %out, <4 x float> %in) { +define amdgpu_kernel void @fpext_v4f32_to_v4f64(ptr addrspace(1) %out, <4 x float> %in) { %result = fpext <4 x float> %in to <4 x double> - store <4 x double> %result, <4 x double> addrspace(1)* %out + store <4 x double> %result, ptr addrspace(1) %out ret void } @@ -48,8 +48,8 @@ define amdgpu_kernel void @fpext_v4f32_to_v4f64(<4 x double> addrspace(1)* %out, ; SI: v_cvt_f64_f32_e32 ; SI: v_cvt_f64_f32_e32 ; SI: v_cvt_f64_f32_e32 -define amdgpu_kernel void @fpext_v8f32_to_v8f64(<8 x double> addrspace(1)* %out, <8 x float> %in) { +define amdgpu_kernel void @fpext_v8f32_to_v8f64(ptr addrspace(1) %out, <8 x float> %in) { %result = fpext <8 x float> %in to <8 x double> - store <8 x double> %result, <8 x double> addrspace(1)* %out + store <8 x double> %result, ptr addrspace(1) %out ret void } diff --git a/llvm/test/CodeGen/AMDGPU/fptosi.f16.ll b/llvm/test/CodeGen/AMDGPU/fptosi.f16.ll index 6fc9b7f..03792ca 100644 --- a/llvm/test/CodeGen/AMDGPU/fptosi.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/fptosi.f16.ll @@ -9,12 +9,12 @@ ; GCN: 
buffer_store_short v[[R_I16]] ; GCN: s_endpgm define amdgpu_kernel void @fptosi_f16_to_i16( - i16 addrspace(1)* %r, - half addrspace(1)* %a) { + ptr addrspace(1) %r, + ptr addrspace(1) %a) { entry: - %a.val = load half, half addrspace(1)* %a + %a.val = load half, ptr addrspace(1) %a %r.val = fptosi half %a.val to i16 - store i16 %r.val, i16 addrspace(1)* %r + store i16 %r.val, ptr addrspace(1) %r ret void } @@ -25,12 +25,12 @@ entry: ; GCN: buffer_store_dword v[[R_I32]] ; GCN: s_endpgm define amdgpu_kernel void @fptosi_f16_to_i32( - i32 addrspace(1)* %r, - half addrspace(1)* %a) { + ptr addrspace(1) %r, + ptr addrspace(1) %a) { entry: - %a.val = load half, half addrspace(1)* %a + %a.val = load half, ptr addrspace(1) %a %r.val = fptosi half %a.val to i32 - store i32 %r.val, i32 addrspace(1)* %r + store i32 %r.val, ptr addrspace(1) %r ret void } @@ -45,12 +45,12 @@ entry: ; GCN: buffer_store_dwordx2 v[[[R_I64_Low]]{{\:}}[[R_I64_High]]] ; GCN: s_endpgm define amdgpu_kernel void @fptosi_f16_to_i64( - i64 addrspace(1)* %r, - half addrspace(1)* %a) { + ptr addrspace(1) %r, + ptr addrspace(1) %a) { entry: - %a.val = load half, half addrspace(1)* %a + %a.val = load half, ptr addrspace(1) %a %r.val = fptosi half %a.val to i64 - store i64 %r.val, i64 addrspace(1)* %r + store i64 %r.val, ptr addrspace(1) %r ret void } @@ -74,12 +74,12 @@ entry: ; GCN: s_endpgm define amdgpu_kernel void @fptosi_v2f16_to_v2i16( - <2 x i16> addrspace(1)* %r, - <2 x half> addrspace(1)* %a) { + ptr addrspace(1) %r, + ptr addrspace(1) %a) { entry: - %a.val = load <2 x half>, <2 x half> addrspace(1)* %a + %a.val = load <2 x half>, ptr addrspace(1) %a %r.val = fptosi <2 x half> %a.val to <2 x i16> - store <2 x i16> %r.val, <2 x i16> addrspace(1)* %r + store <2 x i16> %r.val, ptr addrspace(1) %r ret void } @@ -93,12 +93,12 @@ entry: ; GCN: buffer_store_dwordx2 ; GCN: s_endpgm define amdgpu_kernel void @fptosi_v2f16_to_v2i32( - <2 x i32> addrspace(1)* %r, - <2 x half> addrspace(1)* %a) { + ptr addrspace(1) %r, + ptr addrspace(1) %a) { entry: - %a.val = load <2 x half>, <2 x half> addrspace(1)* %a + %a.val = load <2 x half>, ptr addrspace(1) %a %r.val = fptosi <2 x half> %a.val to <2 x i32> - store <2 x i32> %r.val, <2 x i32> addrspace(1)* %r + store <2 x i32> %r.val, ptr addrspace(1) %r ret void } @@ -124,12 +124,12 @@ entry: ; GCN: buffer_store_dwordx4 v[[[R_I64_0_Low]]{{\:}}[[R_I64_1_High]]] ; GCN: s_endpgm define amdgpu_kernel void @fptosi_v2f16_to_v2i64( - <2 x i64> addrspace(1)* %r, - <2 x half> addrspace(1)* %a) { + ptr addrspace(1) %r, + ptr addrspace(1) %a) { entry: - %a.val = load <2 x half>, <2 x half> addrspace(1)* %a + %a.val = load <2 x half>, ptr addrspace(1) %a %r.val = fptosi <2 x half> %a.val to <2 x i64> - store <2 x i64> %r.val, <2 x i64> addrspace(1)* %r + store <2 x i64> %r.val, ptr addrspace(1) %r ret void } @@ -139,9 +139,9 @@ entry: ; SI: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, vcc ; VI: v_cmp_eq_f16_e64 s{{\[[0-9]+:[0-9]+\]}}, 0xbc00, s{{[0-9]+}} ; VI: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, s[4:5] -define amdgpu_kernel void @fptosi_f16_to_i1(i1 addrspace(1)* %out, half %in) { +define amdgpu_kernel void @fptosi_f16_to_i1(ptr addrspace(1) %out, half %in) { entry: %conv = fptosi half %in to i1 - store i1 %conv, i1 addrspace(1)* %out + store i1 %conv, ptr addrspace(1) %out ret void } diff --git a/llvm/test/CodeGen/AMDGPU/fptoui.f16.ll b/llvm/test/CodeGen/AMDGPU/fptoui.f16.ll index bf8677b..48b76eb 100644 --- a/llvm/test/CodeGen/AMDGPU/fptoui.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/fptoui.f16.ll @@ -9,12 +9,12 @@ 
; GCN: buffer_store_short v[[R_I16]] ; GCN: s_endpgm define amdgpu_kernel void @fptoui_f16_to_i16( - i16 addrspace(1)* %r, - half addrspace(1)* %a) { + ptr addrspace(1) %r, + ptr addrspace(1) %a) { entry: - %a.val = load half, half addrspace(1)* %a + %a.val = load half, ptr addrspace(1) %a %r.val = fptoui half %a.val to i16 - store i16 %r.val, i16 addrspace(1)* %r + store i16 %r.val, ptr addrspace(1) %r ret void } @@ -25,12 +25,12 @@ entry: ; GCN: buffer_store_dword v[[R_I32]] ; GCN: s_endpgm define amdgpu_kernel void @fptoui_f16_to_i32( - i32 addrspace(1)* %r, - half addrspace(1)* %a) { + ptr addrspace(1) %r, + ptr addrspace(1) %a) { entry: - %a.val = load half, half addrspace(1)* %a + %a.val = load half, ptr addrspace(1) %a %r.val = fptoui half %a.val to i32 - store i32 %r.val, i32 addrspace(1)* %r + store i32 %r.val, ptr addrspace(1) %r ret void } @@ -45,12 +45,12 @@ entry: ; GCN: buffer_store_dwordx2 v[[[R_I64_Low]]{{\:}}[[R_I64_High]]] ; GCN: s_endpgm define amdgpu_kernel void @fptoui_f16_to_i64( - i64 addrspace(1)* %r, - half addrspace(1)* %a) { + ptr addrspace(1) %r, + ptr addrspace(1) %a) { entry: - %a.val = load half, half addrspace(1)* %a + %a.val = load half, ptr addrspace(1) %a %r.val = fptoui half %a.val to i64 - store i64 %r.val, i64 addrspace(1)* %r + store i64 %r.val, ptr addrspace(1) %r ret void } @@ -73,12 +73,12 @@ entry: ; GCN: s_endpgm define amdgpu_kernel void @fptoui_v2f16_to_v2i16( - <2 x i16> addrspace(1)* %r, - <2 x half> addrspace(1)* %a) { + ptr addrspace(1) %r, + ptr addrspace(1) %a) { entry: - %a.val = load <2 x half>, <2 x half> addrspace(1)* %a + %a.val = load <2 x half>, ptr addrspace(1) %a %r.val = fptoui <2 x half> %a.val to <2 x i16> - store <2 x i16> %r.val, <2 x i16> addrspace(1)* %r + store <2 x i16> %r.val, ptr addrspace(1) %r ret void } @@ -92,12 +92,12 @@ entry: ; GCN: buffer_store_dwordx2 ; GCN: s_endpgm define amdgpu_kernel void @fptoui_v2f16_to_v2i32( - <2 x i32> addrspace(1)* %r, - <2 x half> addrspace(1)* %a) { + ptr addrspace(1) %r, + ptr addrspace(1) %a) { entry: - %a.val = load <2 x half>, <2 x half> addrspace(1)* %a + %a.val = load <2 x half>, ptr addrspace(1) %a %r.val = fptoui <2 x half> %a.val to <2 x i32> - store <2 x i32> %r.val, <2 x i32> addrspace(1)* %r + store <2 x i32> %r.val, ptr addrspace(1) %r ret void } @@ -120,12 +120,12 @@ entry: ; GCN: buffer_store_dwordx4 v[[[R_I64_0_Low]]{{\:}}[[R_I64_1_High]]] ; GCN: s_endpgm define amdgpu_kernel void @fptoui_v2f16_to_v2i64( - <2 x i64> addrspace(1)* %r, - <2 x half> addrspace(1)* %a) { + ptr addrspace(1) %r, + ptr addrspace(1) %a) { entry: - %a.val = load <2 x half>, <2 x half> addrspace(1)* %a + %a.val = load <2 x half>, ptr addrspace(1) %a %r.val = fptoui <2 x half> %a.val to <2 x i64> - store <2 x i64> %r.val, <2 x i64> addrspace(1)* %r + store <2 x i64> %r.val, ptr addrspace(1) %r ret void } @@ -135,9 +135,9 @@ entry: ; SI: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, vcc ; VI: v_cmp_eq_f16_e64 s{{\[[0-9]+:[0-9]+\]}}, 1.0, s{{[0-9]+}} ; VI: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, s[4:5] -define amdgpu_kernel void @fptoui_f16_to_i1(i1 addrspace(1)* %out, half %in) { +define amdgpu_kernel void @fptoui_f16_to_i1(ptr addrspace(1) %out, half %in) { entry: %conv = fptoui half %in to i1 - store i1 %conv, i1 addrspace(1)* %out + store i1 %conv, ptr addrspace(1) %out ret void } diff --git a/llvm/test/CodeGen/AMDGPU/i8-to-double-to-float.ll b/llvm/test/CodeGen/AMDGPU/i8-to-double-to-float.ll index d501be5..8300436 100644 --- a/llvm/test/CodeGen/AMDGPU/i8-to-double-to-float.ll +++ 
b/llvm/test/CodeGen/AMDGPU/i8-to-double-to-float.ll @@ -2,10 +2,10 @@ ;CHECK: UINT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -define amdgpu_kernel void @test(float addrspace(1)* %out, i8 addrspace(1)* %in) { - %1 = load i8, i8 addrspace(1)* %in +define amdgpu_kernel void @test(ptr addrspace(1) %out, ptr addrspace(1) %in) { + %1 = load i8, ptr addrspace(1) %in %2 = uitofp i8 %1 to double %3 = fptrunc double %2 to float - store float %3, float addrspace(1)* %out + store float %3, ptr addrspace(1) %out ret void } diff --git a/llvm/test/CodeGen/AMDGPU/setcc-sext.ll b/llvm/test/CodeGen/AMDGPU/setcc-sext.ll index eadce22..0c2d669 100644 --- a/llvm/test/CodeGen/AMDGPU/setcc-sext.ll +++ b/llvm/test/CodeGen/AMDGPU/setcc-sext.ll @@ -5,7 +5,7 @@ ; GCN-NEXT: s_and_saveexec_b64 {{[^,]+}}, [[CC]] ; GCN-NOT: v_cndmask_ -define amdgpu_kernel void @setcc_sgt_true_sext(i32 addrspace(1)* nocapture %arg) { +define amdgpu_kernel void @setcc_sgt_true_sext(ptr addrspace(1) nocapture %arg) { bb: %x = tail call i32 @llvm.amdgcn.workitem.id.x() %y = tail call i32 @llvm.amdgcn.workitem.id.y() @@ -15,7 +15,7 @@ bb: br i1 %cond, label %then, label %endif then: - store i32 1, i32 addrspace(1)* %arg, align 4 + store i32 1, ptr addrspace(1) %arg, align 4 br label %endif endif: @@ -27,7 +27,7 @@ endif: ; GCN-NEXT: s_and_saveexec_b64 {{[^,]+}}, [[CC]] ; GCN-NOT: v_cndmask_ -define amdgpu_kernel void @setcc_sgt_true_sext_swap(i32 addrspace(1)* nocapture %arg) { +define amdgpu_kernel void @setcc_sgt_true_sext_swap(ptr addrspace(1) nocapture %arg) { bb: %x = tail call i32 @llvm.amdgcn.workitem.id.x() %y = tail call i32 @llvm.amdgcn.workitem.id.y() @@ -37,7 +37,7 @@ bb: br i1 %cond, label %then, label %endif then: - store i32 1, i32 addrspace(1)* %arg, align 4 + store i32 1, ptr addrspace(1) %arg, align 4 br label %endif endif: @@ -49,7 +49,7 @@ endif: ; GCN-NEXT: s_and_saveexec_b64 {{[^,]+}}, [[CC]] ; GCN-NOT: v_cndmask_ -define amdgpu_kernel void @setcc_ne_true_sext(i32 addrspace(1)* nocapture %arg) { +define amdgpu_kernel void @setcc_ne_true_sext(ptr addrspace(1) nocapture %arg) { bb: %x = tail call i32 @llvm.amdgcn.workitem.id.x() %y = tail call i32 @llvm.amdgcn.workitem.id.y() @@ -59,7 +59,7 @@ bb: br i1 %cond, label %then, label %endif then: - store i32 1, i32 addrspace(1)* %arg, align 4 + store i32 1, ptr addrspace(1) %arg, align 4 br label %endif endif: @@ -71,7 +71,7 @@ endif: ; GCN-NEXT: s_and_saveexec_b64 {{[^,]+}}, [[CC]] ; GCN-NOT: v_cndmask_ -define amdgpu_kernel void @setcc_ult_true_sext(i32 addrspace(1)* nocapture %arg) { +define amdgpu_kernel void @setcc_ult_true_sext(ptr addrspace(1) nocapture %arg) { bb: %x = tail call i32 @llvm.amdgcn.workitem.id.x() %y = tail call i32 @llvm.amdgcn.workitem.id.y() @@ -81,7 +81,7 @@ bb: br i1 %cond, label %then, label %endif then: - store i32 1, i32 addrspace(1)* %arg, align 4 + store i32 1, ptr addrspace(1) %arg, align 4 br label %endif endif: @@ -93,7 +93,7 @@ endif: ; GCN-NEXT: s_and_saveexec_b64 {{[^,]+}}, [[CC]] ; GCN-NOT: v_cndmask_ -define amdgpu_kernel void @setcc_eq_true_sext(i32 addrspace(1)* nocapture %arg) { +define amdgpu_kernel void @setcc_eq_true_sext(ptr addrspace(1) nocapture %arg) { bb: %x = tail call i32 @llvm.amdgcn.workitem.id.x() %y = tail call i32 @llvm.amdgcn.workitem.id.y() @@ -103,7 +103,7 @@ bb: br i1 %cond, label %then, label %endif then: - store i32 1, i32 addrspace(1)* %arg, align 4 + store i32 1, ptr addrspace(1) %arg, align 4 br label %endif endif: @@ -115,7 +115,7 @@ endif: ; GCN-NEXT: s_and_saveexec_b64 {{[^,]+}}, [[CC]] ; GCN-NOT: 
v_cndmask_ -define amdgpu_kernel void @setcc_sle_true_sext(i32 addrspace(1)* nocapture %arg) { +define amdgpu_kernel void @setcc_sle_true_sext(ptr addrspace(1) nocapture %arg) { bb: %x = tail call i32 @llvm.amdgcn.workitem.id.x() %y = tail call i32 @llvm.amdgcn.workitem.id.y() @@ -125,7 +125,7 @@ bb: br i1 %cond, label %then, label %endif then: - store i32 1, i32 addrspace(1)* %arg, align 4 + store i32 1, ptr addrspace(1) %arg, align 4 br label %endif endif: @@ -137,7 +137,7 @@ endif: ; GCN-NEXT: s_and_saveexec_b64 {{[^,]+}}, [[CC]] ; GCN-NOT: v_cndmask_ -define amdgpu_kernel void @setcc_uge_true_sext(i32 addrspace(1)* nocapture %arg) { +define amdgpu_kernel void @setcc_uge_true_sext(ptr addrspace(1) nocapture %arg) { bb: %x = tail call i32 @llvm.amdgcn.workitem.id.x() %y = tail call i32 @llvm.amdgcn.workitem.id.y() @@ -147,7 +147,7 @@ bb: br i1 %cond, label %then, label %endif then: - store i32 1, i32 addrspace(1)* %arg, align 4 + store i32 1, ptr addrspace(1) %arg, align 4 br label %endif endif: @@ -159,7 +159,7 @@ endif: ; GCN-NEXT: s_and_saveexec_b64 {{[^,]+}}, [[CC]] ; GCN-NOT: v_cndmask_ -define amdgpu_kernel void @setcc_eq_false_sext(i32 addrspace(1)* nocapture %arg) { +define amdgpu_kernel void @setcc_eq_false_sext(ptr addrspace(1) nocapture %arg) { bb: %x = tail call i32 @llvm.amdgcn.workitem.id.x() %y = tail call i32 @llvm.amdgcn.workitem.id.y() @@ -169,7 +169,7 @@ bb: br i1 %cond, label %then, label %endif then: - store i32 1, i32 addrspace(1)* %arg, align 4 + store i32 1, ptr addrspace(1) %arg, align 4 br label %endif endif: @@ -181,7 +181,7 @@ endif: ; GCN-NEXT: s_and_saveexec_b64 {{[^,]+}}, [[CC]] ; GCN-NOT: v_cndmask_ -define amdgpu_kernel void @setcc_sge_false_sext(i32 addrspace(1)* nocapture %arg) { +define amdgpu_kernel void @setcc_sge_false_sext(ptr addrspace(1) nocapture %arg) { bb: %x = tail call i32 @llvm.amdgcn.workitem.id.x() %y = tail call i32 @llvm.amdgcn.workitem.id.y() @@ -191,7 +191,7 @@ bb: br i1 %cond, label %then, label %endif then: - store i32 1, i32 addrspace(1)* %arg, align 4 + store i32 1, ptr addrspace(1) %arg, align 4 br label %endif endif: @@ -203,7 +203,7 @@ endif: ; GCN-NEXT: s_and_saveexec_b64 {{[^,]+}}, [[CC]] ; GCN-NOT: v_cndmask_ -define amdgpu_kernel void @setcc_ule_false_sext(i32 addrspace(1)* nocapture %arg) { +define amdgpu_kernel void @setcc_ule_false_sext(ptr addrspace(1) nocapture %arg) { bb: %x = tail call i32 @llvm.amdgcn.workitem.id.x() %y = tail call i32 @llvm.amdgcn.workitem.id.y() @@ -213,7 +213,7 @@ bb: br i1 %cond, label %then, label %endif then: - store i32 1, i32 addrspace(1)* %arg, align 4 + store i32 1, ptr addrspace(1) %arg, align 4 br label %endif endif: @@ -225,7 +225,7 @@ endif: ; GCN-NEXT: s_and_saveexec_b64 {{[^,]+}}, [[CC]] ; GCN-NOT: v_cndmask_ -define amdgpu_kernel void @setcc_ne_false_sext(i32 addrspace(1)* nocapture %arg) { +define amdgpu_kernel void @setcc_ne_false_sext(ptr addrspace(1) nocapture %arg) { bb: %x = tail call i32 @llvm.amdgcn.workitem.id.x() %y = tail call i32 @llvm.amdgcn.workitem.id.y() @@ -235,7 +235,7 @@ bb: br i1 %cond, label %then, label %endif then: - store i32 1, i32 addrspace(1)* %arg, align 4 + store i32 1, ptr addrspace(1) %arg, align 4 br label %endif endif: @@ -246,7 +246,7 @@ endif: ; GCN-NEXT: s_and_saveexec_b64 {{[^,]+}}, [[CC]] ; GCN-NOT: v_cndmask_ -define amdgpu_kernel void @setcc_ugt_false_sext(i32 addrspace(1)* nocapture %arg) { +define amdgpu_kernel void @setcc_ugt_false_sext(ptr addrspace(1) nocapture %arg) { bb: %x = tail call i32 @llvm.amdgcn.workitem.id.x() %y = tail call i32 
@llvm.amdgcn.workitem.id.y() @@ -256,7 +256,7 @@ bb: br i1 %cond, label %then, label %endif then: - store i32 1, i32 addrspace(1)* %arg, align 4 + store i32 1, ptr addrspace(1) %arg, align 4 br label %endif endif: @@ -267,7 +267,7 @@ endif: ; GCN-NEXT: s_and_saveexec_b64 {{[^,]+}}, [[CC]] ; GCN-NOT: v_cndmask_ -define amdgpu_kernel void @setcc_slt_false_sext(i32 addrspace(1)* nocapture %arg) { +define amdgpu_kernel void @setcc_slt_false_sext(ptr addrspace(1) nocapture %arg) { bb: %x = tail call i32 @llvm.amdgcn.workitem.id.x() %y = tail call i32 @llvm.amdgcn.workitem.id.y() @@ -277,7 +277,7 @@ bb: br i1 %cond, label %then, label %endif then: - store i32 1, i32 addrspace(1)* %arg, align 4 + store i32 1, ptr addrspace(1) %arg, align 4 br label %endif endif: diff --git a/llvm/test/CodeGen/AMDGPU/sext-eliminate.ll b/llvm/test/CodeGen/AMDGPU/sext-eliminate.ll index 0b780af..91b418a 100644 --- a/llvm/test/CodeGen/AMDGPU/sext-eliminate.ll +++ b/llvm/test/CodeGen/AMDGPU/sext-eliminate.ll @@ -6,10 +6,10 @@ ; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+\.[XYZW]]], [[ADDR:T[0-9]+.[XYZW]]] ; EG: SUB_INT {{[* ]*}}[[RES]] ; EG-NOT: BFE -define amdgpu_kernel void @sext_in_reg_i1_i32_add(i32 addrspace(1)* %out, i1 %a, i32 %b) { +define amdgpu_kernel void @sext_in_reg_i1_i32_add(ptr addrspace(1) %out, i1 %a, i32 %b) { %sext = sext i1 %a to i32 %res = add i32 %b, %sext - store i32 %res, i32 addrspace(1)* %out + store i32 %res, ptr addrspace(1) %out ret void } @@ -18,9 +18,9 @@ define amdgpu_kernel void @sext_in_reg_i1_i32_add(i32 addrspace(1)* %out, i1 %a, ; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+\.[XYZW]]], [[ADDR:T[0-9]+.[XYZW]]] ; EG: ADD_INT {{[* ]*}}[[RES]] ; EG-NOT: BFE -define amdgpu_kernel void @sext_in_reg_i1_i32_sub(i32 addrspace(1)* %out, i1 %a, i32 %b) { +define amdgpu_kernel void @sext_in_reg_i1_i32_sub(ptr addrspace(1) %out, i1 %a, i32 %b) { %sext = sext i1 %a to i32 %res = sub i32 %b, %sext - store i32 %res, i32 addrspace(1)* %out + store i32 %res, ptr addrspace(1) %out ret void } diff --git a/llvm/test/CodeGen/AMDGPU/sext-in-reg.ll b/llvm/test/CodeGen/AMDGPU/sext-in-reg.ll index 30487eac..6376833 100644 --- a/llvm/test/CodeGen/AMDGPU/sext-in-reg.ll +++ b/llvm/test/CodeGen/AMDGPU/sext-in-reg.ll @@ -15,10 +15,10 @@ ; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+\.[XYZW]]], [[ADDR:T[0-9]+.[XYZW]]] ; EG: LSHR * [[ADDR]] ; EG: BFE_INT * [[RES]], {{.*}}, 0.0, 1 -define amdgpu_kernel void @sext_in_reg_i1_i32(i32 addrspace(1)* %out, i32 %in) #0 { +define amdgpu_kernel void @sext_in_reg_i1_i32(ptr addrspace(1) %out, i32 %in) #0 { %shl = shl i32 %in, 31 %sext = ashr i32 %shl, 31 - store i32 %sext, i32 addrspace(1)* %out + store i32 %sext, ptr addrspace(1) %out ret void } @@ -32,11 +32,11 @@ define amdgpu_kernel void @sext_in_reg_i1_i32(i32 addrspace(1)* %out, i32 %in) # ; EG: ADD_INT ; EG-NEXT: BFE_INT [[RES]], {{.*}}, 0.0, literal ; EG-NEXT: LSHR * [[ADDR]] -define amdgpu_kernel void @sext_in_reg_i8_to_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 { +define amdgpu_kernel void @sext_in_reg_i8_to_i32(ptr addrspace(1) %out, i32 %a, i32 %b) #0 { %c = add i32 %a, %b ; add to prevent folding into extload %shl = shl i32 %c, 24 %ashr = ashr i32 %shl, 24 - store i32 %ashr, i32 addrspace(1)* %out, align 4 + store i32 %ashr, ptr addrspace(1) %out, align 4 ret void } @@ -50,11 +50,11 @@ define amdgpu_kernel void @sext_in_reg_i8_to_i32(i32 addrspace(1)* %out, i32 %a, ; EG: ADD_INT ; EG-NEXT: BFE_INT [[RES]], {{.*}}, 0.0, literal ; EG-NEXT: LSHR * [[ADDR]] -define amdgpu_kernel void @sext_in_reg_i16_to_i32(i32 
addrspace(1)* %out, i32 %a, i32 %b) #0 { +define amdgpu_kernel void @sext_in_reg_i16_to_i32(ptr addrspace(1) %out, i32 %a, i32 %b) #0 { %c = add i32 %a, %b ; add to prevent folding into extload %shl = shl i32 %c, 16 %ashr = ashr i32 %shl, 16 - store i32 %ashr, i32 addrspace(1)* %out, align 4 + store i32 %ashr, ptr addrspace(1) %out, align 4 ret void } @@ -68,11 +68,11 @@ define amdgpu_kernel void @sext_in_reg_i16_to_i32(i32 addrspace(1)* %out, i32 %a ; EG: ADD_INT ; EG-NEXT: BFE_INT [[RES]], {{.*}}, 0.0, literal ; EG-NEXT: LSHR * [[ADDR]] -define amdgpu_kernel void @sext_in_reg_i8_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i32> %a, <1 x i32> %b) #0 { +define amdgpu_kernel void @sext_in_reg_i8_to_v1i32(ptr addrspace(1) %out, <1 x i32> %a, <1 x i32> %b) #0 { %c = add <1 x i32> %a, %b ; add to prevent folding into extload %shl = shl <1 x i32> %c, %ashr = ashr <1 x i32> %shl, - store <1 x i32> %ashr, <1 x i32> addrspace(1)* %out, align 4 + store <1 x i32> %ashr, ptr addrspace(1) %out, align 4 ret void } @@ -82,11 +82,11 @@ define amdgpu_kernel void @sext_in_reg_i8_to_v1i32(<1 x i32> addrspace(1)* %out, ; GCN-DAG: v_mov_b32_e32 v[[VLO:[0-9]+]], s[[SLO]] ; GCN-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], s[[SHI]] ; GCN: buffer_store_dwordx2 v[[[VLO]]:[[VHI]]] -define amdgpu_kernel void @sext_in_reg_i1_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) #0 { +define amdgpu_kernel void @sext_in_reg_i1_to_i64(ptr addrspace(1) %out, i64 %a, i64 %b) #0 { %c = shl i64 %a, %b %shl = shl i64 %c, 63 %ashr = ashr i64 %shl, 63 - store i64 %ashr, i64 addrspace(1)* %out, align 8 + store i64 %ashr, ptr addrspace(1) %out, align 8 ret void } @@ -96,11 +96,11 @@ define amdgpu_kernel void @sext_in_reg_i1_to_i64(i64 addrspace(1)* %out, i64 %a, ; GCN-DAG: v_mov_b32_e32 v[[VLO:[0-9]+]], s[[SLO]] ; GCN-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], s[[SHI]] ; GCN: buffer_store_dwordx2 v[[[VLO]]:[[VHI]]] -define amdgpu_kernel void @sext_in_reg_i8_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) #0 { +define amdgpu_kernel void @sext_in_reg_i8_to_i64(ptr addrspace(1) %out, i64 %a, i64 %b) #0 { %c = shl i64 %a, %b %shl = shl i64 %c, 56 %ashr = ashr i64 %shl, 56 - store i64 %ashr, i64 addrspace(1)* %out, align 8 + store i64 %ashr, ptr addrspace(1) %out, align 8 ret void } @@ -111,11 +111,11 @@ define amdgpu_kernel void @sext_in_reg_i8_to_i64(i64 addrspace(1)* %out, i64 %a, ; GCN-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], s[[SHI]] ; GCN: buffer_store_dwordx2 v[[[VLO]]:[[VHI]]] -define amdgpu_kernel void @sext_in_reg_i16_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) #0 { +define amdgpu_kernel void @sext_in_reg_i16_to_i64(ptr addrspace(1) %out, i64 %a, i64 %b) #0 { %c = shl i64 %a, %b %shl = shl i64 %c, 48 %ashr = ashr i64 %shl, 48 - store i64 %ashr, i64 addrspace(1)* %out, align 8 + store i64 %ashr, ptr addrspace(1) %out, align 8 ret void } @@ -125,11 +125,11 @@ define amdgpu_kernel void @sext_in_reg_i16_to_i64(i64 addrspace(1)* %out, i64 %a ; GCN-DAG: v_mov_b32_e32 v[[VLO:[0-9]+]], s[[SLO]] ; GCN-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], s[[SHI]] ; GCN: buffer_store_dwordx2 v[[[VLO]]:[[VHI]]] -define amdgpu_kernel void @sext_in_reg_i32_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) #0 { +define amdgpu_kernel void @sext_in_reg_i32_to_i64(ptr addrspace(1) %out, i64 %a, i64 %b) #0 { %c = shl i64 %a, %b %shl = shl i64 %c, 32 %ashr = ashr i64 %shl, 32 - store i64 %ashr, i64 addrspace(1)* %out, align 8 + store i64 %ashr, ptr addrspace(1) %out, align 8 ret void } @@ -140,11 +140,11 @@ define amdgpu_kernel void @sext_in_reg_i32_to_i64(i64 addrspace(1)* %out, i64 %a ; 
XGCN: buffer_store_dword ; XEG: BFE_INT ; XEG: ASHR -; define amdgpu_kernel void @sext_in_reg_i8_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i64> %a, <1 x i64> %b) #0 { +; define amdgpu_kernel void @sext_in_reg_i8_to_v1i64(ptr addrspace(1) %out, <1 x i64> %a, <1 x i64> %b) #0 { ; %c = add <1 x i64> %a, %b ; %shl = shl <1 x i64> %c, ; %ashr = ashr <1 x i64> %shl, -; store <1 x i64> %ashr, <1 x i64> addrspace(1)* %out, align 8 +; store <1 x i64> %ashr, ptr addrspace(1) %out, align 8 ; ret void ; } @@ -160,18 +160,18 @@ define amdgpu_kernel void @sext_in_reg_i32_to_i64(i64 addrspace(1)* %out, i64 %a ; SI: buffer_store_dwordx2 v[[[LO]]:[[HI]]] ; GFX89: {{flat|global}}_store_dwordx2 v{{.+}}, v[[[LO]]:[[HI]]] -define amdgpu_kernel void @v_sext_in_reg_i1_to_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) #0 { +define amdgpu_kernel void @v_sext_in_reg_i1_to_i64(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() - %a.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid - %b.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid - %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %tid - %a = load i64, i64 addrspace(1)* %a.gep, align 8 - %b = load i64, i64 addrspace(1)* %b.gep, align 8 + %a.gep = getelementptr i64, ptr addrspace(1) %aptr, i32 %tid + %b.gep = getelementptr i64, ptr addrspace(1) %aptr, i32 %tid + %out.gep = getelementptr i64, ptr addrspace(1) %out, i32 %tid + %a = load i64, ptr addrspace(1) %a.gep, align 8 + %b = load i64, ptr addrspace(1) %b.gep, align 8 %c = shl i64 %a, %b %shl = shl i64 %c, 63 %ashr = ashr i64 %shl, 63 - store i64 %ashr, i64 addrspace(1)* %out.gep, align 8 + store i64 %ashr, ptr addrspace(1) %out.gep, align 8 ret void } @@ -187,18 +187,18 @@ define amdgpu_kernel void @v_sext_in_reg_i1_to_i64(i64 addrspace(1)* %out, i64 a ; SI: buffer_store_dwordx2 v[[[LO]]:[[HI]]] ; GFX89: {{flat|global}}_store_dwordx2 v{{.+}}, v[[[LO]]:[[HI]]] -define amdgpu_kernel void @v_sext_in_reg_i8_to_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) #0 { +define amdgpu_kernel void @v_sext_in_reg_i8_to_i64(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() - %a.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid - %b.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid - %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %tid - %a = load i64, i64 addrspace(1)* %a.gep, align 8 - %b = load i64, i64 addrspace(1)* %b.gep, align 8 + %a.gep = getelementptr i64, ptr addrspace(1) %aptr, i32 %tid + %b.gep = getelementptr i64, ptr addrspace(1) %aptr, i32 %tid + %out.gep = getelementptr i64, ptr addrspace(1) %out, i32 %tid + %a = load i64, ptr addrspace(1) %a.gep, align 8 + %b = load i64, ptr addrspace(1) %b.gep, align 8 %c = shl i64 %a, %b %shl = shl i64 %c, 56 %ashr = ashr i64 %shl, 56 - store i64 %ashr, i64 addrspace(1)* %out.gep, align 8 + store i64 %ashr, ptr addrspace(1) %out.gep, align 8 ret void } @@ -214,18 +214,18 @@ define amdgpu_kernel void @v_sext_in_reg_i8_to_i64(i64 addrspace(1)* %out, i64 a ; SI: buffer_store_dwordx2 v[[[LO]]:[[HI]]] ; GFX89: {{flat|global}}_store_dwordx2 v{{.+}}, v[[[LO]]:[[HI]]] -define amdgpu_kernel void @v_sext_in_reg_i16_to_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) #0 { +define amdgpu_kernel void @v_sext_in_reg_i16_to_i64(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr) #0 { 
%tid = call i32 @llvm.amdgcn.workitem.id.x() - %a.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid - %b.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid - %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %tid - %a = load i64, i64 addrspace(1)* %a.gep, align 8 - %b = load i64, i64 addrspace(1)* %b.gep, align 8 + %a.gep = getelementptr i64, ptr addrspace(1) %aptr, i32 %tid + %b.gep = getelementptr i64, ptr addrspace(1) %aptr, i32 %tid + %out.gep = getelementptr i64, ptr addrspace(1) %out, i32 %tid + %a = load i64, ptr addrspace(1) %a.gep, align 8 + %b = load i64, ptr addrspace(1) %b.gep, align 8 %c = shl i64 %a, %b %shl = shl i64 %c, 48 %ashr = ashr i64 %shl, 48 - store i64 %ashr, i64 addrspace(1)* %out.gep, align 8 + store i64 %ashr, ptr addrspace(1) %out.gep, align 8 ret void } @@ -238,18 +238,18 @@ define amdgpu_kernel void @v_sext_in_reg_i16_to_i64(i64 addrspace(1)* %out, i64 ; GCN: v_ashrrev_i32_e32 v[[SHR:[0-9]+]], 31, v[[LO]] ; GFX89: {{flat|global}}_store_dwordx2 v{{.+}}, v[[[LO]]:[[SHR]]] -define amdgpu_kernel void @v_sext_in_reg_i32_to_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) #0 { +define amdgpu_kernel void @v_sext_in_reg_i32_to_i64(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() - %a.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid - %b.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid - %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %tid - %a = load i64, i64 addrspace(1)* %a.gep, align 8 - %b = load i64, i64 addrspace(1)* %b.gep, align 8 + %a.gep = getelementptr i64, ptr addrspace(1) %aptr, i32 %tid + %b.gep = getelementptr i64, ptr addrspace(1) %aptr, i32 %tid + %out.gep = getelementptr i64, ptr addrspace(1) %out, i32 %tid + %a = load i64, ptr addrspace(1) %a.gep, align 8 + %b = load i64, ptr addrspace(1) %b.gep, align 8 %c = shl i64 %a, %b %shl = shl i64 %c, 32 %ashr = ashr i64 %shl, 32 - store i64 %ashr, i64 addrspace(1)* %out.gep, align 8 + store i64 %ashr, ptr addrspace(1) %out.gep, align 8 ret void } @@ -264,11 +264,11 @@ define amdgpu_kernel void @v_sext_in_reg_i32_to_i64(i64 addrspace(1)* %out, i64 ; EG: LSHL ; EG: ASHR [[RES]] ; EG: LSHR {{\*?}} [[ADDR]] -define amdgpu_kernel void @sext_in_reg_i1_in_i32_other_amount(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 { +define amdgpu_kernel void @sext_in_reg_i1_in_i32_other_amount(ptr addrspace(1) %out, i32 %a, i32 %b) #0 { %c = add i32 %a, %b %x = shl i32 %c, 6 %y = ashr i32 %x, 7 - store i32 %y, i32 addrspace(1)* %out + store i32 %y, ptr addrspace(1) %out ret void } @@ -287,11 +287,11 @@ define amdgpu_kernel void @sext_in_reg_i1_in_i32_other_amount(i32 addrspace(1)* ; EG: LSHL ; EG: ASHR [[RES]] ; EG: LSHR {{\*?}} [[ADDR]] -define amdgpu_kernel void @sext_in_reg_v2i1_in_v2i32_other_amount(<2 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b) #0 { +define amdgpu_kernel void @sext_in_reg_v2i1_in_v2i32_other_amount(ptr addrspace(1) %out, <2 x i32> %a, <2 x i32> %b) #0 { %c = add <2 x i32> %a, %b %x = shl <2 x i32> %c, %y = ashr <2 x i32> %x, - store <2 x i32> %y, <2 x i32> addrspace(1)* %out + store <2 x i32> %y, ptr addrspace(1) %out ret void } @@ -305,11 +305,11 @@ define amdgpu_kernel void @sext_in_reg_v2i1_in_v2i32_other_amount(<2 x i32> addr ; EG: BFE_INT [[RES]] ; EG: BFE_INT [[RES]] ; EG: LSHR {{\*?}} [[ADDR]] -define amdgpu_kernel void @sext_in_reg_v2i1_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b) #0 { +define amdgpu_kernel void 
@sext_in_reg_v2i1_to_v2i32(ptr addrspace(1) %out, <2 x i32> %a, <2 x i32> %b) #0 { %c = add <2 x i32> %a, %b ; add to prevent folding into extload %shl = shl <2 x i32> %c, %ashr = ashr <2 x i32> %shl, - store <2 x i32> %ashr, <2 x i32> addrspace(1)* %out, align 8 + store <2 x i32> %ashr, ptr addrspace(1) %out, align 8 ret void } @@ -326,11 +326,11 @@ define amdgpu_kernel void @sext_in_reg_v2i1_to_v2i32(<2 x i32> addrspace(1)* %ou ; EG: BFE_INT [[RES]] ; EG: BFE_INT [[RES]] ; EG: LSHR {{\*?}} [[ADDR]] -define amdgpu_kernel void @sext_in_reg_v4i1_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %a, <4 x i32> %b) #0 { +define amdgpu_kernel void @sext_in_reg_v4i1_to_v4i32(ptr addrspace(1) %out, <4 x i32> %a, <4 x i32> %b) #0 { %c = add <4 x i32> %a, %b ; add to prevent folding into extload %shl = shl <4 x i32> %c, %ashr = ashr <4 x i32> %shl, - store <4 x i32> %ashr, <4 x i32> addrspace(1)* %out, align 8 + store <4 x i32> %ashr, ptr addrspace(1) %out, align 8 ret void } @@ -343,11 +343,11 @@ define amdgpu_kernel void @sext_in_reg_v4i1_to_v4i32(<4 x i32> addrspace(1)* %ou ; EG: BFE_INT [[RES]] ; EG: BFE_INT [[RES]] ; EG: LSHR {{\*?}} [[ADDR]] -define amdgpu_kernel void @sext_in_reg_v2i8_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b) #0 { +define amdgpu_kernel void @sext_in_reg_v2i8_to_v2i32(ptr addrspace(1) %out, <2 x i32> %a, <2 x i32> %b) #0 { %c = add <2 x i32> %a, %b ; add to prevent folding into extload %shl = shl <2 x i32> %c, %ashr = ashr <2 x i32> %shl, - store <2 x i32> %ashr, <2 x i32> addrspace(1)* %out, align 8 + store <2 x i32> %ashr, ptr addrspace(1) %out, align 8 ret void } @@ -364,11 +364,11 @@ define amdgpu_kernel void @sext_in_reg_v2i8_to_v2i32(<2 x i32> addrspace(1)* %ou ; EG: BFE_INT [[RES]] ; EG: BFE_INT [[RES]] ; EG: LSHR {{\*?}} [[ADDR]] -define amdgpu_kernel void @sext_in_reg_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %a, <4 x i32> %b) #0 { +define amdgpu_kernel void @sext_in_reg_v4i8_to_v4i32(ptr addrspace(1) %out, <4 x i32> %a, <4 x i32> %b) #0 { %c = add <4 x i32> %a, %b ; add to prevent folding into extload %shl = shl <4 x i32> %c, %ashr = ashr <4 x i32> %shl, - store <4 x i32> %ashr, <4 x i32> addrspace(1)* %out, align 8 + store <4 x i32> %ashr, ptr addrspace(1) %out, align 8 ret void } @@ -381,35 +381,35 @@ define amdgpu_kernel void @sext_in_reg_v4i8_to_v4i32(<4 x i32> addrspace(1)* %ou ; EG: BFE_INT [[RES]] ; EG: BFE_INT [[RES]] ; EG: LSHR {{\*?}} [[ADDR]] -define amdgpu_kernel void @sext_in_reg_v2i16_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b) #0 { +define amdgpu_kernel void @sext_in_reg_v2i16_to_v2i32(ptr addrspace(1) %out, <2 x i32> %a, <2 x i32> %b) #0 { %c = add <2 x i32> %a, %b ; add to prevent folding into extload %shl = shl <2 x i32> %c, %ashr = ashr <2 x i32> %shl, - store <2 x i32> %ashr, <2 x i32> addrspace(1)* %out, align 8 + store <2 x i32> %ashr, ptr addrspace(1) %out, align 8 ret void } ; FUNC-LABEL: {{^}}testcase: -define amdgpu_kernel void @testcase(i8 addrspace(1)* %out, i8 %a) #0 { +define amdgpu_kernel void @testcase(ptr addrspace(1) %out, i8 %a) #0 { %and_a_1 = and i8 %a, 1 %cmp_eq = icmp eq i8 %and_a_1, 0 %cmp_slt = icmp slt i8 %a, 0 %sel0 = select i1 %cmp_slt, i8 0, i8 %a %sel1 = select i1 %cmp_eq, i8 0, i8 %a %xor = xor i8 %sel0, %sel1 - store i8 %xor, i8 addrspace(1)* %out + store i8 %xor, ptr addrspace(1) %out ret void } ; FUNC-LABEL: {{^}}testcase_3: -define amdgpu_kernel void @testcase_3(i8 addrspace(1)* %out, i8 %a) #0 { +define amdgpu_kernel void @testcase_3(ptr addrspace(1) %out, 
i8 %a) #0 { %and_a_1 = and i8 %a, 1 %cmp_eq = icmp eq i8 %and_a_1, 0 %cmp_slt = icmp slt i8 %a, 0 %sel0 = select i1 %cmp_slt, i8 0, i8 %a %sel1 = select i1 %cmp_eq, i8 0, i8 %a %xor = xor i8 %sel0, %sel1 - store i8 %xor, i8 addrspace(1)* %out + store i8 %xor, ptr addrspace(1) %out ret void } @@ -418,26 +418,26 @@ define amdgpu_kernel void @testcase_3(i8 addrspace(1)* %out, i8 %a) #0 { ; GCN: v_bfe_i32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 8 ; GCN: v_bfe_i32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 8 ; GCN: v_bfe_i32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 8 -define amdgpu_kernel void @vgpr_sext_in_reg_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %a, <4 x i32> addrspace(1)* %b) #0 { - %loada = load <4 x i32>, <4 x i32> addrspace(1)* %a, align 16 - %loadb = load <4 x i32>, <4 x i32> addrspace(1)* %b, align 16 +define amdgpu_kernel void @vgpr_sext_in_reg_v4i8_to_v4i32(ptr addrspace(1) %out, ptr addrspace(1) %a, ptr addrspace(1) %b) #0 { + %loada = load <4 x i32>, ptr addrspace(1) %a, align 16 + %loadb = load <4 x i32>, ptr addrspace(1) %b, align 16 %c = add <4 x i32> %loada, %loadb ; add to prevent folding into extload %shl = shl <4 x i32> %c, %ashr = ashr <4 x i32> %shl, - store <4 x i32> %ashr, <4 x i32> addrspace(1)* %out, align 8 + store <4 x i32> %ashr, ptr addrspace(1) %out, align 8 ret void } ; FUNC-LABEL: {{^}}vgpr_sext_in_reg_v4i16_to_v4i32: ; GCN: v_bfe_i32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 16 ; GCN: v_bfe_i32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 16 -define amdgpu_kernel void @vgpr_sext_in_reg_v4i16_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %a, <4 x i32> addrspace(1)* %b) #0 { - %loada = load <4 x i32>, <4 x i32> addrspace(1)* %a, align 16 - %loadb = load <4 x i32>, <4 x i32> addrspace(1)* %b, align 16 +define amdgpu_kernel void @vgpr_sext_in_reg_v4i16_to_v4i32(ptr addrspace(1) %out, ptr addrspace(1) %a, ptr addrspace(1) %b) #0 { + %loada = load <4 x i32>, ptr addrspace(1) %a, align 16 + %loadb = load <4 x i32>, ptr addrspace(1) %b, align 16 %c = add <4 x i32> %loada, %loadb ; add to prevent folding into extload %shl = shl <4 x i32> %c, %ashr = ashr <4 x i32> %shl, - store <4 x i32> %ashr, <4 x i32> addrspace(1)* %out, align 8 + store <4 x i32> %ashr, ptr addrspace(1) %out, align 8 ret void } @@ -446,14 +446,14 @@ define amdgpu_kernel void @vgpr_sext_in_reg_v4i16_to_v4i32(<4 x i32> addrspace(1 ; GCN: v_max_i32 ; GCN-NOT: bfe ; GCN: buffer_store_short -define amdgpu_kernel void @sext_in_reg_to_illegal_type(i16 addrspace(1)* nocapture %out, i8 addrspace(1)* nocapture %src) #0 { - %tmp5 = load i8, i8 addrspace(1)* %src, align 1 +define amdgpu_kernel void @sext_in_reg_to_illegal_type(ptr addrspace(1) nocapture %out, ptr addrspace(1) nocapture %src) #0 { + %tmp5 = load i8, ptr addrspace(1) %src, align 1 %tmp2 = sext i8 %tmp5 to i32 %tmp2.5 = icmp sgt i32 %tmp2, 0 %tmp3 = select i1 %tmp2.5, i32 %tmp2, i32 0 %tmp4 = trunc i32 %tmp3 to i8 %tmp6 = sext i8 %tmp4 to i16 - store i16 %tmp6, i16 addrspace(1)* %out, align 2 + store i16 %tmp6, ptr addrspace(1) %out, align 2 ret void } @@ -472,20 +472,20 @@ define amdgpu_kernel void @sext_in_reg_to_illegal_type(i16 addrspace(1)* nocaptu ; GCN-DAG: v_and_b32_e32 v[[RESULT_HI:[0-9]+]], s{{[0-9]+}}, v[[HI]] ; SI: buffer_store_dwordx2 v[[[RESULT_LO]]:[[RESULT_HI]]] ; GFX89: {{flat|global}}_store_dwordx2 v{{.+}}, v[[[RESULT_LO]]:[[RESULT_HI]]] -define amdgpu_kernel void @v_sext_in_reg_i1_to_i64_move_use(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr, i64 %s.val) #0 { +define amdgpu_kernel 
void @v_sext_in_reg_i1_to_i64_move_use(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, i64 %s.val) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() - %a.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid - %b.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid - %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %tid - %a = load i64, i64 addrspace(1)* %a.gep, align 8 - %b = load i64, i64 addrspace(1)* %b.gep, align 8 + %a.gep = getelementptr i64, ptr addrspace(1) %aptr, i32 %tid + %b.gep = getelementptr i64, ptr addrspace(1) %aptr, i32 %tid + %out.gep = getelementptr i64, ptr addrspace(1) %out, i32 %tid + %a = load i64, ptr addrspace(1) %a.gep, align 8 + %b = load i64, ptr addrspace(1) %b.gep, align 8 %c = shl i64 %a, %b %shl = shl i64 %c, 63 %ashr = ashr i64 %shl, 63 %and = and i64 %ashr, %s.val - store i64 %and, i64 addrspace(1)* %out.gep, align 8 + store i64 %and, ptr addrspace(1) %out.gep, align 8 ret void } @@ -502,19 +502,19 @@ define amdgpu_kernel void @v_sext_in_reg_i1_to_i64_move_use(i64 addrspace(1)* %o ; SI: buffer_store_dwordx2 v[[[RESULT_LO]]:[[RESULT_HI]]] ; GFX89: {{flat|global}}_store_dwordx2 v{{.+}}, v[[[RESULT_LO]]:[[RESULT_HI]]] -define amdgpu_kernel void @v_sext_in_reg_i32_to_i64_move_use(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr, i64 %s.val) #0 { +define amdgpu_kernel void @v_sext_in_reg_i32_to_i64_move_use(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, i64 %s.val) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() - %a.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid - %b.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid - %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %tid - %a = load i64, i64 addrspace(1)* %a.gep, align 8 - %b = load i64, i64 addrspace(1)* %b.gep, align 8 + %a.gep = getelementptr i64, ptr addrspace(1) %aptr, i32 %tid + %b.gep = getelementptr i64, ptr addrspace(1) %aptr, i32 %tid + %out.gep = getelementptr i64, ptr addrspace(1) %out, i32 %tid + %a = load i64, ptr addrspace(1) %a.gep, align 8 + %b = load i64, ptr addrspace(1) %b.gep, align 8 %c = shl i64 %a, %b %shl = shl i64 %c, 32 %ashr = ashr i64 %shl, 32 %and = and i64 %ashr, %s.val - store i64 %and, i64 addrspace(1)* %out.gep, align 8 + store i64 %and, ptr addrspace(1) %out.gep, align 8 ret void } @@ -528,12 +528,12 @@ define amdgpu_kernel void @v_sext_in_reg_i32_to_i64_move_use(i64 addrspace(1)* % ; GFX89: s_lshl_b32 s{{[0-9]+}}, s{{[0-9]+}}, 15 ; GFX89: s_sext_i32_i16 s{{[0-9]+}}, s{{[0-9]+}} ; GFX89: s_lshr_b32 s{{[0-9]+}}, s{{[0-9]+}}, 15 -define amdgpu_kernel void @s_sext_in_reg_i1_i16(i16 addrspace(1)* %out, i32 addrspace(4)* %ptr) #0 { - %ld = load i32, i32 addrspace(4)* %ptr +define amdgpu_kernel void @s_sext_in_reg_i1_i16(ptr addrspace(1) %out, ptr addrspace(4) %ptr) #0 { + %ld = load i32, ptr addrspace(4) %ptr %in = trunc i32 %ld to i16 %shl = shl i16 %in, 15 %sext = ashr i16 %shl, 15 - store i16 %sext, i16 addrspace(1)* %out + store i16 %sext, ptr addrspace(1) %out ret void } @@ -547,12 +547,12 @@ define amdgpu_kernel void @s_sext_in_reg_i1_i16(i16 addrspace(1)* %out, i32 addr ; GFX89: s_lshl_b32 s{{[0-9]+}}, s{{[0-9]+}}, 14 ; GFX89: s_sext_i32_i16 s{{[0-9]+}}, s{{[0-9]+}} ; GFX89: s_lshr_b32 s{{[0-9]+}}, s{{[0-9]+}}, 14 -define amdgpu_kernel void @s_sext_in_reg_i2_i16(i16 addrspace(1)* %out, i32 addrspace(4)* %ptr) #0 { - %ld = load i32, i32 addrspace(4)* %ptr +define amdgpu_kernel void @s_sext_in_reg_i2_i16(ptr addrspace(1) %out, ptr addrspace(4) %ptr) #0 
{ + %ld = load i32, ptr addrspace(4) %ptr %in = trunc i32 %ld to i16 %shl = shl i16 %in, 14 %sext = ashr i16 %shl, 14 - store i16 %sext, i16 addrspace(1)* %out + store i16 %sext, ptr addrspace(1) %out ret void } @@ -561,15 +561,15 @@ define amdgpu_kernel void @s_sext_in_reg_i2_i16(i16 addrspace(1)* %out, i32 addr ; GCN: v_bfe_i32 [[BFE:v[0-9]+]], [[VAL]], 0, 1{{$}} ; GCN: ds_write_b16 v{{[0-9]+}}, [[BFE]] -define amdgpu_kernel void @v_sext_in_reg_i1_i16(i16 addrspace(3)* %out, i16 addrspace(1)* %ptr) #0 { +define amdgpu_kernel void @v_sext_in_reg_i1_i16(ptr addrspace(3) %out, ptr addrspace(1) %ptr) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() - %gep = getelementptr i16, i16 addrspace(1)* %ptr, i32 %tid - %out.gep = getelementptr i16, i16 addrspace(3)* %out, i32 %tid + %gep = getelementptr i16, ptr addrspace(1) %ptr, i32 %tid + %out.gep = getelementptr i16, ptr addrspace(3) %out, i32 %tid - %in = load i16, i16 addrspace(1)* %gep + %in = load i16, ptr addrspace(1) %gep %shl = shl i16 %in, 15 %sext = ashr i16 %shl, 15 - store i16 %sext, i16 addrspace(3)* %out.gep + store i16 %sext, ptr addrspace(3) %out.gep ret void } @@ -582,19 +582,19 @@ define amdgpu_kernel void @v_sext_in_reg_i1_i16(i16 addrspace(3)* %out, i16 addr ; GCN: v_bfe_i32 [[BFE:v[0-9]+]], [[REG]], 0, 1{{$}} ; GCN: ds_write_b16 v{{[0-9]+}}, [[BFE]] -define amdgpu_kernel void @v_sext_in_reg_i1_i16_nonload(i16 addrspace(3)* %out, i16 addrspace(1)* %aptr, i16 addrspace(1)* %bptr, i16 %s.val) nounwind { +define amdgpu_kernel void @v_sext_in_reg_i1_i16_nonload(ptr addrspace(3) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, i16 %s.val) nounwind { %tid = call i32 @llvm.amdgcn.workitem.id.x() - %a.gep = getelementptr i16, i16 addrspace(1)* %aptr, i32 %tid - %b.gep = getelementptr i16, i16 addrspace(1)* %bptr, i32 %tid - %out.gep = getelementptr i16, i16 addrspace(3)* %out, i32 %tid - %a = load volatile i16, i16 addrspace(1)* %a.gep, align 2 - %b = load volatile i16, i16 addrspace(1)* %b.gep, align 2 + %a.gep = getelementptr i16, ptr addrspace(1) %aptr, i32 %tid + %b.gep = getelementptr i16, ptr addrspace(1) %bptr, i32 %tid + %out.gep = getelementptr i16, ptr addrspace(3) %out, i32 %tid + %a = load volatile i16, ptr addrspace(1) %a.gep, align 2 + %b = load volatile i16, ptr addrspace(1) %b.gep, align 2 %c = shl i16 %a, %b %shl = shl i16 %c, 15 %ashr = ashr i16 %shl, 15 - store i16 %ashr, i16 addrspace(3)* %out.gep, align 2 + store i16 %ashr, ptr addrspace(3) %out.gep, align 2 ret void } @@ -608,10 +608,10 @@ define amdgpu_kernel void @v_sext_in_reg_i1_i16_nonload(i16 addrspace(3)* %out, ; GFX89: s_lshl_b32 s{{[0-9]+}}, s{{[0-9]+}}, 14{{$}} ; GFX89: s_sext_i32_i16 s{{[0-9]+}}, s{{[0-9]+}} ; GFX89: s_lshr_b32 s{{[0-9]+}}, s{{[0-9]+}}, 14{{$}} -define amdgpu_kernel void @s_sext_in_reg_i2_i16_arg(i16 addrspace(1)* %out, i16 %in) #0 { +define amdgpu_kernel void @s_sext_in_reg_i2_i16_arg(ptr addrspace(1) %out, i16 %in) #0 { %shl = shl i16 %in, 14 %sext = ashr i16 %shl, 14 - store i16 %sext, i16 addrspace(1)* %out + store i16 %sext, ptr addrspace(1) %out ret void } @@ -625,10 +625,10 @@ define amdgpu_kernel void @s_sext_in_reg_i2_i16_arg(i16 addrspace(1)* %out, i16 ; GFX89: s_lshl_b32 s{{[0-9]+}}, s{{[0-9]+}}, 8{{$}} ; GFX89: s_sext_i32_i16 s{{[0-9]+}}, s{{[0-9]+}} ; GFX89: s_lshr_b32 s{{[0-9]+}}, s{{[0-9]+}}, 8{{$}} -define amdgpu_kernel void @s_sext_in_reg_i8_i16_arg(i16 addrspace(1)* %out, i16 %in) #0 { +define amdgpu_kernel void @s_sext_in_reg_i8_i16_arg(ptr addrspace(1) %out, i16 %in) #0 { %shl = shl i16 %in, 8 %sext = ashr i16 
%shl, 8 - store i16 %sext, i16 addrspace(1)* %out + store i16 %sext, ptr addrspace(1) %out ret void } @@ -642,10 +642,10 @@ define amdgpu_kernel void @s_sext_in_reg_i8_i16_arg(i16 addrspace(1)* %out, i16 ; GFX89: s_lshl_b32 s{{[0-9]+}}, s{{[0-9]+}}, 1{{$}} ; GFX89: s_sext_i32_i16 s{{[0-9]+}}, s{{[0-9]+}} ; GFX89: s_lshr_b32 s{{[0-9]+}}, s{{[0-9]+}}, 1{{$}} -define amdgpu_kernel void @s_sext_in_reg_i15_i16_arg(i16 addrspace(1)* %out, i16 %in) #0 { +define amdgpu_kernel void @s_sext_in_reg_i15_i16_arg(ptr addrspace(1) %out, i16 %in) #0 { %shl = shl i16 %in, 1 %sext = ashr i16 %shl, 1 - store i16 %sext, i16 addrspace(1)* %out + store i16 %sext, ptr addrspace(1) %out ret void } @@ -653,11 +653,11 @@ define amdgpu_kernel void @s_sext_in_reg_i15_i16_arg(i16 addrspace(1)* %out, i16 ; GFX9: v_pk_add_u16 [[ADD:v[0-9]+]] ; GFX9: v_pk_lshlrev_b16 [[SHL:v[0-9]+]], 15, [[ADD]] ; GFX9: v_pk_ashrrev_i16 [[SRA:v[0-9]+]], 15, [[SHL]] -define amdgpu_kernel void @sext_in_reg_v2i1_to_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> %a, <2 x i16> %b) #0 { +define amdgpu_kernel void @sext_in_reg_v2i1_to_v2i16(ptr addrspace(1) %out, <2 x i16> %a, <2 x i16> %b) #0 { %c = add <2 x i16> %a, %b ; add to prevent folding into extload %shl = shl <2 x i16> %c, %ashr = ashr <2 x i16> %shl, - store <2 x i16> %ashr, <2 x i16> addrspace(1)* %out + store <2 x i16> %ashr, ptr addrspace(1) %out ret void } @@ -668,11 +668,11 @@ define amdgpu_kernel void @sext_in_reg_v2i1_to_v2i16(<2 x i16> addrspace(1)* %ou ; GFX9: v_pk_lshlrev_b16 v{{[0-9]+}}, 15, v{{[0-9]+}} ; GFX9: v_pk_ashrrev_i16 v{{[0-9]+}}, 15, v{{[0-9]+}} ; GFX9: v_pk_ashrrev_i16 v{{[0-9]+}}, 15, v{{[0-9]+}} -define amdgpu_kernel void @sext_in_reg_v3i1_to_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16> %a, <3 x i16> %b) #0 { +define amdgpu_kernel void @sext_in_reg_v3i1_to_v3i16(ptr addrspace(1) %out, <3 x i16> %a, <3 x i16> %b) #0 { %c = add <3 x i16> %a, %b ; add to prevent folding into extload %shl = shl <3 x i16> %c, %ashr = ashr <3 x i16> %shl, - store <3 x i16> %ashr, <3 x i16> addrspace(1)* %out + store <3 x i16> %ashr, ptr addrspace(1) %out ret void } @@ -680,11 +680,11 @@ define amdgpu_kernel void @sext_in_reg_v3i1_to_v3i16(<3 x i16> addrspace(1)* %ou ; GFX9: v_pk_add_u16 [[ADD:v[0-9]+]] ; GFX9: v_pk_lshlrev_b16 [[SHL:v[0-9]+]], 14, [[ADD]] ; GFX9: v_pk_ashrrev_i16 [[SRA:v[0-9]+]], 14, [[SHL]] -define amdgpu_kernel void @sext_in_reg_v2i2_to_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> %a, <2 x i16> %b) #0 { +define amdgpu_kernel void @sext_in_reg_v2i2_to_v2i16(ptr addrspace(1) %out, <2 x i16> %a, <2 x i16> %b) #0 { %c = add <2 x i16> %a, %b ; add to prevent folding into extload %shl = shl <2 x i16> %c, %ashr = ashr <2 x i16> %shl, - store <2 x i16> %ashr, <2 x i16> addrspace(1)* %out + store <2 x i16> %ashr, ptr addrspace(1) %out ret void } @@ -692,11 +692,11 @@ define amdgpu_kernel void @sext_in_reg_v2i2_to_v2i16(<2 x i16> addrspace(1)* %ou ; GFX9: v_pk_add_u16 [[ADD:v[0-9]+]] ; GFX9: v_pk_lshlrev_b16 [[SHL:v[0-9]+]], 8, [[ADD]] ; GFX9: v_pk_ashrrev_i16 [[SRA:v[0-9]+]], 8, [[SHL]] -define amdgpu_kernel void @sext_in_reg_v2i8_to_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> %a, <2 x i16> %b) #0 { +define amdgpu_kernel void @sext_in_reg_v2i8_to_v2i16(ptr addrspace(1) %out, <2 x i16> %a, <2 x i16> %b) #0 { %c = add <2 x i16> %a, %b ; add to prevent folding into extload %shl = shl <2 x i16> %c, %ashr = ashr <2 x i16> %shl, - store <2 x i16> %ashr, <2 x i16> addrspace(1)* %out + store <2 x i16> %ashr, ptr addrspace(1) %out ret void } @@ -707,11 +707,11 @@ define 
amdgpu_kernel void @sext_in_reg_v2i8_to_v2i16(<2 x i16> addrspace(1)* %ou ; GFX9: v_pk_lshlrev_b16 v{{[0-9]+}}, 8, v{{[0-9]+}} ; GFX9: v_pk_ashrrev_i16 v{{[0-9]+}}, 8, v{{[0-9]+}} ; GFX9: v_pk_ashrrev_i16 v{{[0-9]+}}, 8, v{{[0-9]+}} -define amdgpu_kernel void @sext_in_reg_v3i8_to_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16> %a, <3 x i16> %b) #0 { +define amdgpu_kernel void @sext_in_reg_v3i8_to_v3i16(ptr addrspace(1) %out, <3 x i16> %a, <3 x i16> %b) #0 { %c = add <3 x i16> %a, %b ; add to prevent folding into extload %shl = shl <3 x i16> %c, %ashr = ashr <3 x i16> %shl, - store <3 x i16> %ashr, <3 x i16> addrspace(1)* %out + store <3 x i16> %ashr, ptr addrspace(1) %out ret void } diff --git a/llvm/test/CodeGen/AMDGPU/sign_extend.ll b/llvm/test/CodeGen/AMDGPU/sign_extend.ll index 5dd8bee..9a03d21 100644 --- a/llvm/test/CodeGen/AMDGPU/sign_extend.ll +++ b/llvm/test/CodeGen/AMDGPU/sign_extend.ll @@ -2,7 +2,7 @@ ; RUN: llc -mtriple=amdgcn-- -amdgpu-scalarize-global-loads=false -mcpu=tahiti -verify-machineinstrs < %s | FileCheck %s -allow-deprecated-dag-overlap -enable-var-scope --check-prefix=SI ; RUN: llc -mtriple=amdgcn-- -amdgpu-scalarize-global-loads=false -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck %s -allow-deprecated-dag-overlap -enable-var-scope --check-prefix=VI -define amdgpu_kernel void @s_sext_i1_to_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind { +define amdgpu_kernel void @s_sext_i1_to_i32(ptr addrspace(1) %out, i32 %a, i32 %b) nounwind { ; SI-LABEL: s_sext_i1_to_i32: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -32,11 +32,11 @@ define amdgpu_kernel void @s_sext_i1_to_i32(i32 addrspace(1)* %out, i32 %a, i32 ; VI-NEXT: s_endpgm %cmp = icmp eq i32 %a, %b %sext = sext i1 %cmp to i32 - store i32 %sext, i32 addrspace(1)* %out, align 4 + store i32 %sext, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @test_s_sext_i32_to_i64(i64 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) nounwind { +define amdgpu_kernel void @test_s_sext_i32_to_i64(ptr addrspace(1) %out, i32 %a, i32 %b, i32 %c) nounwind { ; SI-LABEL: test_s_sext_i32_to_i64: ; SI: ; %bb.0: ; %entry ; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 @@ -74,11 +74,11 @@ entry: %mul = mul i32 %a, %b %add = add i32 %mul, %c %sext = sext i32 %add to i64 - store i64 %sext, i64 addrspace(1)* %out, align 8 + store i64 %sext, ptr addrspace(1) %out, align 8 ret void } -define amdgpu_kernel void @s_sext_i1_to_i64(i64 addrspace(1)* %out, i32 %a, i32 %b) nounwind { +define amdgpu_kernel void @s_sext_i1_to_i64(ptr addrspace(1) %out, i32 %a, i32 %b) nounwind { ; SI-LABEL: s_sext_i1_to_i64: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -110,11 +110,11 @@ define amdgpu_kernel void @s_sext_i1_to_i64(i64 addrspace(1)* %out, i32 %a, i32 ; VI-NEXT: s_endpgm %cmp = icmp eq i32 %a, %b %sext = sext i1 %cmp to i64 - store i64 %sext, i64 addrspace(1)* %out, align 8 + store i64 %sext, ptr addrspace(1) %out, align 8 ret void } -define amdgpu_kernel void @s_sext_i32_to_i64(i64 addrspace(1)* %out, i32 %a) nounwind { +define amdgpu_kernel void @s_sext_i32_to_i64(ptr addrspace(1) %out, i32 %a) nounwind { ; SI-LABEL: s_sext_i32_to_i64: ; SI: ; %bb.0: ; SI-NEXT: s_load_dword s4, s[0:1], 0xb @@ -141,11 +141,11 @@ define amdgpu_kernel void @s_sext_i32_to_i64(i64 addrspace(1)* %out, i32 %a) nou ; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 ; VI-NEXT: s_endpgm %sext = sext i32 %a to i64 - store i64 %sext, i64 addrspace(1)* %out, align 8 + store i64 %sext, ptr addrspace(1) 
%out, align 8 ret void } -define amdgpu_kernel void @v_sext_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind { +define amdgpu_kernel void @v_sext_i32_to_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind { ; SI-LABEL: v_sext_i32_to_i64: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -181,13 +181,13 @@ define amdgpu_kernel void @v_sext_i32_to_i64(i64 addrspace(1)* %out, i32 addrspa ; VI-NEXT: v_ashrrev_i32_e32 v1, 31, v0 ; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 ; VI-NEXT: s_endpgm - %val = load i32, i32 addrspace(1)* %in, align 4 + %val = load i32, ptr addrspace(1) %in, align 4 %sext = sext i32 %val to i64 - store i64 %sext, i64 addrspace(1)* %out, align 8 + store i64 %sext, ptr addrspace(1) %out, align 8 ret void } -define amdgpu_kernel void @s_sext_i16_to_i64(i64 addrspace(1)* %out, i16 %a) nounwind { +define amdgpu_kernel void @s_sext_i16_to_i64(ptr addrspace(1) %out, i16 %a) nounwind { ; SI-LABEL: s_sext_i16_to_i64: ; SI: ; %bb.0: ; SI-NEXT: s_load_dword s4, s[0:1], 0xb @@ -214,11 +214,11 @@ define amdgpu_kernel void @s_sext_i16_to_i64(i64 addrspace(1)* %out, i16 %a) nou ; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 ; VI-NEXT: s_endpgm %sext = sext i16 %a to i64 - store i64 %sext, i64 addrspace(1)* %out, align 8 + store i64 %sext, ptr addrspace(1) %out, align 8 ret void } -define amdgpu_kernel void @s_sext_i1_to_i16(i16 addrspace(1)* %out, i32 %a, i32 %b) nounwind { +define amdgpu_kernel void @s_sext_i1_to_i16(ptr addrspace(1) %out, i32 %a, i32 %b) nounwind { ; SI-LABEL: s_sext_i1_to_i16: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -248,7 +248,7 @@ define amdgpu_kernel void @s_sext_i1_to_i16(i16 addrspace(1)* %out, i32 %a, i32 ; VI-NEXT: s_endpgm %cmp = icmp eq i32 %a, %b %sext = sext i1 %cmp to i16 - store i16 %sext, i16 addrspace(1)* %out + store i16 %sext, ptr addrspace(1) %out ret void } @@ -256,7 +256,7 @@ define amdgpu_kernel void @s_sext_i1_to_i16(i16 addrspace(1)* %out, i32 %a, i32 ; makes it all the way throught the legalizer/optimizer to make sure ; we select this correctly. In the s_sext_i1_to_i16, the sign_extend node ; is optimized to a select very early. 
-define amdgpu_kernel void @s_sext_i1_to_i16_with_and(i16 addrspace(1)* %out, i32 %a, i32 %b, i32 %c, i32 %d) nounwind { +define amdgpu_kernel void @s_sext_i1_to_i16_with_and(ptr addrspace(1) %out, i32 %a, i32 %b, i32 %c, i32 %d) nounwind { ; SI-LABEL: s_sext_i1_to_i16_with_and: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0xb @@ -292,11 +292,11 @@ define amdgpu_kernel void @s_sext_i1_to_i16_with_and(i16 addrspace(1)* %out, i32 %cmp1 = icmp eq i32 %c, %d %cmp = and i1 %cmp0, %cmp1 %sext = sext i1 %cmp to i16 - store i16 %sext, i16 addrspace(1)* %out + store i16 %sext, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @v_sext_i1_to_i16_with_and(i16 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) nounwind { +define amdgpu_kernel void @v_sext_i1_to_i16_with_and(ptr addrspace(1) %out, i32 %a, i32 %b, i32 %c) nounwind { ; SI-LABEL: v_sext_i1_to_i16_with_and: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 @@ -335,7 +335,7 @@ define amdgpu_kernel void @v_sext_i1_to_i16_with_and(i16 addrspace(1)* %out, i32 %cmp1 = icmp eq i32 %b, %c %cmp = and i1 %cmp0, %cmp1 %sext = sext i1 %cmp to i16 - store i16 %sext, i16 addrspace(1)* %out + store i16 %sext, ptr addrspace(1) %out ret void } @@ -347,7 +347,7 @@ define amdgpu_kernel void @v_sext_i1_to_i16_with_and(i16 addrspace(1)* %out, i32 ; t55: i16 = srl t29, Constant:i32<8> ; t63: i32 = any_extend t55 ; t64: i32 = sign_extend_inreg t63, ValueType:ch:i8 -define amdgpu_kernel void @s_sext_v4i8_to_v4i32(i32 addrspace(1)* %out, i32 %a) nounwind { +define amdgpu_kernel void @s_sext_v4i8_to_v4i32(ptr addrspace(1) %out, i32 %a) nounwind { ; SI-LABEL: s_sext_v4i8_to_v4i32: ; SI: ; %bb.0: ; SI-NEXT: s_load_dword s4, s[0:1], 0xb @@ -403,16 +403,16 @@ define amdgpu_kernel void @s_sext_v4i8_to_v4i32(i32 addrspace(1)* %out, i32 %a) %elt1 = extractelement <4 x i32> %ext, i32 1 %elt2 = extractelement <4 x i32> %ext, i32 2 %elt3 = extractelement <4 x i32> %ext, i32 3 - store volatile i32 %elt0, i32 addrspace(1)* %out - store volatile i32 %elt1, i32 addrspace(1)* %out - store volatile i32 %elt2, i32 addrspace(1)* %out - store volatile i32 %elt3, i32 addrspace(1)* %out + store volatile i32 %elt0, ptr addrspace(1) %out + store volatile i32 %elt1, ptr addrspace(1) %out + store volatile i32 %elt2, ptr addrspace(1) %out + store volatile i32 %elt3, ptr addrspace(1) %out ret void } ; FIXME: need to optimize same sequence as above test to avoid ; this shift. 
-define amdgpu_kernel void @v_sext_v4i8_to_v4i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind { +define amdgpu_kernel void @v_sext_v4i8_to_v4i32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind { ; SI-LABEL: v_sext_v4i8_to_v4i32: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -469,22 +469,22 @@ define amdgpu_kernel void @v_sext_v4i8_to_v4i32(i32 addrspace(1)* %out, i32 addr ; VI-NEXT: buffer_store_dword v2, off, s[4:7], 0 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: s_endpgm - %a = load i32, i32 addrspace(1)* %in + %a = load i32, ptr addrspace(1) %in %cast = bitcast i32 %a to <4 x i8> %ext = sext <4 x i8> %cast to <4 x i32> %elt0 = extractelement <4 x i32> %ext, i32 0 %elt1 = extractelement <4 x i32> %ext, i32 1 %elt2 = extractelement <4 x i32> %ext, i32 2 %elt3 = extractelement <4 x i32> %ext, i32 3 - store volatile i32 %elt0, i32 addrspace(1)* %out - store volatile i32 %elt1, i32 addrspace(1)* %out - store volatile i32 %elt2, i32 addrspace(1)* %out - store volatile i32 %elt3, i32 addrspace(1)* %out + store volatile i32 %elt0, ptr addrspace(1) %out + store volatile i32 %elt1, ptr addrspace(1) %out + store volatile i32 %elt2, ptr addrspace(1) %out + store volatile i32 %elt3, ptr addrspace(1) %out ret void } ; FIXME: s_bfe_i64, same on SI and VI -define amdgpu_kernel void @s_sext_v4i16_to_v4i32(i32 addrspace(1)* %out, i64 %a) nounwind { +define amdgpu_kernel void @s_sext_v4i16_to_v4i32(ptr addrspace(1) %out, i64 %a) nounwind { ; SI-LABEL: s_sext_v4i16_to_v4i32: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -542,14 +542,14 @@ define amdgpu_kernel void @s_sext_v4i16_to_v4i32(i32 addrspace(1)* %out, i64 %a) %elt1 = extractelement <4 x i32> %ext, i32 1 %elt2 = extractelement <4 x i32> %ext, i32 2 %elt3 = extractelement <4 x i32> %ext, i32 3 - store volatile i32 %elt0, i32 addrspace(1)* %out - store volatile i32 %elt1, i32 addrspace(1)* %out - store volatile i32 %elt2, i32 addrspace(1)* %out - store volatile i32 %elt3, i32 addrspace(1)* %out + store volatile i32 %elt0, ptr addrspace(1) %out + store volatile i32 %elt1, ptr addrspace(1) %out + store volatile i32 %elt2, ptr addrspace(1) %out + store volatile i32 %elt3, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @v_sext_v4i16_to_v4i32(i32 addrspace(1)* %out, i64 addrspace(1)* %in) nounwind { +define amdgpu_kernel void @v_sext_v4i16_to_v4i32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind { ; SI-LABEL: v_sext_v4i16_to_v4i32: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -605,17 +605,17 @@ define amdgpu_kernel void @v_sext_v4i16_to_v4i32(i32 addrspace(1)* %out, i64 add ; VI-NEXT: buffer_store_dword v2, off, s[4:7], 0 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: s_endpgm - %a = load i64, i64 addrspace(1)* %in + %a = load i64, ptr addrspace(1) %in %cast = bitcast i64 %a to <4 x i16> %ext = sext <4 x i16> %cast to <4 x i32> %elt0 = extractelement <4 x i32> %ext, i32 0 %elt1 = extractelement <4 x i32> %ext, i32 1 %elt2 = extractelement <4 x i32> %ext, i32 2 %elt3 = extractelement <4 x i32> %ext, i32 3 - store volatile i32 %elt0, i32 addrspace(1)* %out - store volatile i32 %elt1, i32 addrspace(1)* %out - store volatile i32 %elt2, i32 addrspace(1)* %out - store volatile i32 %elt3, i32 addrspace(1)* %out + store volatile i32 %elt0, ptr addrspace(1) %out + store volatile i32 %elt1, ptr addrspace(1) %out + store volatile i32 %elt2, ptr addrspace(1) %out + store volatile i32 %elt3, ptr addrspace(1) %out ret void } diff --git a/llvm/test/CodeGen/AMDGPU/sint_to_fp.f64.ll 
b/llvm/test/CodeGen/AMDGPU/sint_to_fp.f64.ll index f4ff6c1..d1f0535 100644 --- a/llvm/test/CodeGen/AMDGPU/sint_to_fp.f64.ll +++ b/llvm/test/CodeGen/AMDGPU/sint_to_fp.f64.ll @@ -4,7 +4,7 @@ declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone -define amdgpu_kernel void @sint_to_fp_i32_to_f64(double addrspace(1)* %out, i32 %in) { +define amdgpu_kernel void @sint_to_fp_i32_to_f64(ptr addrspace(1) %out, i32 %in) { ; CI-LABEL: sint_to_fp_i32_to_f64: ; CI: ; %bb.0: ; CI-NEXT: s_load_dword s2, s[4:5], 0x2 @@ -27,13 +27,13 @@ define amdgpu_kernel void @sint_to_fp_i32_to_f64(double addrspace(1)* %out, i32 ; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; VI-NEXT: s_endpgm %result = sitofp i32 %in to double - store double %result, double addrspace(1)* %out + store double %result, ptr addrspace(1) %out ret void } ; We can't fold the SGPRs into v_cndmask_b32_e64, because it already ; uses an SGPR (implicit vcc). -define amdgpu_kernel void @sint_to_fp_i1_f64(double addrspace(1)* %out, i32 %in) { +define amdgpu_kernel void @sint_to_fp_i1_f64(ptr addrspace(1) %out, i32 %in) { ; CI-LABEL: sint_to_fp_i1_f64: ; CI: ; %bb.0: ; CI-NEXT: s_load_dword s2, s[4:5], 0x2 @@ -63,11 +63,11 @@ define amdgpu_kernel void @sint_to_fp_i1_f64(double addrspace(1)* %out, i32 %in) ; VI-NEXT: s_endpgm %cmp = icmp eq i32 %in, 0 %fp = sitofp i1 %cmp to double - store double %fp, double addrspace(1)* %out, align 4 + store double %fp, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @sint_to_fp_i1_f64_load(double addrspace(1)* %out, i1 %in) { +define amdgpu_kernel void @sint_to_fp_i1_f64_load(ptr addrspace(1) %out, i1 %in) { ; CI-LABEL: sint_to_fp_i1_f64_load: ; CI: ; %bb.0: ; CI-NEXT: s_load_dword s2, s[4:5], 0x2 @@ -96,11 +96,11 @@ define amdgpu_kernel void @sint_to_fp_i1_f64_load(double addrspace(1)* %out, i1 ; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; VI-NEXT: s_endpgm %fp = sitofp i1 %in to double - store double %fp, double addrspace(1)* %out, align 8 + store double %fp, ptr addrspace(1) %out, align 8 ret void } -define amdgpu_kernel void @s_sint_to_fp_i64_to_f64(double addrspace(1)* %out, i64 %in) { +define amdgpu_kernel void @s_sint_to_fp_i64_to_f64(ptr addrspace(1) %out, i64 %in) { ; CI-LABEL: s_sint_to_fp_i64_to_f64: ; CI: ; %bb.0: ; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 @@ -127,11 +127,11 @@ define amdgpu_kernel void @s_sint_to_fp_i64_to_f64(double addrspace(1)* %out, i6 ; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; VI-NEXT: s_endpgm %result = sitofp i64 %in to double - store double %result, double addrspace(1)* %out + store double %result, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @v_sint_to_fp_i64_to_f64(double addrspace(1)* %out, i64 addrspace(1)* %in) { +define amdgpu_kernel void @v_sint_to_fp_i64_to_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) { ; CI-LABEL: v_sint_to_fp_i64_to_f64: ; CI: ; %bb.0: ; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 @@ -170,15 +170,15 @@ define amdgpu_kernel void @v_sint_to_fp_i64_to_f64(double addrspace(1)* %out, i6 ; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; VI-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone - %gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid - %val = load i64, i64 addrspace(1)* %gep, align 8 + %gep = getelementptr i64, ptr addrspace(1) %in, i32 %tid + %val = load i64, ptr addrspace(1) %gep, align 8 %result = sitofp i64 %val to double - store double %result, double addrspace(1)* %out + store double %result, ptr addrspace(1) %out ret void } ; FIXME: bfe and sext on VI+ -define 
amdgpu_kernel void @s_sint_to_fp_i8_to_f64(double addrspace(1)* %out, i8 %in) { +define amdgpu_kernel void @s_sint_to_fp_i8_to_f64(ptr addrspace(1) %out, i8 %in) { ; CI-LABEL: s_sint_to_fp_i8_to_f64: ; CI: ; %bb.0: ; CI-NEXT: s_load_dword s2, s[4:5], 0x2 @@ -204,7 +204,7 @@ define amdgpu_kernel void @s_sint_to_fp_i8_to_f64(double addrspace(1)* %out, i8 ; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; VI-NEXT: s_endpgm %fp = sitofp i8 %in to double - store double %fp, double addrspace(1)* %out + store double %fp, ptr addrspace(1) %out ret void } @@ -227,7 +227,7 @@ define double @v_sint_to_fp_i8_to_f64(i8 %in) { ret double %fp } -define amdgpu_kernel void @s_select_sint_to_fp_i1_vals_f64(double addrspace(1)* %out, i32 %in) { +define amdgpu_kernel void @s_select_sint_to_fp_i1_vals_f64(ptr addrspace(1) %out, i32 %in) { ; CI-LABEL: s_select_sint_to_fp_i1_vals_f64: ; CI: ; %bb.0: ; CI-NEXT: s_load_dword s2, s[4:5], 0x2 @@ -257,11 +257,11 @@ define amdgpu_kernel void @s_select_sint_to_fp_i1_vals_f64(double addrspace(1)* ; VI-NEXT: s_endpgm %cmp = icmp eq i32 %in, 0 %select = select i1 %cmp, double -1.0, double 0.0 - store double %select, double addrspace(1)* %out, align 8 + store double %select, ptr addrspace(1) %out, align 8 ret void } -define void @v_select_sint_to_fp_i1_vals_f64(double addrspace(1)* %out, i32 %in) { +define void @v_select_sint_to_fp_i1_vals_f64(ptr addrspace(1) %out, i32 %in) { ; GCN-LABEL: v_select_sint_to_fp_i1_vals_f64: ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -274,11 +274,11 @@ define void @v_select_sint_to_fp_i1_vals_f64(double addrspace(1)* %out, i32 %in) ; GCN-NEXT: s_setpc_b64 s[30:31] %cmp = icmp eq i32 %in, 0 %select = select i1 %cmp, double -1.0, double 0.0 - store double %select, double addrspace(1)* %out, align 8 + store double %select, ptr addrspace(1) %out, align 8 ret void } -define amdgpu_kernel void @s_select_sint_to_fp_i1_vals_i64(i64 addrspace(1)* %out, i32 %in) { +define amdgpu_kernel void @s_select_sint_to_fp_i1_vals_i64(ptr addrspace(1) %out, i32 %in) { ; CI-LABEL: s_select_sint_to_fp_i1_vals_i64: ; CI: ; %bb.0: ; CI-NEXT: s_load_dword s2, s[4:5], 0x2 @@ -308,11 +308,11 @@ define amdgpu_kernel void @s_select_sint_to_fp_i1_vals_i64(i64 addrspace(1)* %ou ; VI-NEXT: s_endpgm %cmp = icmp eq i32 %in, 0 %select = select i1 %cmp, i64 u0xbff0000000000000, i64 0 - store i64 %select, i64 addrspace(1)* %out, align 8 + store i64 %select, ptr addrspace(1) %out, align 8 ret void } -define void @v_select_sint_to_fp_i1_vals_i64(i64 addrspace(1)* %out, i32 %in) { +define void @v_select_sint_to_fp_i1_vals_i64(ptr addrspace(1) %out, i32 %in) { ; GCN-LABEL: v_select_sint_to_fp_i1_vals_i64: ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -325,12 +325,12 @@ define void @v_select_sint_to_fp_i1_vals_i64(i64 addrspace(1)* %out, i32 %in) { ; GCN-NEXT: s_setpc_b64 s[30:31] %cmp = icmp eq i32 %in, 0 %select = select i1 %cmp, i64 u0xbff0000000000000, i64 0 - store i64 %select, i64 addrspace(1)* %out, align 8 + store i64 %select, ptr addrspace(1) %out, align 8 ret void } ; TODO: This should swap the selected order / invert the compare and do it. 
-define void @v_swap_select_sint_to_fp_i1_vals_f64(double addrspace(1)* %out, i32 %in) { +define void @v_swap_select_sint_to_fp_i1_vals_f64(ptr addrspace(1) %out, i32 %in) { ; GCN-LABEL: v_swap_select_sint_to_fp_i1_vals_f64: ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -343,12 +343,12 @@ define void @v_swap_select_sint_to_fp_i1_vals_f64(double addrspace(1)* %out, i32 ; GCN-NEXT: s_setpc_b64 s[30:31] %cmp = icmp eq i32 %in, 0 %select = select i1 %cmp, double 0.0, double -1.0 - store double %select, double addrspace(1)* %out, align 8 + store double %select, ptr addrspace(1) %out, align 8 ret void } ; TODO: This should swap the selected order / invert the compare and do it. -define amdgpu_kernel void @s_swap_select_sint_to_fp_i1_vals_f64(double addrspace(1)* %out, i32 %in) { +define amdgpu_kernel void @s_swap_select_sint_to_fp_i1_vals_f64(ptr addrspace(1) %out, i32 %in) { ; CI-LABEL: s_swap_select_sint_to_fp_i1_vals_f64: ; CI: ; %bb.0: ; CI-NEXT: s_load_dword s2, s[4:5], 0x2 @@ -378,6 +378,6 @@ define amdgpu_kernel void @s_swap_select_sint_to_fp_i1_vals_f64(double addrspace ; VI-NEXT: s_endpgm %cmp = icmp eq i32 %in, 0 %select = select i1 %cmp, double 0.0, double -1.0 - store double %select, double addrspace(1)* %out, align 8 + store double %select, ptr addrspace(1) %out, align 8 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/sint_to_fp.i64.ll b/llvm/test/CodeGen/AMDGPU/sint_to_fp.i64.ll index 27bfcce..f7d57da 100644 --- a/llvm/test/CodeGen/AMDGPU/sint_to_fp.i64.ll +++ b/llvm/test/CodeGen/AMDGPU/sint_to_fp.i64.ll @@ -4,7 +4,7 @@ ; FIXME: This should be merged with sint_to_fp.ll, but s_sint_to_fp_v2i64 crashes on r600 -define amdgpu_kernel void @s_sint_to_fp_i64_to_f16(half addrspace(1)* %out, i64 %in) #0 { +define amdgpu_kernel void @s_sint_to_fp_i64_to_f16(ptr addrspace(1) %out, i64 %in) #0 { ; GFX6-LABEL: s_sint_to_fp_i64_to_f16: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -51,11 +51,11 @@ define amdgpu_kernel void @s_sint_to_fp_i64_to_f16(half addrspace(1)* %out, i64 ; GFX8-NEXT: flat_store_short v[0:1], v2 ; GFX8-NEXT: s_endpgm %result = sitofp i64 %in to half - store half %result, half addrspace(1)* %out + store half %result, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @v_sint_to_fp_i64_to_f16(half addrspace(1)* %out, i64 addrspace(1)* %in) #0 { +define amdgpu_kernel void @v_sint_to_fp_i64_to_f16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { ; GFX6-LABEL: v_sint_to_fp_i64_to_f16: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -115,15 +115,15 @@ define amdgpu_kernel void @v_sint_to_fp_i64_to_f16(half addrspace(1)* %out, i64 ; GFX8-NEXT: flat_store_short v[0:1], v3 ; GFX8-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() - %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid - %out.gep = getelementptr half, half addrspace(1)* %out, i32 %tid - %val = load i64, i64 addrspace(1)* %in.gep + %in.gep = getelementptr i64, ptr addrspace(1) %in, i32 %tid + %out.gep = getelementptr half, ptr addrspace(1) %out, i32 %tid + %val = load i64, ptr addrspace(1) %in.gep %result = sitofp i64 %val to half - store half %result, half addrspace(1)* %out.gep + store half %result, ptr addrspace(1) %out.gep ret void } -define amdgpu_kernel void @s_sint_to_fp_i64_to_f32(float addrspace(1)* %out, i64 %in) #0 { +define amdgpu_kernel void @s_sint_to_fp_i64_to_f32(ptr addrspace(1) %out, i64 %in) #0 { ; GFX6-LABEL: s_sint_to_fp_i64_to_f32: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ 
-168,11 +168,11 @@ define amdgpu_kernel void @s_sint_to_fp_i64_to_f32(float addrspace(1)* %out, i64 ; GFX8-NEXT: flat_store_dword v[0:1], v2 ; GFX8-NEXT: s_endpgm %result = sitofp i64 %in to float - store float %result, float addrspace(1)* %out + store float %result, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @v_sint_to_fp_i64_to_f32(float addrspace(1)* %out, i64 addrspace(1)* %in) #0 { +define amdgpu_kernel void @v_sint_to_fp_i64_to_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { ; GFX6-LABEL: v_sint_to_fp_i64_to_f32: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -230,15 +230,15 @@ define amdgpu_kernel void @v_sint_to_fp_i64_to_f32(float addrspace(1)* %out, i64 ; GFX8-NEXT: flat_store_dword v[0:1], v2 ; GFX8-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() - %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid - %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid - %val = load i64, i64 addrspace(1)* %in.gep + %in.gep = getelementptr i64, ptr addrspace(1) %in, i32 %tid + %out.gep = getelementptr float, ptr addrspace(1) %out, i32 %tid + %val = load i64, ptr addrspace(1) %in.gep %result = sitofp i64 %val to float - store float %result, float addrspace(1)* %out.gep + store float %result, ptr addrspace(1) %out.gep ret void } -define amdgpu_kernel void @s_sint_to_fp_v2i64_to_v2f32(<2 x float> addrspace(1)* %out, <2 x i64> %in) #0{ +define amdgpu_kernel void @s_sint_to_fp_v2i64_to_v2f32(ptr addrspace(1) %out, <2 x i64> %in) #0{ ; GFX6-LABEL: s_sint_to_fp_v2i64_to_v2f32: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0xd @@ -307,11 +307,11 @@ define amdgpu_kernel void @s_sint_to_fp_v2i64_to_v2f32(<2 x float> addrspace(1)* ; GFX8-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; GFX8-NEXT: s_endpgm %result = sitofp <2 x i64> %in to <2 x float> - store <2 x float> %result, <2 x float> addrspace(1)* %out + store <2 x float> %result, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @v_sint_to_fp_v4i64_to_v4f32(<4 x float> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) #0 { +define amdgpu_kernel void @v_sint_to_fp_v4i64_to_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { ; GFX6-LABEL: v_sint_to_fp_v4i64_to_v4f32: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -448,15 +448,15 @@ define amdgpu_kernel void @v_sint_to_fp_v4i64_to_v4f32(<4 x float> addrspace(1)* ; GFX8-NEXT: flat_store_dwordx4 v[9:10], v[0:3] ; GFX8-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() - %in.gep = getelementptr <4 x i64>, <4 x i64> addrspace(1)* %in, i32 %tid - %out.gep = getelementptr <4 x float>, <4 x float> addrspace(1)* %out, i32 %tid - %value = load <4 x i64>, <4 x i64> addrspace(1)* %in.gep + %in.gep = getelementptr <4 x i64>, ptr addrspace(1) %in, i32 %tid + %out.gep = getelementptr <4 x float>, ptr addrspace(1) %out, i32 %tid + %value = load <4 x i64>, ptr addrspace(1) %in.gep %result = sitofp <4 x i64> %value to <4 x float> - store <4 x float> %result, <4 x float> addrspace(1)* %out.gep + store <4 x float> %result, ptr addrspace(1) %out.gep ret void } -define amdgpu_kernel void @s_sint_to_fp_v2i64_to_v2f16(<2 x half> addrspace(1)* %out, <2 x i64> %in) #0{ +define amdgpu_kernel void @s_sint_to_fp_v2i64_to_v2f16(ptr addrspace(1) %out, <2 x i64> %in) #0{ ; GFX6-LABEL: s_sint_to_fp_v2i64_to_v2f16: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0xd @@ -532,11 +532,11 @@ define amdgpu_kernel void @s_sint_to_fp_v2i64_to_v2f16(<2 x half> addrspace(1)* ; GFX8-NEXT: 
flat_store_dword v[0:1], v2 ; GFX8-NEXT: s_endpgm %result = sitofp <2 x i64> %in to <2 x half> - store <2 x half> %result, <2 x half> addrspace(1)* %out + store <2 x half> %result, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @v_sint_to_fp_v4i64_to_v4f16(<4 x half> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) #0 { +define amdgpu_kernel void @v_sint_to_fp_v4i64_to_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { ; GFX6-LABEL: v_sint_to_fp_v4i64_to_v4f16: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -687,11 +687,11 @@ define amdgpu_kernel void @v_sint_to_fp_v4i64_to_v4f16(<4 x half> addrspace(1)* ; GFX8-NEXT: flat_store_dwordx2 v[0:1], v[2:3] ; GFX8-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() - %in.gep = getelementptr <4 x i64>, <4 x i64> addrspace(1)* %in, i32 %tid - %out.gep = getelementptr <4 x half>, <4 x half> addrspace(1)* %out, i32 %tid - %value = load <4 x i64>, <4 x i64> addrspace(1)* %in.gep + %in.gep = getelementptr <4 x i64>, ptr addrspace(1) %in, i32 %tid + %out.gep = getelementptr <4 x half>, ptr addrspace(1) %out, i32 %tid + %value = load <4 x i64>, ptr addrspace(1) %in.gep %result = sitofp <4 x i64> %value to <4 x half> - store <4 x half> %result, <4 x half> addrspace(1)* %out.gep + store <4 x half> %result, ptr addrspace(1) %out.gep ret void } diff --git a/llvm/test/CodeGen/AMDGPU/sint_to_fp.ll b/llvm/test/CodeGen/AMDGPU/sint_to_fp.ll index b62b0de..73f8140 100644 --- a/llvm/test/CodeGen/AMDGPU/sint_to_fp.ll +++ b/llvm/test/CodeGen/AMDGPU/sint_to_fp.ll @@ -6,9 +6,9 @@ ; SI: v_cvt_f32_i32_e32 {{v[0-9]+}}, {{s[0-9]+$}} ; R600: INT_TO_FLT * T{{[0-9]+\.[XYZW]}}, KC0[2].Z -define amdgpu_kernel void @s_sint_to_fp_i32_to_f32(float addrspace(1)* %out, i32 %in) #0 { +define amdgpu_kernel void @s_sint_to_fp_i32_to_f32(ptr addrspace(1) %out, i32 %in) #0 { %result = sitofp i32 %in to float - store float %result, float addrspace(1)* %out + store float %result, ptr addrspace(1) %out ret void } @@ -16,13 +16,13 @@ define amdgpu_kernel void @s_sint_to_fp_i32_to_f32(float addrspace(1)* %out, i32 ; SI: v_cvt_f32_i32_e32 {{v[0-9]+}}, {{v[0-9]+$}} ; R600: INT_TO_FLT -define amdgpu_kernel void @v_sint_to_fp_i32_to_f32(float addrspace(1)* %out, i32 addrspace(1)* %in) #0 { +define amdgpu_kernel void @v_sint_to_fp_i32_to_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() - %in.gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid - %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid - %val = load i32, i32 addrspace(1)* %in.gep + %in.gep = getelementptr i32, ptr addrspace(1) %in, i32 %tid + %out.gep = getelementptr float, ptr addrspace(1) %out, i32 %tid + %val = load i32, ptr addrspace(1) %in.gep %result = sitofp i32 %val to float - store float %result, float addrspace(1)* %out.gep + store float %result, ptr addrspace(1) %out.gep ret void } @@ -32,9 +32,9 @@ define amdgpu_kernel void @v_sint_to_fp_i32_to_f32(float addrspace(1)* %out, i32 ; R600-DAG: INT_TO_FLT * T{{[0-9]+\.[XYZW]}}, KC0[2].W ; R600-DAG: INT_TO_FLT * T{{[0-9]+\.[XYZW]}}, KC0[3].X -define amdgpu_kernel void @s_sint_to_fp_v2i32(<2 x float> addrspace(1)* %out, <2 x i32> %in) #0{ +define amdgpu_kernel void @s_sint_to_fp_v2i32(ptr addrspace(1) %out, <2 x i32> %in) #0{ %result = sitofp <2 x i32> %in to <2 x float> - store <2 x float> %result, <2 x float> addrspace(1)* %out + store <2 x float> %result, ptr addrspace(1) %out ret void } @@ -49,10 +49,10 @@ define amdgpu_kernel void @s_sint_to_fp_v2i32(<2 x float> 
addrspace(1)* %out, <2 ; R600: INT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ; R600: INT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ; R600: INT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -define amdgpu_kernel void @s_sint_to_fp_v4i32_to_v4f32(<4 x float> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) #0 { - %value = load <4 x i32>, <4 x i32> addrspace(1) * %in +define amdgpu_kernel void @s_sint_to_fp_v4i32_to_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { + %value = load <4 x i32>, ptr addrspace(1) %in %result = sitofp <4 x i32> %value to <4 x float> - store <4 x float> %result, <4 x float> addrspace(1)* %out + store <4 x float> %result, ptr addrspace(1) %out ret void } @@ -66,13 +66,13 @@ define amdgpu_kernel void @s_sint_to_fp_v4i32_to_v4f32(<4 x float> addrspace(1)* ; R600: INT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ; R600: INT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ; R600: INT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -define amdgpu_kernel void @v_sint_to_fp_v4i32(<4 x float> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) #0 { +define amdgpu_kernel void @v_sint_to_fp_v4i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() - %in.gep = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 %tid - %out.gep = getelementptr <4 x float>, <4 x float> addrspace(1)* %out, i32 %tid - %value = load <4 x i32>, <4 x i32> addrspace(1)* %in.gep + %in.gep = getelementptr <4 x i32>, ptr addrspace(1) %in, i32 %tid + %out.gep = getelementptr <4 x float>, ptr addrspace(1) %out, i32 %tid + %value = load <4 x i32>, ptr addrspace(1) %in.gep %result = sitofp <4 x i32> %value to <4 x float> - store <4 x float> %result, <4 x float> addrspace(1)* %out.gep + store <4 x float> %result, ptr addrspace(1) %out.gep ret void } @@ -82,10 +82,10 @@ define amdgpu_kernel void @v_sint_to_fp_v4i32(<4 x float> addrspace(1)* %out, <4 ; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1.0, [[CMP]] ; SI: buffer_store_dword [[RESULT]], ; SI: s_endpgm -define amdgpu_kernel void @s_sint_to_fp_i1_f32(float addrspace(1)* %out, i32 %in) #0 { +define amdgpu_kernel void @s_sint_to_fp_i1_f32(ptr addrspace(1) %out, i32 %in) #0 { %cmp = icmp eq i32 %in, 0 %fp = uitofp i1 %cmp to float - store float %fp, float addrspace(1)* %out + store float %fp, ptr addrspace(1) %out ret void } @@ -93,9 +93,9 @@ define amdgpu_kernel void @s_sint_to_fp_i1_f32(float addrspace(1)* %out, i32 %in ; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1.0 ; SI: buffer_store_dword [[RESULT]], ; SI: s_endpgm -define amdgpu_kernel void @s_sint_to_fp_i1_f32_load(float addrspace(1)* %out, i1 %in) #0 { +define amdgpu_kernel void @s_sint_to_fp_i1_f32_load(ptr addrspace(1) %out, i1 %in) #0 { %fp = sitofp i1 %in to float - store float %fp, float addrspace(1)* %out + store float %fp, ptr addrspace(1) %out ret void } @@ -106,13 +106,13 @@ define amdgpu_kernel void @s_sint_to_fp_i1_f32_load(float addrspace(1)* %out, i1 ; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1.0 ; SI: {{buffer|flat}}_store_dword {{.*}}[[RESULT]] ; SI: s_endpgm -define amdgpu_kernel void @v_sint_to_fp_i1_f32_load(float addrspace(1)* %out, i1 addrspace(1)* %in) #0 { +define amdgpu_kernel void @v_sint_to_fp_i1_f32_load(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() - %in.gep = getelementptr i1, i1 addrspace(1)* %in, i32 %tid - %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid - %val = load i1, i1 addrspace(1)* %in.gep + %in.gep = getelementptr i1, ptr 
addrspace(1) %in, i32 %tid + %out.gep = getelementptr float, ptr addrspace(1) %out, i32 %tid + %val = load i1, ptr addrspace(1) %in.gep %fp = sitofp i1 %val to float - store float %fp, float addrspace(1)* %out.gep + store float %fp, ptr addrspace(1) %out.gep ret void } diff --git a/llvm/test/CodeGen/AMDGPU/sitofp.f16.ll b/llvm/test/CodeGen/AMDGPU/sitofp.f16.ll index 144c693..aeeb2fd 100644 --- a/llvm/test/CodeGen/AMDGPU/sitofp.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/sitofp.f16.ll @@ -12,12 +12,12 @@ ; GCN: buffer_store_short v[[R_F16]] ; GCN: s_endpgm define amdgpu_kernel void @sitofp_i16_to_f16( - half addrspace(1)* %r, - i16 addrspace(1)* %a) { + ptr addrspace(1) %r, + ptr addrspace(1) %a) { entry: - %a.val = load i16, i16 addrspace(1)* %a + %a.val = load i16, ptr addrspace(1) %a %r.val = sitofp i16 %a.val to half - store half %r.val, half addrspace(1)* %r + store half %r.val, ptr addrspace(1) %r ret void } @@ -28,12 +28,12 @@ entry: ; GCN: buffer_store_short v[[R_F16]] ; GCN: s_endpgm define amdgpu_kernel void @sitofp_i32_to_f16( - half addrspace(1)* %r, - i32 addrspace(1)* %a) { + ptr addrspace(1) %r, + ptr addrspace(1) %a) { entry: - %a.val = load i32, i32 addrspace(1)* %a + %a.val = load i32, ptr addrspace(1) %a %r.val = sitofp i32 %a.val to half - store half %r.val, half addrspace(1)* %r + store half %r.val, ptr addrspace(1) %r ret void } @@ -57,12 +57,12 @@ entry: ; GCN: s_endpgm define amdgpu_kernel void @sitofp_v2i16_to_v2f16( - <2 x half> addrspace(1)* %r, - <2 x i16> addrspace(1)* %a) { + ptr addrspace(1) %r, + ptr addrspace(1) %a) { entry: - %a.val = load <2 x i16>, <2 x i16> addrspace(1)* %a + %a.val = load <2 x i16>, ptr addrspace(1) %a %r.val = sitofp <2 x i16> %a.val to <2 x half> - store <2 x half> %r.val, <2 x half> addrspace(1)* %r + store <2 x half> %r.val, ptr addrspace(1) %r ret void } @@ -85,12 +85,12 @@ entry: ; GCN: buffer_store_dword ; GCN: s_endpgm define amdgpu_kernel void @sitofp_v2i32_to_v2f16( - <2 x half> addrspace(1)* %r, - <2 x i32> addrspace(1)* %a) { + ptr addrspace(1) %r, + ptr addrspace(1) %a) { entry: - %a.val = load <2 x i32>, <2 x i32> addrspace(1)* %a + %a.val = load <2 x i32>, ptr addrspace(1) %a %r.val = sitofp <2 x i32> %a.val to <2 x half> - store <2 x half> %r.val, <2 x half> addrspace(1)* %r + store <2 x half> %r.val, ptr addrspace(1) %r ret void } @@ -102,14 +102,14 @@ entry: ; GCN-NEXT: v_cvt_f16_f32_e32 [[R_F16:v[0-9]+]], [[RESULT]] ; GCN: buffer_store_short ; GCN: s_endpgm -define amdgpu_kernel void @s_sint_to_fp_i1_to_f16(half addrspace(1)* %out, float addrspace(1)* %in0, float addrspace(1)* %in1) { - %a = load float, float addrspace(1) * %in0 - %b = load float, float addrspace(1) * %in1 +define amdgpu_kernel void @s_sint_to_fp_i1_to_f16(ptr addrspace(1) %out, ptr addrspace(1) %in0, ptr addrspace(1) %in1) { + %a = load float, ptr addrspace(1) %in0 + %b = load float, ptr addrspace(1) %in1 %acmp = fcmp oge float %a, 0.000000e+00 %bcmp = fcmp oge float %b, 1.000000e+00 %result = xor i1 %acmp, %bcmp %fp = sitofp i1 %result to half - store half %fp, half addrspace(1)* %out + store half %fp, ptr addrspace(1) %out ret void } diff --git a/llvm/test/CodeGen/AMDGPU/uint_to_fp.f64.ll b/llvm/test/CodeGen/AMDGPU/uint_to_fp.f64.ll index ddb9aef..ba52d70 100644 --- a/llvm/test/CodeGen/AMDGPU/uint_to_fp.f64.ll +++ b/llvm/test/CodeGen/AMDGPU/uint_to_fp.f64.ll @@ -4,7 +4,7 @@ declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone -define amdgpu_kernel void @v_uint_to_fp_i64_to_f64(double addrspace(1)* %out, i64 addrspace(1)* %in) { +define 
amdgpu_kernel void @v_uint_to_fp_i64_to_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) { ; SI-LABEL: v_uint_to_fp_i64_to_f64: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 @@ -43,14 +43,14 @@ define amdgpu_kernel void @v_uint_to_fp_i64_to_f64(double addrspace(1)* %out, i6 ; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; VI-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone - %gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid - %val = load i64, i64 addrspace(1)* %gep, align 8 + %gep = getelementptr i64, ptr addrspace(1) %in, i32 %tid + %val = load i64, ptr addrspace(1) %gep, align 8 %result = uitofp i64 %val to double - store double %result, double addrspace(1)* %out + store double %result, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @s_uint_to_fp_i64_to_f64(double addrspace(1)* %out, i64 %in) { +define amdgpu_kernel void @s_uint_to_fp_i64_to_f64(ptr addrspace(1) %out, i64 %in) { ; SI-LABEL: s_uint_to_fp_i64_to_f64: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 @@ -77,11 +77,11 @@ define amdgpu_kernel void @s_uint_to_fp_i64_to_f64(double addrspace(1)* %out, i6 ; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; VI-NEXT: s_endpgm %cast = uitofp i64 %in to double - store double %cast, double addrspace(1)* %out, align 8 + store double %cast, ptr addrspace(1) %out, align 8 ret void } -define amdgpu_kernel void @s_uint_to_fp_v2i64_to_v2f64(<2 x double> addrspace(1)* %out, <2 x i64> %in) { +define amdgpu_kernel void @s_uint_to_fp_v2i64_to_v2f64(ptr addrspace(1) %out, <2 x i64> %in) { ; SI-LABEL: s_uint_to_fp_v2i64_to_v2f64: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x4 @@ -119,11 +119,11 @@ define amdgpu_kernel void @s_uint_to_fp_v2i64_to_v2f64(<2 x double> addrspace(1) ; VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3] ; VI-NEXT: s_endpgm %cast = uitofp <2 x i64> %in to <2 x double> - store <2 x double> %cast, <2 x double> addrspace(1)* %out, align 16 + store <2 x double> %cast, ptr addrspace(1) %out, align 16 ret void } -define amdgpu_kernel void @s_uint_to_fp_v4i64_to_v4f64(<4 x double> addrspace(1)* %out, <4 x i64> %in) { +define amdgpu_kernel void @s_uint_to_fp_v4i64_to_v4f64(ptr addrspace(1) %out, <4 x i64> %in) { ; SI-LABEL: s_uint_to_fp_v4i64_to_v4f64: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x8 @@ -187,11 +187,11 @@ define amdgpu_kernel void @s_uint_to_fp_v4i64_to_v4f64(<4 x double> addrspace(1) ; VI-NEXT: flat_store_dwordx4 v[8:9], v[0:3] ; VI-NEXT: s_endpgm %cast = uitofp <4 x i64> %in to <4 x double> - store <4 x double> %cast, <4 x double> addrspace(1)* %out, align 16 + store <4 x double> %cast, ptr addrspace(1) %out, align 16 ret void } -define amdgpu_kernel void @s_uint_to_fp_i32_to_f64(double addrspace(1)* %out, i32 %in) { +define amdgpu_kernel void @s_uint_to_fp_i32_to_f64(ptr addrspace(1) %out, i32 %in) { ; SI-LABEL: s_uint_to_fp_i32_to_f64: ; SI: ; %bb.0: ; SI-NEXT: s_load_dword s2, s[4:5], 0x2 @@ -214,11 +214,11 @@ define amdgpu_kernel void @s_uint_to_fp_i32_to_f64(double addrspace(1)* %out, i3 ; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; VI-NEXT: s_endpgm %cast = uitofp i32 %in to double - store double %cast, double addrspace(1)* %out, align 8 + store double %cast, ptr addrspace(1) %out, align 8 ret void } -define amdgpu_kernel void @s_uint_to_fp_v2i32_to_v2f64(<2 x double> addrspace(1)* %out, <2 x i32> %in) { +define amdgpu_kernel void @s_uint_to_fp_v2i32_to_v2f64(ptr addrspace(1) %out, <2 x i32> %in) { ; GCN-LABEL: s_uint_to_fp_v2i32_to_v2f64: ; GCN: ; %bb.0: ; GCN-NEXT: 
s_load_dwordx4 s[0:3], s[4:5], 0x0 @@ -230,11 +230,11 @@ define amdgpu_kernel void @s_uint_to_fp_v2i32_to_v2f64(<2 x double> addrspace(1) ; GCN-NEXT: flat_store_dwordx4 v[4:5], v[0:3] ; GCN-NEXT: s_endpgm %cast = uitofp <2 x i32> %in to <2 x double> - store <2 x double> %cast, <2 x double> addrspace(1)* %out, align 16 + store <2 x double> %cast, ptr addrspace(1) %out, align 16 ret void } -define amdgpu_kernel void @s_uint_to_fp_v4i32_to_v4f64(<4 x double> addrspace(1)* %out, <4 x i32> %in) { +define amdgpu_kernel void @s_uint_to_fp_v4i32_to_v4f64(ptr addrspace(1) %out, <4 x i32> %in) { ; SI-LABEL: s_uint_to_fp_v4i32_to_v4f64: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x4 @@ -275,13 +275,13 @@ define amdgpu_kernel void @s_uint_to_fp_v4i32_to_v4f64(<4 x double> addrspace(1) ; VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3] ; VI-NEXT: s_endpgm %cast = uitofp <4 x i32> %in to <4 x double> - store <4 x double> %cast, <4 x double> addrspace(1)* %out, align 16 + store <4 x double> %cast, ptr addrspace(1) %out, align 16 ret void } ; We can't fold the SGPRs into v_cndmask_b32_e32, because it already ; uses an SGPR (implicit vcc). -define amdgpu_kernel void @uint_to_fp_i1_to_f64(double addrspace(1)* %out, i32 %in) { +define amdgpu_kernel void @uint_to_fp_i1_to_f64(ptr addrspace(1) %out, i32 %in) { ; SI-LABEL: uint_to_fp_i1_to_f64: ; SI: ; %bb.0: ; SI-NEXT: s_load_dword s2, s[4:5], 0x2 @@ -311,11 +311,11 @@ define amdgpu_kernel void @uint_to_fp_i1_to_f64(double addrspace(1)* %out, i32 % ; VI-NEXT: s_endpgm %cmp = icmp eq i32 %in, 0 %fp = uitofp i1 %cmp to double - store double %fp, double addrspace(1)* %out, align 4 + store double %fp, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @uint_to_fp_i1_to_f64_load(double addrspace(1)* %out, i1 %in) { +define amdgpu_kernel void @uint_to_fp_i1_to_f64_load(ptr addrspace(1) %out, i1 %in) { ; SI-LABEL: uint_to_fp_i1_to_f64_load: ; SI: ; %bb.0: ; SI-NEXT: s_load_dword s2, s[4:5], 0x2 @@ -344,11 +344,11 @@ define amdgpu_kernel void @uint_to_fp_i1_to_f64_load(double addrspace(1)* %out, ; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; VI-NEXT: s_endpgm %fp = uitofp i1 %in to double - store double %fp, double addrspace(1)* %out, align 8 + store double %fp, ptr addrspace(1) %out, align 8 ret void } -define amdgpu_kernel void @s_uint_to_fp_i8_to_f64(double addrspace(1)* %out, i8 %in) { +define amdgpu_kernel void @s_uint_to_fp_i8_to_f64(ptr addrspace(1) %out, i8 %in) { ; SI-LABEL: s_uint_to_fp_i8_to_f64: ; SI: ; %bb.0: ; SI-NEXT: s_load_dword s2, s[4:5], 0x2 @@ -373,7 +373,7 @@ define amdgpu_kernel void @s_uint_to_fp_i8_to_f64(double addrspace(1)* %out, i8 ; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; VI-NEXT: s_endpgm %fp = uitofp i8 %in to double - store double %fp, double addrspace(1)* %out + store double %fp, ptr addrspace(1) %out ret void } @@ -397,7 +397,7 @@ define double @v_uint_to_fp_i8_to_f64(i8 %in) { ret double %fp } -define amdgpu_kernel void @s_select_uint_to_fp_i1_vals_f64(double addrspace(1)* %out, i32 %in) { +define amdgpu_kernel void @s_select_uint_to_fp_i1_vals_f64(ptr addrspace(1) %out, i32 %in) { ; SI-LABEL: s_select_uint_to_fp_i1_vals_f64: ; SI: ; %bb.0: ; SI-NEXT: s_load_dword s2, s[4:5], 0x2 @@ -427,11 +427,11 @@ define amdgpu_kernel void @s_select_uint_to_fp_i1_vals_f64(double addrspace(1)* ; VI-NEXT: s_endpgm %cmp = icmp eq i32 %in, 0 %select = select i1 %cmp, double 1.0, double 0.0 - store double %select, double addrspace(1)* %out, align 8 + store double %select, ptr addrspace(1) %out, align 8 ret void } -define 
void @v_select_uint_to_fp_i1_vals_f64(double addrspace(1)* %out, i32 %in) { +define void @v_select_uint_to_fp_i1_vals_f64(ptr addrspace(1) %out, i32 %in) { ; GCN-LABEL: v_select_uint_to_fp_i1_vals_f64: ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -444,11 +444,11 @@ define void @v_select_uint_to_fp_i1_vals_f64(double addrspace(1)* %out, i32 %in) ; GCN-NEXT: s_setpc_b64 s[30:31] %cmp = icmp eq i32 %in, 0 %select = select i1 %cmp, double 1.0, double 0.0 - store double %select, double addrspace(1)* %out, align 8 + store double %select, ptr addrspace(1) %out, align 8 ret void } -define amdgpu_kernel void @s_select_uint_to_fp_i1_vals_i64(i64 addrspace(1)* %out, i32 %in) { +define amdgpu_kernel void @s_select_uint_to_fp_i1_vals_i64(ptr addrspace(1) %out, i32 %in) { ; SI-LABEL: s_select_uint_to_fp_i1_vals_i64: ; SI: ; %bb.0: ; SI-NEXT: s_load_dword s2, s[4:5], 0x2 @@ -478,11 +478,11 @@ define amdgpu_kernel void @s_select_uint_to_fp_i1_vals_i64(i64 addrspace(1)* %ou ; VI-NEXT: s_endpgm %cmp = icmp eq i32 %in, 0 %select = select i1 %cmp, i64 u0x3ff0000000000000, i64 0 - store i64 %select, i64 addrspace(1)* %out, align 8 + store i64 %select, ptr addrspace(1) %out, align 8 ret void } -define void @v_select_uint_to_fp_i1_vals_i64(i64 addrspace(1)* %out, i32 %in) { +define void @v_select_uint_to_fp_i1_vals_i64(ptr addrspace(1) %out, i32 %in) { ; GCN-LABEL: v_select_uint_to_fp_i1_vals_i64: ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -495,12 +495,12 @@ define void @v_select_uint_to_fp_i1_vals_i64(i64 addrspace(1)* %out, i32 %in) { ; GCN-NEXT: s_setpc_b64 s[30:31] %cmp = icmp eq i32 %in, 0 %select = select i1 %cmp, i64 u0x3ff0000000000000, i64 0 - store i64 %select, i64 addrspace(1)* %out, align 8 + store i64 %select, ptr addrspace(1) %out, align 8 ret void } ; TODO: This should swap the selected order / invert the compare and do it. 
-define amdgpu_kernel void @s_swap_select_uint_to_fp_i1_vals_f64(double addrspace(1)* %out, i32 %in) { +define amdgpu_kernel void @s_swap_select_uint_to_fp_i1_vals_f64(ptr addrspace(1) %out, i32 %in) { ; SI-LABEL: s_swap_select_uint_to_fp_i1_vals_f64: ; SI: ; %bb.0: ; SI-NEXT: s_load_dword s2, s[4:5], 0x2 @@ -530,11 +530,11 @@ define amdgpu_kernel void @s_swap_select_uint_to_fp_i1_vals_f64(double addrspace ; VI-NEXT: s_endpgm %cmp = icmp eq i32 %in, 0 %select = select i1 %cmp, double 0.0, double 1.0 - store double %select, double addrspace(1)* %out, align 8 + store double %select, ptr addrspace(1) %out, align 8 ret void } -define void @v_swap_select_uint_to_fp_i1_vals_f64(double addrspace(1)* %out, i32 %in) { +define void @v_swap_select_uint_to_fp_i1_vals_f64(ptr addrspace(1) %out, i32 %in) { ; GCN-LABEL: v_swap_select_uint_to_fp_i1_vals_f64: ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -547,6 +547,6 @@ define void @v_swap_select_uint_to_fp_i1_vals_f64(double addrspace(1)* %out, i32 ; GCN-NEXT: s_setpc_b64 s[30:31] %cmp = icmp eq i32 %in, 0 %select = select i1 %cmp, double 0.0, double 1.0 - store double %select, double addrspace(1)* %out, align 8 + store double %select, ptr addrspace(1) %out, align 8 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/uint_to_fp.i64.ll b/llvm/test/CodeGen/AMDGPU/uint_to_fp.i64.ll index 8f8acf4..226facf 100644 --- a/llvm/test/CodeGen/AMDGPU/uint_to_fp.i64.ll +++ b/llvm/test/CodeGen/AMDGPU/uint_to_fp.i64.ll @@ -4,7 +4,7 @@ ; FIXME: This should be merged with uint_to_fp.ll, but s_uint_to_fp_v2i64 crashes on r600 -define amdgpu_kernel void @s_uint_to_fp_i64_to_f16(half addrspace(1)* %out, i64 %in) #0 { +define amdgpu_kernel void @s_uint_to_fp_i64_to_f16(ptr addrspace(1) %out, i64 %in) #0 { ; GFX6-LABEL: s_uint_to_fp_i64_to_f16: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -43,11 +43,11 @@ define amdgpu_kernel void @s_uint_to_fp_i64_to_f16(half addrspace(1)* %out, i64 ; GFX8-NEXT: flat_store_short v[0:1], v2 ; GFX8-NEXT: s_endpgm %result = uitofp i64 %in to half - store half %result, half addrspace(1)* %out + store half %result, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @v_uint_to_fp_i64_to_f16(half addrspace(1)* %out, i64 addrspace(1)* %in) #0 { +define amdgpu_kernel void @v_uint_to_fp_i64_to_f16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { ; GFX6-LABEL: v_uint_to_fp_i64_to_f16: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -99,15 +99,15 @@ define amdgpu_kernel void @v_uint_to_fp_i64_to_f16(half addrspace(1)* %out, i64 ; GFX8-NEXT: flat_store_short v[0:1], v3 ; GFX8-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() - %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid - %out.gep = getelementptr half, half addrspace(1)* %out, i32 %tid - %val = load i64, i64 addrspace(1)* %in.gep + %in.gep = getelementptr i64, ptr addrspace(1) %in, i32 %tid + %out.gep = getelementptr half, ptr addrspace(1) %out, i32 %tid + %val = load i64, ptr addrspace(1) %in.gep %result = uitofp i64 %val to half - store half %result, half addrspace(1)* %out.gep + store half %result, ptr addrspace(1) %out.gep ret void } -define amdgpu_kernel void @s_uint_to_fp_i64_to_f32(float addrspace(1)* %out, i64 %in) #0 { +define amdgpu_kernel void @s_uint_to_fp_i64_to_f32(ptr addrspace(1) %out, i64 %in) #0 { ; GFX6-LABEL: s_uint_to_fp_i64_to_f32: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -144,11 +144,11 @@ define amdgpu_kernel void @s_uint_to_fp_i64_to_f32(float 
addrspace(1)* %out, i64 ; GFX8-NEXT: flat_store_dword v[0:1], v2 ; GFX8-NEXT: s_endpgm %result = uitofp i64 %in to float - store float %result, float addrspace(1)* %out + store float %result, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @v_uint_to_fp_i64_to_f32(float addrspace(1)* %out, i64 addrspace(1)* %in) #0 { +define amdgpu_kernel void @v_uint_to_fp_i64_to_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { ; GFX6-LABEL: v_uint_to_fp_i64_to_f32: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -198,15 +198,15 @@ define amdgpu_kernel void @v_uint_to_fp_i64_to_f32(float addrspace(1)* %out, i64 ; GFX8-NEXT: flat_store_dword v[0:1], v2 ; GFX8-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() - %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid - %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid - %val = load i64, i64 addrspace(1)* %in.gep + %in.gep = getelementptr i64, ptr addrspace(1) %in, i32 %tid + %out.gep = getelementptr float, ptr addrspace(1) %out, i32 %tid + %val = load i64, ptr addrspace(1) %in.gep %result = uitofp i64 %val to float - store float %result, float addrspace(1)* %out.gep + store float %result, ptr addrspace(1) %out.gep ret void } -define amdgpu_kernel void @s_uint_to_fp_v2i64_to_v2f32(<2 x float> addrspace(1)* %out, <2 x i64> %in) #0{ +define amdgpu_kernel void @s_uint_to_fp_v2i64_to_v2f32(ptr addrspace(1) %out, <2 x i64> %in) #0{ ; GFX6-LABEL: s_uint_to_fp_v2i64_to_v2f32: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0xd @@ -259,11 +259,11 @@ define amdgpu_kernel void @s_uint_to_fp_v2i64_to_v2f32(<2 x float> addrspace(1)* ; GFX8-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; GFX8-NEXT: s_endpgm %result = uitofp <2 x i64> %in to <2 x float> - store <2 x float> %result, <2 x float> addrspace(1)* %out + store <2 x float> %result, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @v_uint_to_fp_v4i64_to_v4f32(<4 x float> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) #0 { +define amdgpu_kernel void @v_uint_to_fp_v4i64_to_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { ; GFX6-LABEL: v_uint_to_fp_v4i64_to_v4f32: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -368,15 +368,15 @@ define amdgpu_kernel void @v_uint_to_fp_v4i64_to_v4f32(<4 x float> addrspace(1)* ; GFX8-NEXT: flat_store_dwordx4 v[9:10], v[0:3] ; GFX8-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() - %in.gep = getelementptr <4 x i64>, <4 x i64> addrspace(1)* %in, i32 %tid - %out.gep = getelementptr <4 x float>, <4 x float> addrspace(1)* %out, i32 %tid - %value = load <4 x i64>, <4 x i64> addrspace(1)* %in.gep + %in.gep = getelementptr <4 x i64>, ptr addrspace(1) %in, i32 %tid + %out.gep = getelementptr <4 x float>, ptr addrspace(1) %out, i32 %tid + %value = load <4 x i64>, ptr addrspace(1) %in.gep %result = uitofp <4 x i64> %value to <4 x float> - store <4 x float> %result, <4 x float> addrspace(1)* %out.gep + store <4 x float> %result, ptr addrspace(1) %out.gep ret void } -define amdgpu_kernel void @s_uint_to_fp_v2i64_to_v2f16(<2 x half> addrspace(1)* %out, <2 x i64> %in) #0{ +define amdgpu_kernel void @s_uint_to_fp_v2i64_to_v2f16(ptr addrspace(1) %out, <2 x i64> %in) #0{ ; GFX6-LABEL: s_uint_to_fp_v2i64_to_v2f16: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0xd @@ -436,11 +436,11 @@ define amdgpu_kernel void @s_uint_to_fp_v2i64_to_v2f16(<2 x half> addrspace(1)* ; GFX8-NEXT: flat_store_dword v[0:1], v2 ; GFX8-NEXT: s_endpgm %result = uitofp <2 x i64> %in to <2 x 
half> - store <2 x half> %result, <2 x half> addrspace(1)* %out + store <2 x half> %result, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @v_uint_to_fp_v4i64_to_v4f16(<4 x half> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) #0 { +define amdgpu_kernel void @v_uint_to_fp_v4i64_to_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { ; GFX6-LABEL: v_uint_to_fp_v4i64_to_v4f16: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -559,11 +559,11 @@ define amdgpu_kernel void @v_uint_to_fp_v4i64_to_v4f16(<4 x half> addrspace(1)* ; GFX8-NEXT: flat_store_dwordx2 v[0:1], v[2:3] ; GFX8-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() - %in.gep = getelementptr <4 x i64>, <4 x i64> addrspace(1)* %in, i32 %tid - %out.gep = getelementptr <4 x half>, <4 x half> addrspace(1)* %out, i32 %tid - %value = load <4 x i64>, <4 x i64> addrspace(1)* %in.gep + %in.gep = getelementptr <4 x i64>, ptr addrspace(1) %in, i32 %tid + %out.gep = getelementptr <4 x half>, ptr addrspace(1) %out, i32 %tid + %value = load <4 x i64>, ptr addrspace(1) %in.gep %result = uitofp <4 x i64> %value to <4 x half> - store <4 x half> %result, <4 x half> addrspace(1)* %out.gep + store <4 x half> %result, ptr addrspace(1) %out.gep ret void } diff --git a/llvm/test/CodeGen/AMDGPU/uint_to_fp.ll b/llvm/test/CodeGen/AMDGPU/uint_to_fp.ll index e0a1a75..34cf1b6 100644 --- a/llvm/test/CodeGen/AMDGPU/uint_to_fp.ll +++ b/llvm/test/CodeGen/AMDGPU/uint_to_fp.ll @@ -6,9 +6,9 @@ ; SI: v_cvt_f32_u32_e32 ; R600: UINT_TO_FLT * T{{[0-9]+\.[XYZW]}}, KC0[2].Z -define amdgpu_kernel void @s_uint_to_fp_i32_to_f32(float addrspace(1)* %out, i32 %in) #0 { +define amdgpu_kernel void @s_uint_to_fp_i32_to_f32(ptr addrspace(1) %out, i32 %in) #0 { %result = uitofp i32 %in to float - store float %result, float addrspace(1)* %out + store float %result, ptr addrspace(1) %out ret void } @@ -16,13 +16,13 @@ define amdgpu_kernel void @s_uint_to_fp_i32_to_f32(float addrspace(1)* %out, i32 ; SI: v_cvt_f32_u32_e32 {{v[0-9]+}}, {{v[0-9]+$}} ; R600: INT_TO_FLT -define amdgpu_kernel void @v_uint_to_fp_i32_to_f32(float addrspace(1)* %out, i32 addrspace(1)* %in) #0 { +define amdgpu_kernel void @v_uint_to_fp_i32_to_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() - %in.gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid - %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid - %val = load i32, i32 addrspace(1)* %in.gep + %in.gep = getelementptr i32, ptr addrspace(1) %in, i32 %tid + %out.gep = getelementptr float, ptr addrspace(1) %out, i32 %tid + %val = load i32, ptr addrspace(1) %in.gep %result = uitofp i32 %val to float - store float %result, float addrspace(1)* %out.gep + store float %result, ptr addrspace(1) %out.gep ret void } @@ -32,9 +32,9 @@ define amdgpu_kernel void @v_uint_to_fp_i32_to_f32(float addrspace(1)* %out, i32 ; R600-DAG: UINT_TO_FLT * T{{[0-9]+\.[XYZW]}}, KC0[2].W ; R600-DAG: UINT_TO_FLT * T{{[0-9]+\.[XYZW]}}, KC0[3].X -define amdgpu_kernel void @s_uint_to_fp_v2i32_to_v2f32(<2 x float> addrspace(1)* %out, <2 x i32> %in) #0 { +define amdgpu_kernel void @s_uint_to_fp_v2i32_to_v2f32(ptr addrspace(1) %out, <2 x i32> %in) #0 { %result = uitofp <2 x i32> %in to <2 x float> - store <2 x float> %result, <2 x float> addrspace(1)* %out + store <2 x float> %result, ptr addrspace(1) %out ret void } @@ -49,10 +49,10 @@ define amdgpu_kernel void @s_uint_to_fp_v2i32_to_v2f32(<2 x float> addrspace(1)* ; R600: UINT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ; R600: 
UINT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ; R600: UINT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -define amdgpu_kernel void @s_uint_to_fp_v4i32_to_v4f32(<4 x float> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) #0 { - %value = load <4 x i32>, <4 x i32> addrspace(1) * %in +define amdgpu_kernel void @s_uint_to_fp_v4i32_to_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { + %value = load <4 x i32>, ptr addrspace(1) %in %result = uitofp <4 x i32> %value to <4 x float> - store <4 x float> %result, <4 x float> addrspace(1)* %out + store <4 x float> %result, ptr addrspace(1) %out ret void } @@ -66,13 +66,13 @@ define amdgpu_kernel void @s_uint_to_fp_v4i32_to_v4f32(<4 x float> addrspace(1)* ; R600: UINT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ; R600: UINT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ; R600: UINT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -define amdgpu_kernel void @v_uint_to_fp_v4i32(<4 x float> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) #0 { +define amdgpu_kernel void @v_uint_to_fp_v4i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() - %in.gep = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 %tid - %out.gep = getelementptr <4 x float>, <4 x float> addrspace(1)* %out, i32 %tid - %value = load <4 x i32>, <4 x i32> addrspace(1)* %in.gep + %in.gep = getelementptr <4 x i32>, ptr addrspace(1) %in, i32 %tid + %out.gep = getelementptr <4 x float>, ptr addrspace(1) %out, i32 %tid + %value = load <4 x i32>, ptr addrspace(1) %in.gep %result = uitofp <4 x i32> %value to <4 x float> - store <4 x float> %result, <4 x float> addrspace(1)* %out.gep + store <4 x float> %result, ptr addrspace(1) %out.gep ret void } @@ -82,10 +82,10 @@ define amdgpu_kernel void @v_uint_to_fp_v4i32(<4 x float> addrspace(1)* %out, <4 ; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1.0, [[CMP]] ; SI: buffer_store_dword [[RESULT]], ; SI: s_endpgm -define amdgpu_kernel void @s_uint_to_fp_i1_to_f32(float addrspace(1)* %out, i32 %in) #0 { +define amdgpu_kernel void @s_uint_to_fp_i1_to_f32(ptr addrspace(1) %out, i32 %in) #0 { %cmp = icmp eq i32 %in, 0 %fp = uitofp i1 %cmp to float - store float %fp, float addrspace(1)* %out + store float %fp, ptr addrspace(1) %out ret void } @@ -93,9 +93,9 @@ define amdgpu_kernel void @s_uint_to_fp_i1_to_f32(float addrspace(1)* %out, i32 ; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1.0 ; SI: buffer_store_dword [[RESULT]], ; SI: s_endpgm -define amdgpu_kernel void @s_uint_to_fp_i1_to_f32_load(float addrspace(1)* %out, i1 %in) #0 { +define amdgpu_kernel void @s_uint_to_fp_i1_to_f32_load(ptr addrspace(1) %out, i1 %in) #0 { %fp = uitofp i1 %in to float - store float %fp, float addrspace(1)* %out + store float %fp, ptr addrspace(1) %out ret void } @@ -106,13 +106,13 @@ define amdgpu_kernel void @s_uint_to_fp_i1_to_f32_load(float addrspace(1)* %out, ; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1.0 ; SI: {{buffer|flat}}_store_dword {{.*}}[[RESULT]] ; SI: s_endpgm -define amdgpu_kernel void @v_uint_to_fp_i1_f32_load(float addrspace(1)* %out, i1 addrspace(1)* %in) #0 { +define amdgpu_kernel void @v_uint_to_fp_i1_f32_load(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() - %in.gep = getelementptr i1, i1 addrspace(1)* %in, i32 %tid - %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid - %val = load i1, i1 addrspace(1)* %in.gep + %in.gep = getelementptr i1, ptr addrspace(1) %in, i32 %tid + %out.gep = getelementptr float, ptr addrspace(1) %out, 
i32 %tid + %val = load i1, ptr addrspace(1) %in.gep %fp = uitofp i1 %val to float - store float %fp, float addrspace(1)* %out.gep + store float %fp, ptr addrspace(1) %out.gep ret void } @@ -122,10 +122,10 @@ define amdgpu_kernel void @v_uint_to_fp_i1_f32_load(float addrspace(1)* %out, i1 ; R600: CNDE_INT ; R600: UINT_TO_FLT -define amdgpu_kernel void @s_uint_to_fp_i64_to_f32(float addrspace(1)* %out, i64 %in) #0 { +define amdgpu_kernel void @s_uint_to_fp_i64_to_f32(ptr addrspace(1) %out, i64 %in) #0 { entry: %cvt = uitofp i64 %in to float - store float %cvt, float addrspace(1)* %out + store float %cvt, ptr addrspace(1) %out ret void } diff --git a/llvm/test/CodeGen/AMDGPU/uitofp.f16.ll b/llvm/test/CodeGen/AMDGPU/uitofp.f16.ll index 99a3141..fd99d80 100644 --- a/llvm/test/CodeGen/AMDGPU/uitofp.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/uitofp.f16.ll @@ -11,12 +11,12 @@ ; GCN: buffer_store_short v[[R_F16]] ; GCN: s_endpgm define amdgpu_kernel void @uitofp_i16_to_f16( - half addrspace(1)* %r, - i16 addrspace(1)* %a) { + ptr addrspace(1) %r, + ptr addrspace(1) %a) { entry: - %a.val = load i16, i16 addrspace(1)* %a + %a.val = load i16, ptr addrspace(1) %a %r.val = uitofp i16 %a.val to half - store half %r.val, half addrspace(1)* %r + store half %r.val, ptr addrspace(1) %r ret void } @@ -27,12 +27,12 @@ entry: ; GCN: buffer_store_short v[[R_F16]] ; GCN: s_endpgm define amdgpu_kernel void @uitofp_i32_to_f16( - half addrspace(1)* %r, - i32 addrspace(1)* %a) { + ptr addrspace(1) %r, + ptr addrspace(1) %a) { entry: - %a.val = load i32, i32 addrspace(1)* %a + %a.val = load i32, ptr addrspace(1) %a %r.val = uitofp i32 %a.val to half - store half %r.val, half addrspace(1)* %r + store half %r.val, ptr addrspace(1) %r ret void } @@ -56,12 +56,12 @@ entry: ; GCN: buffer_store_dword ; GCN: s_endpgm define amdgpu_kernel void @uitofp_v2i16_to_v2f16( - <2 x half> addrspace(1)* %r, - <2 x i16> addrspace(1)* %a) { + ptr addrspace(1) %r, + ptr addrspace(1) %a) { entry: - %a.val = load <2 x i16>, <2 x i16> addrspace(1)* %a + %a.val = load <2 x i16>, ptr addrspace(1) %a %r.val = uitofp <2 x i16> %a.val to <2 x half> - store <2 x half> %r.val, <2 x half> addrspace(1)* %r + store <2 x half> %r.val, ptr addrspace(1) %r ret void } @@ -84,12 +84,12 @@ entry: ; GCN: buffer_store_dword ; GCN: s_endpgm define amdgpu_kernel void @uitofp_v2i32_to_v2f16( - <2 x half> addrspace(1)* %r, - <2 x i32> addrspace(1)* %a) { + ptr addrspace(1) %r, + ptr addrspace(1) %a) { entry: - %a.val = load <2 x i32>, <2 x i32> addrspace(1)* %a + %a.val = load <2 x i32>, ptr addrspace(1) %a %r.val = uitofp <2 x i32> %a.val to <2 x half> - store <2 x half> %r.val, <2 x half> addrspace(1)* %r + store <2 x half> %r.val, ptr addrspace(1) %r ret void } @@ -101,14 +101,14 @@ entry: ; GCN-NEXT: v_cvt_f16_f32_e32 [[R_F16:v[0-9]+]], [[RESULT]] ; GCN: buffer_store_short ; GCN: s_endpgm -define amdgpu_kernel void @s_uint_to_fp_i1_to_f16(half addrspace(1)* %out, float addrspace(1)* %in0, float addrspace(1)* %in1) { - %a = load float, float addrspace(1) * %in0 - %b = load float, float addrspace(1) * %in1 +define amdgpu_kernel void @s_uint_to_fp_i1_to_f16(ptr addrspace(1) %out, ptr addrspace(1) %in0, ptr addrspace(1) %in1) { + %a = load float, ptr addrspace(1) %in0 + %b = load float, ptr addrspace(1) %in1 %acmp = fcmp oge float %a, 0.000000e+00 %bcmp = fcmp oge float %b, 1.000000e+00 %result = xor i1 %acmp, %bcmp %fp = uitofp i1 %result to half - store half %fp, half addrspace(1)* %out + store half %fp, ptr addrspace(1) %out ret void } -- 2.7.4
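
For reference only (not part of the patch above): a minimal, hypothetical kernel sketching the typed-pointer to opaque-pointer rewrite these tests received. The function name @example and its body are illustrative assumptions, not taken from any of the converted files; the pattern is that pointee types disappear from pointer operands while value types stay on getelementptr, load, and store.

; Typed-pointer form (before the conversion):
;   define amdgpu_kernel void @example(float addrspace(1)* %out, i32 addrspace(1)* %in) {
;     %gep = getelementptr i32, i32 addrspace(1)* %in, i32 1
;     %val = load i32, i32 addrspace(1)* %gep
;     %cvt = uitofp i32 %val to float
;     store float %cvt, float addrspace(1)* %out
;     ret void
;   }
; Opaque-pointer form (after the conversion):
define amdgpu_kernel void @example(ptr addrspace(1) %out, ptr addrspace(1) %in) {
  %gep = getelementptr i32, ptr addrspace(1) %in, i32 1 ; element type stays on the gep
  %val = load i32, ptr addrspace(1) %gep                ; loaded type stays on the load
  %cvt = uitofp i32 %val to float
  store float %cvt, ptr addrspace(1) %out               ; pointee type dropped from the pointer operand
  ret void
}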