From 6a91a5e82647f206a31706586d201ecc638e9365 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Tue, 29 Nov 2022 18:41:50 -0500 Subject: [PATCH] AMDGPU: Convert some cast tests to opaque pointers --- llvm/test/CodeGen/AMDGPU/addrspacecast-captured.ll | 32 +-- .../CodeGen/AMDGPU/addrspacecast-constantexpr.ll | 136 ++++++------ .../addrspacecast-initializer-unsupported.ll | 4 +- .../CodeGen/AMDGPU/addrspacecast-initializer.ll | 6 +- .../CodeGen/AMDGPU/addrspacecast-known-non-null.ll | 20 +- llvm/test/CodeGen/AMDGPU/addrspacecast.ll | 152 +++++++------ .../test/CodeGen/AMDGPU/any_extend_vector_inreg.ll | 17 +- llvm/test/CodeGen/AMDGPU/anyext.ll | 20 +- .../AMDGPU/codegen-prepare-addrmode-sext.ll | 6 +- llvm/test/CodeGen/AMDGPU/combine-and-sext-bool.ll | 8 +- llvm/test/CodeGen/AMDGPU/fp16_to_fp32.ll | 6 +- llvm/test/CodeGen/AMDGPU/fp16_to_fp64.ll | 6 +- llvm/test/CodeGen/AMDGPU/fp32_to_fp16.ll | 6 +- llvm/test/CodeGen/AMDGPU/fp_to_sint.f64.ll | 28 +-- llvm/test/CodeGen/AMDGPU/fp_to_sint.ll | 42 ++-- llvm/test/CodeGen/AMDGPU/fp_to_uint.f64.ll | 36 +-- llvm/test/CodeGen/AMDGPU/fp_to_uint.ll | 38 ++-- llvm/test/CodeGen/AMDGPU/fpext.f16.ll | 120 +++++----- llvm/test/CodeGen/AMDGPU/fpext.ll | 20 +- llvm/test/CodeGen/AMDGPU/fptosi.f16.ll | 52 ++--- llvm/test/CodeGen/AMDGPU/fptoui.f16.ll | 52 ++--- llvm/test/CodeGen/AMDGPU/i8-to-double-to-float.ll | 6 +- llvm/test/CodeGen/AMDGPU/setcc-sext.ll | 52 ++--- llvm/test/CodeGen/AMDGPU/sext-eliminate.ll | 8 +- llvm/test/CodeGen/AMDGPU/sext-in-reg.ll | 246 ++++++++++----------- llvm/test/CodeGen/AMDGPU/sign_extend.ll | 82 +++---- llvm/test/CodeGen/AMDGPU/sint_to_fp.f64.ll | 52 ++--- llvm/test/CodeGen/AMDGPU/sint_to_fp.i64.ll | 56 ++--- llvm/test/CodeGen/AMDGPU/sint_to_fp.ll | 52 ++--- llvm/test/CodeGen/AMDGPU/sitofp.f16.ll | 40 ++-- llvm/test/CodeGen/AMDGPU/uint_to_fp.f64.ll | 68 +++--- llvm/test/CodeGen/AMDGPU/uint_to_fp.i64.ll | 56 ++--- llvm/test/CodeGen/AMDGPU/uint_to_fp.ll | 56 ++--- llvm/test/CodeGen/AMDGPU/uitofp.f16.ll | 40 ++-- 34 files changed, 808 insertions(+), 813 deletions(-) diff --git a/llvm/test/CodeGen/AMDGPU/addrspacecast-captured.ll b/llvm/test/CodeGen/AMDGPU/addrspacecast-captured.ll index 6e3550b..db5d39e 100644 --- a/llvm/test/CodeGen/AMDGPU/addrspacecast-captured.ll +++ b/llvm/test/CodeGen/AMDGPU/addrspacecast-captured.ll @@ -7,40 +7,40 @@ declare void @consume_ptr2int(i32) #0 ; CHECK-LABEL: @addrspacecast_captured( ; CHECK: %data = alloca i32, align 4, addrspace(5) -; CHECK: %cast = addrspacecast i32 addrspace(5)* %data to i32* -; CHECK: %ptr2int = ptrtoint i32* %cast to i32 -; CHECK: store i32 %ptr2int, i32 addrspace(1)* %out -define amdgpu_kernel void @addrspacecast_captured(i32 addrspace(1)* %out) #0 { +; CHECK: %cast = addrspacecast ptr addrspace(5) %data to ptr +; CHECK: %ptr2int = ptrtoint ptr %cast to i32 +; CHECK: store i32 %ptr2int, ptr addrspace(1) %out +define amdgpu_kernel void @addrspacecast_captured(ptr addrspace(1) %out) #0 { entry: %data = alloca i32, align 4, addrspace(5) - %cast = addrspacecast i32 addrspace(5)* %data to i32* - %ptr2int = ptrtoint i32* %cast to i32 - store i32 %ptr2int, i32 addrspace(1)* %out + %cast = addrspacecast ptr addrspace(5) %data to ptr + %ptr2int = ptrtoint ptr %cast to i32 + store i32 %ptr2int, ptr addrspace(1) %out ret void } ; CHECK-LABEL: @addrspacecast_captured_store( ; CHECK: %data = alloca i32, align 4, addrspace(5) -; CHECK: %cast = addrspacecast i32 addrspace(5)* %data to i32* -; CHECK: store i32* %cast, i32* addrspace(1)* %out -define amdgpu_kernel void 
@addrspacecast_captured_store(i32* addrspace(1)* %out) #0 { +; CHECK: %cast = addrspacecast ptr addrspace(5) %data to ptr +; CHECK: store ptr %cast, ptr addrspace(1) %out +define amdgpu_kernel void @addrspacecast_captured_store(ptr addrspace(1) %out) #0 { entry: %data = alloca i32, align 4, addrspace(5) - %cast = addrspacecast i32 addrspace(5)* %data to i32* - store i32* %cast, i32* addrspace(1)* %out + %cast = addrspacecast ptr addrspace(5) %data to ptr + store ptr %cast, ptr addrspace(1) %out ret void } ; CHECK-LABEL: @addrspacecast_captured_call( ; CHECK: %data = alloca i32, align 4, addrspace(5) -; CHECK: %cast = addrspacecast i32 addrspace(5)* %data to i32* -; CHECK: %ptr2int = ptrtoint i32* %cast to i32 +; CHECK: %cast = addrspacecast ptr addrspace(5) %data to ptr +; CHECK: %ptr2int = ptrtoint ptr %cast to i32 ; CHECK: call void @consume_ptr2int(i32 %ptr2int) define amdgpu_kernel void @addrspacecast_captured_call() #0 { entry: %data = alloca i32, align 4, addrspace(5) - %cast = addrspacecast i32 addrspace(5)* %data to i32* - %ptr2int = ptrtoint i32* %cast to i32 + %cast = addrspacecast ptr addrspace(5) %data to ptr + %ptr2int = ptrtoint ptr %cast to i32 call void @consume_ptr2int(i32 %ptr2int) ret void } diff --git a/llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll b/llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll index b3be370..66f249f 100644 --- a/llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll +++ b/llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll @@ -2,7 +2,7 @@ ; RUN: opt -mtriple=amdgcn-unknown-amdhsa -S -amdgpu-annotate-kernel-features < %s | FileCheck -check-prefixes=HSA,AKF_HSA %s ; RUN: opt -mtriple=amdgcn-unknown-amdhsa -S -amdgpu-attributor < %s | FileCheck -check-prefixes=HSA,ATTRIBUTOR_HSA %s -declare void @llvm.memcpy.p1i32.p4i32.i32(i32 addrspace(1)* nocapture, i32 addrspace(4)* nocapture, i32, i1) #0 +declare void @llvm.memcpy.p1.p4.i32(ptr addrspace(1) nocapture, ptr addrspace(4) nocapture, i32, i1) #0 @lds.i32 = unnamed_addr addrspace(3) global i32 undef, align 4 @lds.arr = unnamed_addr addrspace(3) global [256 x i32] undef, align 4 @@ -19,179 +19,179 @@ declare void @llvm.memcpy.p1i32.p4i32.i32(i32 addrspace(1)* nocapture, i32 addrs define amdgpu_kernel void @store_cast_0_flat_to_group_addrspacecast() #1 { ; HSA-LABEL: define {{[^@]+}}@store_cast_0_flat_to_group_addrspacecast ; HSA-SAME: () #[[ATTR1:[0-9]+]] { -; HSA-NEXT: store i32 7, i32 addrspace(3)* addrspacecast (i32 addrspace(4)* null to i32 addrspace(3)*), align 4 +; HSA-NEXT: store i32 7, ptr addrspace(3) addrspacecast (ptr addrspace(4) null to ptr addrspace(3)), align 4 ; HSA-NEXT: ret void ; - store i32 7, i32 addrspace(3)* addrspacecast (i32 addrspace(4)* null to i32 addrspace(3)*) + store i32 7, ptr addrspace(3) addrspacecast (ptr addrspace(4) null to ptr addrspace(3)) ret void } define amdgpu_kernel void @store_cast_0_group_to_flat_addrspacecast() #1 { ; AKF_HSA-LABEL: define {{[^@]+}}@store_cast_0_group_to_flat_addrspacecast ; AKF_HSA-SAME: () #[[ATTR1]] { -; AKF_HSA-NEXT: store i32 7, i32 addrspace(4)* addrspacecast (i32 addrspace(3)* null to i32 addrspace(4)*), align 4 +; AKF_HSA-NEXT: store i32 7, ptr addrspace(4) addrspacecast (ptr addrspace(3) null to ptr addrspace(4)), align 4 ; AKF_HSA-NEXT: ret void ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@store_cast_0_group_to_flat_addrspacecast ; ATTRIBUTOR_HSA-SAME: () #[[ATTR2:[0-9]+]] { -; ATTRIBUTOR_HSA-NEXT: store i32 7, i32 addrspace(4)* addrspacecast (i32 addrspace(3)* null to i32 addrspace(4)*), align 4 +; 
ATTRIBUTOR_HSA-NEXT: store i32 7, ptr addrspace(4) addrspacecast (ptr addrspace(3) null to ptr addrspace(4)), align 4 ; ATTRIBUTOR_HSA-NEXT: ret void ; - store i32 7, i32 addrspace(4)* addrspacecast (i32 addrspace(3)* null to i32 addrspace(4)*) + store i32 7, ptr addrspace(4) addrspacecast (ptr addrspace(3) null to ptr addrspace(4)) ret void } define amdgpu_kernel void @store_constant_cast_group_gv_to_flat() #1 { ; AKF_HSA-LABEL: define {{[^@]+}}@store_constant_cast_group_gv_to_flat ; AKF_HSA-SAME: () #[[ATTR1]] { -; AKF_HSA-NEXT: store i32 7, i32 addrspace(4)* addrspacecast (i32 addrspace(3)* @lds.i32 to i32 addrspace(4)*), align 4 +; AKF_HSA-NEXT: store i32 7, ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.i32 to ptr addrspace(4)), align 4 ; AKF_HSA-NEXT: ret void ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@store_constant_cast_group_gv_to_flat ; ATTRIBUTOR_HSA-SAME: () #[[ATTR2]] { -; ATTRIBUTOR_HSA-NEXT: store i32 7, i32 addrspace(4)* addrspacecast (i32 addrspace(3)* @lds.i32 to i32 addrspace(4)*), align 4 +; ATTRIBUTOR_HSA-NEXT: store i32 7, ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.i32 to ptr addrspace(4)), align 4 ; ATTRIBUTOR_HSA-NEXT: ret void ; - store i32 7, i32 addrspace(4)* addrspacecast (i32 addrspace(3)* @lds.i32 to i32 addrspace(4)*) + store i32 7, ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.i32 to ptr addrspace(4)) ret void } define amdgpu_kernel void @store_constant_cast_group_gv_gep_to_flat() #1 { ; AKF_HSA-LABEL: define {{[^@]+}}@store_constant_cast_group_gv_gep_to_flat ; AKF_HSA-SAME: () #[[ATTR1]] { -; AKF_HSA-NEXT: store i32 7, i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8), align 4 +; AKF_HSA-NEXT: store i32 7, ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8), align 4 ; AKF_HSA-NEXT: ret void ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@store_constant_cast_group_gv_gep_to_flat ; ATTRIBUTOR_HSA-SAME: () #[[ATTR2]] { -; ATTRIBUTOR_HSA-NEXT: store i32 7, i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8), align 4 +; ATTRIBUTOR_HSA-NEXT: store i32 7, ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8), align 4 ; ATTRIBUTOR_HSA-NEXT: ret void ; - store i32 7, i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8) + store i32 7, ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8) ret void } define amdgpu_kernel void @store_constant_cast_global_gv_to_flat() #1 { ; HSA-LABEL: define {{[^@]+}}@store_constant_cast_global_gv_to_flat ; HSA-SAME: () #[[ATTR1]] { -; HSA-NEXT: store i32 7, i32 addrspace(4)* addrspacecast (i32 addrspace(1)* @global.i32 to i32 addrspace(4)*), align 4 +; HSA-NEXT: store i32 7, ptr addrspace(4) addrspacecast (ptr addrspace(1) @global.i32 to ptr addrspace(4)), align 4 ; HSA-NEXT: ret void ; - store i32 7, i32 addrspace(4)* addrspacecast (i32 addrspace(1)* @global.i32 to i32 addrspace(4)*) + store i32 7, ptr addrspace(4) addrspacecast (ptr addrspace(1) @global.i32 to ptr addrspace(4)) ret void } define amdgpu_kernel void 
@store_constant_cast_global_gv_gep_to_flat() #1 { ; HSA-LABEL: define {{[^@]+}}@store_constant_cast_global_gv_gep_to_flat ; HSA-SAME: () #[[ATTR1]] { -; HSA-NEXT: store i32 7, i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(1)* @global.arr to [256 x i32] addrspace(4)*), i64 0, i64 8), align 4 +; HSA-NEXT: store i32 7, ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(1) @global.arr to ptr addrspace(4)), i64 0, i64 8), align 4 ; HSA-NEXT: ret void ; - store i32 7, i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(1)* @global.arr to [256 x i32] addrspace(4)*), i64 0, i64 8) + store i32 7, ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(1) @global.arr to ptr addrspace(4)), i64 0, i64 8) ret void } -define amdgpu_kernel void @load_constant_cast_group_gv_gep_to_flat(i32 addrspace(1)* %out) #1 { +define amdgpu_kernel void @load_constant_cast_group_gv_gep_to_flat(ptr addrspace(1) %out) #1 { ; AKF_HSA-LABEL: define {{[^@]+}}@load_constant_cast_group_gv_gep_to_flat -; AKF_HSA-SAME: (i32 addrspace(1)* [[OUT:%.*]]) #[[ATTR1]] { -; AKF_HSA-NEXT: [[VAL:%.*]] = load i32, i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8), align 4 -; AKF_HSA-NEXT: store i32 [[VAL]], i32 addrspace(1)* [[OUT]], align 4 +; AKF_HSA-SAME: (ptr addrspace(1) [[OUT:%.*]]) #[[ATTR1]] { +; AKF_HSA-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8), align 4 +; AKF_HSA-NEXT: store i32 [[VAL]], ptr addrspace(1) [[OUT]], align 4 ; AKF_HSA-NEXT: ret void ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@load_constant_cast_group_gv_gep_to_flat -; ATTRIBUTOR_HSA-SAME: (i32 addrspace(1)* [[OUT:%.*]]) #[[ATTR2]] { -; ATTRIBUTOR_HSA-NEXT: [[VAL:%.*]] = load i32, i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8), align 4 -; ATTRIBUTOR_HSA-NEXT: store i32 [[VAL]], i32 addrspace(1)* [[OUT]], align 4 +; ATTRIBUTOR_HSA-SAME: (ptr addrspace(1) [[OUT:%.*]]) #[[ATTR2]] { +; ATTRIBUTOR_HSA-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8), align 4 +; ATTRIBUTOR_HSA-NEXT: store i32 [[VAL]], ptr addrspace(1) [[OUT]], align 4 ; ATTRIBUTOR_HSA-NEXT: ret void ; - %val = load i32, i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8) - store i32 %val, i32 addrspace(1)* %out + %val = load i32, ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8) + store i32 %val, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @atomicrmw_constant_cast_group_gv_gep_to_flat(i32 addrspace(1)* %out) #1 { +define amdgpu_kernel void @atomicrmw_constant_cast_group_gv_gep_to_flat(ptr addrspace(1) %out) #1 { ; AKF_HSA-LABEL: define {{[^@]+}}@atomicrmw_constant_cast_group_gv_gep_to_flat -; AKF_HSA-SAME: (i32 addrspace(1)* [[OUT:%.*]]) #[[ATTR1]] { -; AKF_HSA-NEXT: [[VAL:%.*]] = atomicrmw add i32 addrspace(4)* 
getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8), i32 1 seq_cst, align 4 -; AKF_HSA-NEXT: store i32 [[VAL]], i32 addrspace(1)* [[OUT]], align 4 +; AKF_HSA-SAME: (ptr addrspace(1) [[OUT:%.*]]) #[[ATTR1]] { +; AKF_HSA-NEXT: [[VAL:%.*]] = atomicrmw add ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8), i32 1 seq_cst, align 4 +; AKF_HSA-NEXT: store i32 [[VAL]], ptr addrspace(1) [[OUT]], align 4 ; AKF_HSA-NEXT: ret void ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@atomicrmw_constant_cast_group_gv_gep_to_flat -; ATTRIBUTOR_HSA-SAME: (i32 addrspace(1)* [[OUT:%.*]]) #[[ATTR2]] { -; ATTRIBUTOR_HSA-NEXT: [[VAL:%.*]] = atomicrmw add i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8), i32 1 seq_cst, align 4 -; ATTRIBUTOR_HSA-NEXT: store i32 [[VAL]], i32 addrspace(1)* [[OUT]], align 4 +; ATTRIBUTOR_HSA-SAME: (ptr addrspace(1) [[OUT:%.*]]) #[[ATTR2]] { +; ATTRIBUTOR_HSA-NEXT: [[VAL:%.*]] = atomicrmw add ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8), i32 1 seq_cst, align 4 +; ATTRIBUTOR_HSA-NEXT: store i32 [[VAL]], ptr addrspace(1) [[OUT]], align 4 ; ATTRIBUTOR_HSA-NEXT: ret void ; - %val = atomicrmw add i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8), i32 1 seq_cst - store i32 %val, i32 addrspace(1)* %out + %val = atomicrmw add ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8), i32 1 seq_cst + store i32 %val, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @cmpxchg_constant_cast_group_gv_gep_to_flat(i32 addrspace(1)* %out) #1 { +define amdgpu_kernel void @cmpxchg_constant_cast_group_gv_gep_to_flat(ptr addrspace(1) %out) #1 { ; AKF_HSA-LABEL: define {{[^@]+}}@cmpxchg_constant_cast_group_gv_gep_to_flat -; AKF_HSA-SAME: (i32 addrspace(1)* [[OUT:%.*]]) #[[ATTR1]] { -; AKF_HSA-NEXT: [[VAL:%.*]] = cmpxchg i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8), i32 0, i32 1 seq_cst seq_cst, align 4 +; AKF_HSA-SAME: (ptr addrspace(1) [[OUT:%.*]]) #[[ATTR1]] { +; AKF_HSA-NEXT: [[VAL:%.*]] = cmpxchg ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8), i32 0, i32 1 seq_cst seq_cst, align 4 ; AKF_HSA-NEXT: [[VAL0:%.*]] = extractvalue { i32, i1 } [[VAL]], 0 -; AKF_HSA-NEXT: store i32 [[VAL0]], i32 addrspace(1)* [[OUT]], align 4 +; AKF_HSA-NEXT: store i32 [[VAL0]], ptr addrspace(1) [[OUT]], align 4 ; AKF_HSA-NEXT: ret void ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@cmpxchg_constant_cast_group_gv_gep_to_flat -; ATTRIBUTOR_HSA-SAME: (i32 addrspace(1)* [[OUT:%.*]]) #[[ATTR2]] { -; ATTRIBUTOR_HSA-NEXT: [[VAL:%.*]] = cmpxchg i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8), i32 0, i32 1 seq_cst seq_cst, align 4 +; ATTRIBUTOR_HSA-SAME: (ptr addrspace(1) [[OUT:%.*]]) #[[ATTR2]] { +; ATTRIBUTOR_HSA-NEXT: 
[[VAL:%.*]] = cmpxchg ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8), i32 0, i32 1 seq_cst seq_cst, align 4 ; ATTRIBUTOR_HSA-NEXT: [[VAL0:%.*]] = extractvalue { i32, i1 } [[VAL]], 0 -; ATTRIBUTOR_HSA-NEXT: store i32 [[VAL0]], i32 addrspace(1)* [[OUT]], align 4 +; ATTRIBUTOR_HSA-NEXT: store i32 [[VAL0]], ptr addrspace(1) [[OUT]], align 4 ; ATTRIBUTOR_HSA-NEXT: ret void ; - %val = cmpxchg i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8), i32 0, i32 1 seq_cst seq_cst + %val = cmpxchg ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8), i32 0, i32 1 seq_cst seq_cst %val0 = extractvalue { i32, i1 } %val, 0 - store i32 %val0, i32 addrspace(1)* %out + store i32 %val0, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @memcpy_constant_cast_group_gv_gep_to_flat(i32 addrspace(1)* %out) #1 { +define amdgpu_kernel void @memcpy_constant_cast_group_gv_gep_to_flat(ptr addrspace(1) %out) #1 { ; AKF_HSA-LABEL: define {{[^@]+}}@memcpy_constant_cast_group_gv_gep_to_flat -; AKF_HSA-SAME: (i32 addrspace(1)* [[OUT:%.*]]) #[[ATTR1]] { -; AKF_HSA-NEXT: call void @llvm.memcpy.p1i32.p4i32.i32(i32 addrspace(1)* align 4 [[OUT]], i32 addrspace(4)* align 4 getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8), i32 32, i1 false) +; AKF_HSA-SAME: (ptr addrspace(1) [[OUT:%.*]]) #[[ATTR1]] { +; AKF_HSA-NEXT: call void @llvm.memcpy.p1.p4.i32(ptr addrspace(1) align 4 [[OUT]], ptr addrspace(4) align 4 getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8), i32 32, i1 false) ; AKF_HSA-NEXT: ret void ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@memcpy_constant_cast_group_gv_gep_to_flat -; ATTRIBUTOR_HSA-SAME: (i32 addrspace(1)* [[OUT:%.*]]) #[[ATTR2]] { -; ATTRIBUTOR_HSA-NEXT: call void @llvm.memcpy.p1i32.p4i32.i32(i32 addrspace(1)* align 4 [[OUT]], i32 addrspace(4)* align 4 getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8), i32 32, i1 false) +; ATTRIBUTOR_HSA-SAME: (ptr addrspace(1) [[OUT:%.*]]) #[[ATTR2]] { +; ATTRIBUTOR_HSA-NEXT: call void @llvm.memcpy.p1.p4.i32(ptr addrspace(1) align 4 [[OUT]], ptr addrspace(4) align 4 getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8), i32 32, i1 false) ; ATTRIBUTOR_HSA-NEXT: ret void ; - call void @llvm.memcpy.p1i32.p4i32.i32(i32 addrspace(1)* align 4 %out, i32 addrspace(4)* align 4 getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8), i32 32, i1 false) + call void @llvm.memcpy.p1.p4.i32(ptr addrspace(1) align 4 %out, ptr addrspace(4) align 4 getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8), i32 32, i1 false) ret void } ; Can't just search the pointer value -define amdgpu_kernel void @store_value_constant_cast_lds_gv_gep_to_flat(i32 addrspace(4)* addrspace(1)* %out) #1 { +define amdgpu_kernel void @store_value_constant_cast_lds_gv_gep_to_flat(ptr addrspace(1) %out) #1 { ; AKF_HSA-LABEL: define 
{{[^@]+}}@store_value_constant_cast_lds_gv_gep_to_flat -; AKF_HSA-SAME: (i32 addrspace(4)* addrspace(1)* [[OUT:%.*]]) #[[ATTR1]] { -; AKF_HSA-NEXT: store i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8), i32 addrspace(4)* addrspace(1)* [[OUT]], align 8 +; AKF_HSA-SAME: (ptr addrspace(1) [[OUT:%.*]]) #[[ATTR1]] { +; AKF_HSA-NEXT: store ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8), ptr addrspace(1) [[OUT]], align 8 ; AKF_HSA-NEXT: ret void ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@store_value_constant_cast_lds_gv_gep_to_flat -; ATTRIBUTOR_HSA-SAME: (i32 addrspace(4)* addrspace(1)* [[OUT:%.*]]) #[[ATTR2]] { -; ATTRIBUTOR_HSA-NEXT: store i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8), i32 addrspace(4)* addrspace(1)* [[OUT]], align 8 +; ATTRIBUTOR_HSA-SAME: (ptr addrspace(1) [[OUT:%.*]]) #[[ATTR2]] { +; ATTRIBUTOR_HSA-NEXT: store ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8), ptr addrspace(1) [[OUT]], align 8 ; ATTRIBUTOR_HSA-NEXT: ret void ; - store i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8), i32 addrspace(4)* addrspace(1)* %out + store ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8), ptr addrspace(1) %out ret void } ; Can't just search pointer types -define amdgpu_kernel void @store_ptrtoint_value_constant_cast_lds_gv_gep_to_flat(i64 addrspace(1)* %out) #1 { +define amdgpu_kernel void @store_ptrtoint_value_constant_cast_lds_gv_gep_to_flat(ptr addrspace(1) %out) #1 { ; AKF_HSA-LABEL: define {{[^@]+}}@store_ptrtoint_value_constant_cast_lds_gv_gep_to_flat -; AKF_HSA-SAME: (i64 addrspace(1)* [[OUT:%.*]]) #[[ATTR1]] { -; AKF_HSA-NEXT: store i64 ptrtoint (i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8) to i64), i64 addrspace(1)* [[OUT]], align 4 +; AKF_HSA-SAME: (ptr addrspace(1) [[OUT:%.*]]) #[[ATTR1]] { +; AKF_HSA-NEXT: store i64 ptrtoint (ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8) to i64), ptr addrspace(1) [[OUT]], align 4 ; AKF_HSA-NEXT: ret void ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@store_ptrtoint_value_constant_cast_lds_gv_gep_to_flat -; ATTRIBUTOR_HSA-SAME: (i64 addrspace(1)* [[OUT:%.*]]) #[[ATTR2]] { -; ATTRIBUTOR_HSA-NEXT: store i64 ptrtoint (i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8) to i64), i64 addrspace(1)* [[OUT]], align 4 +; ATTRIBUTOR_HSA-SAME: (ptr addrspace(1) [[OUT:%.*]]) #[[ATTR2]] { +; ATTRIBUTOR_HSA-NEXT: store i64 ptrtoint (ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8) to i64), ptr addrspace(1) [[OUT]], align 4 ; ATTRIBUTOR_HSA-NEXT: ret void ; - store i64 ptrtoint (i32 addrspace(4)* getelementptr ([256 x i32], [256 x 
i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8) to i64), i64 addrspace(1)* %out + store i64 ptrtoint (ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8) to i64), ptr addrspace(1) %out ret void } @@ -199,28 +199,28 @@ define amdgpu_kernel void @store_ptrtoint_value_constant_cast_lds_gv_gep_to_flat define amdgpu_kernel void @store_constant_cast_group_gv_gep_to_flat_to_group() #1 { ; AKF_HSA-LABEL: define {{[^@]+}}@store_constant_cast_group_gv_gep_to_flat_to_group ; AKF_HSA-SAME: () #[[ATTR1]] { -; AKF_HSA-NEXT: store i32 7, i32 addrspace(3)* addrspacecast (i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8) to i32 addrspace(3)*), align 4 +; AKF_HSA-NEXT: store i32 7, ptr addrspace(3) addrspacecast (ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8) to ptr addrspace(3)), align 4 ; AKF_HSA-NEXT: ret void ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@store_constant_cast_group_gv_gep_to_flat_to_group ; ATTRIBUTOR_HSA-SAME: () #[[ATTR2]] { -; ATTRIBUTOR_HSA-NEXT: store i32 7, i32 addrspace(3)* addrspacecast (i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8) to i32 addrspace(3)*), align 4 +; ATTRIBUTOR_HSA-NEXT: store i32 7, ptr addrspace(3) addrspacecast (ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8) to ptr addrspace(3)), align 4 ; ATTRIBUTOR_HSA-NEXT: ret void ; - store i32 7, i32 addrspace(3)* addrspacecast (i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8) to i32 addrspace(3)*) + store i32 7, ptr addrspace(3) addrspacecast (ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8) to ptr addrspace(3)) ret void } -define i32 addrspace(3)* @ret_constant_cast_group_gv_gep_to_flat_to_group() #1 { +define ptr addrspace(3) @ret_constant_cast_group_gv_gep_to_flat_to_group() #1 { ; AKF_HSA-LABEL: define {{[^@]+}}@ret_constant_cast_group_gv_gep_to_flat_to_group ; AKF_HSA-SAME: () #[[ATTR1]] { -; AKF_HSA-NEXT: ret i32 addrspace(3)* addrspacecast (i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8) to i32 addrspace(3)*) +; AKF_HSA-NEXT: ret ptr addrspace(3) addrspacecast (ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8) to ptr addrspace(3)) ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@ret_constant_cast_group_gv_gep_to_flat_to_group ; ATTRIBUTOR_HSA-SAME: () #[[ATTR2]] { -; ATTRIBUTOR_HSA-NEXT: ret i32 addrspace(3)* addrspacecast (i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8) to i32 addrspace(3)*) +; ATTRIBUTOR_HSA-NEXT: ret ptr addrspace(3) addrspacecast (ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr 
addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8) to ptr addrspace(3)) ; - ret i32 addrspace(3)* addrspacecast (i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8) to i32 addrspace(3)*) + ret ptr addrspace(3) addrspacecast (ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8) to ptr addrspace(3)) } attributes #0 = { argmemonly nounwind } diff --git a/llvm/test/CodeGen/AMDGPU/addrspacecast-initializer-unsupported.ll b/llvm/test/CodeGen/AMDGPU/addrspacecast-initializer-unsupported.ll index 593e37f..cbd3e68 100644 --- a/llvm/test/CodeGen/AMDGPU/addrspacecast-initializer-unsupported.ll +++ b/llvm/test/CodeGen/AMDGPU/addrspacecast-initializer-unsupported.ll @@ -1,7 +1,7 @@ ; RUN: not --crash llc -march=amdgcn -verify-machineinstrs -amdgpu-enable-lower-module-lds=false < %s 2>&1 | FileCheck -check-prefix=ERROR %s -; ERROR: LLVM ERROR: Unsupported expression in static initializer: addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*) +; ERROR: LLVM ERROR: Unsupported expression in static initializer: addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)) @lds.arr = unnamed_addr addrspace(3) global [256 x i32] undef, align 4 -@gv_flatptr_from_lds = unnamed_addr addrspace(2) global i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8), align 4 +@gv_flatptr_from_lds = unnamed_addr addrspace(2) global ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8), align 4 diff --git a/llvm/test/CodeGen/AMDGPU/addrspacecast-initializer.ll b/llvm/test/CodeGen/AMDGPU/addrspacecast-initializer.ll index 4f5082f..e22fcd8 100644 --- a/llvm/test/CodeGen/AMDGPU/addrspacecast-initializer.ll +++ b/llvm/test/CodeGen/AMDGPU/addrspacecast-initializer.ll @@ -19,9 +19,9 @@ @global.arr = unnamed_addr addrspace(1) global [256 x i32] undef, align 4 @constant.arr = external unnamed_addr addrspace(4) global [256 x i32], align 4 -@gv_flatptr_from_global = unnamed_addr addrspace(4) global i32 addrspace(0)* getelementptr ([256 x i32], [256 x i32] addrspace(0)* addrspacecast ([256 x i32] addrspace(1)* @global.arr to [256 x i32] addrspace(0)*), i64 0, i64 8), align 4 +@gv_flatptr_from_global = unnamed_addr addrspace(4) global ptr addrspace(0) getelementptr ([256 x i32], ptr addrspace(0) addrspacecast (ptr addrspace(1) @global.arr to ptr addrspace(0)), i64 0, i64 8), align 4 -@gv_global_ptr = unnamed_addr addrspace(4) global i32 addrspace(1)* getelementptr ([256 x i32], [256 x i32] addrspace(1)* @global.arr, i64 0, i64 8), align 4 +@gv_global_ptr = unnamed_addr addrspace(4) global ptr addrspace(1) getelementptr ([256 x i32], ptr addrspace(1) @global.arr, i64 0, i64 8), align 4 -@gv_flatptr_from_constant = unnamed_addr addrspace(4) global i32 addrspace(0)* getelementptr ([256 x i32], [256 x i32] addrspace(0)* addrspacecast ([256 x i32] addrspace(4)* @constant.arr to [256 x i32] addrspace(0)*), i64 0, i64 8), align 4 +@gv_flatptr_from_constant = unnamed_addr addrspace(4) global ptr addrspace(0) getelementptr ([256 x i32], ptr addrspace(0) addrspacecast (ptr addrspace(4) @constant.arr to ptr addrspace(0)), i64 0, i64 8), align 4 diff --git a/llvm/test/CodeGen/AMDGPU/addrspacecast-known-non-null.ll 
b/llvm/test/CodeGen/AMDGPU/addrspacecast-known-non-null.ll index 2edaa88..5de8a6f 100644 --- a/llvm/test/CodeGen/AMDGPU/addrspacecast-known-non-null.ll +++ b/llvm/test/CodeGen/AMDGPU/addrspacecast-known-non-null.ll @@ -4,8 +4,8 @@ ; Test that a null check is not emitted for lowered addrspacecast -define void @flat_user(i8* %ptr) { - store i8 0, i8* %ptr +define void @flat_user(ptr %ptr) { + store i8 0, ptr %ptr ret void } @@ -18,8 +18,8 @@ define void @flat_user(i8* %ptr) { ; CHECK-NOT: v1 define void @cast_alloca() { %alloca = alloca i8, addrspace(5) - %cast = addrspacecast i8 addrspace(5)* %alloca to i8* - call void @flat_user(i8* %cast) + %cast = addrspacecast ptr addrspace(5) %alloca to ptr + call void @flat_user(ptr %cast) ret void } @@ -33,8 +33,8 @@ define void @cast_alloca() { ; CHECK-NOT: v0 ; CHECK-NOT: v1 define void @cast_lds_gv() { - %cast = addrspacecast i8 addrspace(3)* @lds to i8* - call void @flat_user(i8* %cast) + %cast = addrspacecast ptr addrspace(3) @lds to ptr + call void @flat_user(ptr %cast) ret void } @@ -42,7 +42,7 @@ define void @cast_lds_gv() { ; CHECK: v_mov_b32_e32 v0, 0 ; CHECK: v_mov_b32_e32 v1, 0 define void @cast_constant_lds_neg1_gv() { - call void @flat_user(i8* addrspacecast (i8 addrspace(3)* inttoptr (i32 -1 to i8 addrspace(3)*) to i8*)) + call void @flat_user(ptr addrspacecast (ptr addrspace(3) inttoptr (i32 -1 to ptr addrspace(3)) to ptr)) ret void } @@ -50,7 +50,7 @@ define void @cast_constant_lds_neg1_gv() { ; CHECK: v_mov_b32_e32 v0, 0 ; CHECK: v_mov_b32_e32 v1, 0 define void @cast_constant_private_neg1_gv() { - call void @flat_user(i8* addrspacecast (i8 addrspace(5)* inttoptr (i32 -1 to i8 addrspace(5)*) to i8*)) + call void @flat_user(ptr addrspacecast (ptr addrspace(5) inttoptr (i32 -1 to ptr addrspace(5)) to ptr)) ret void } @@ -60,7 +60,7 @@ define void @cast_constant_private_neg1_gv() { ; CHECK: v_mov_b32_e32 v0, 0x7b ; CHECK: v_mov_b32_e32 v1, [[APERTURE]] define void @cast_constant_lds_other_gv() { - call void @flat_user(i8* addrspacecast (i8 addrspace(3)* inttoptr (i32 123 to i8 addrspace(3)*) to i8*)) + call void @flat_user(ptr addrspacecast (ptr addrspace(3) inttoptr (i32 123 to ptr addrspace(3)) to ptr)) ret void } @@ -70,6 +70,6 @@ define void @cast_constant_lds_other_gv() { ; CHECK: v_mov_b32_e32 v0, 0x7b ; CHECK: v_mov_b32_e32 v1, [[APERTURE]] define void @cast_constant_private_other_gv() { - call void @flat_user(i8* addrspacecast (i8 addrspace(5)* inttoptr (i32 123 to i8 addrspace(5)*) to i8*)) + call void @flat_user(ptr addrspacecast (ptr addrspace(5) inttoptr (i32 123 to ptr addrspace(5)) to ptr)) ret void } diff --git a/llvm/test/CodeGen/AMDGPU/addrspacecast.ll b/llvm/test/CodeGen/AMDGPU/addrspacecast.ll index f576079..b446166 100644 --- a/llvm/test/CodeGen/AMDGPU/addrspacecast.ll +++ b/llvm/test/CodeGen/AMDGPU/addrspacecast.ll @@ -29,9 +29,9 @@ ; number SGPR. 
; HSA: NumSgprs: {{[0-9]+}} -define amdgpu_kernel void @use_group_to_flat_addrspacecast(i32 addrspace(3)* %ptr) #0 { - %stof = addrspacecast i32 addrspace(3)* %ptr to i32* - store volatile i32 7, i32* %stof +define amdgpu_kernel void @use_group_to_flat_addrspacecast(ptr addrspace(3) %ptr) #0 { + %stof = addrspacecast ptr addrspace(3) %ptr to ptr + store volatile i32 7, ptr %stof ret void } @@ -54,9 +54,9 @@ define amdgpu_kernel void @use_group_to_flat_addrspacecast(i32 addrspace(3)* %pt ; GFX9-DAG: v_cndmask_b32_e32 v[[HI:[0-9]+]], 0, [[VAPERTURE]], vcc ; HSA: flat_store_dword v[[[LO]]:[[HI]]], [[K]] -define void @use_group_to_flat_addrspacecast_func(i32 addrspace(3)* %ptr) #0 { - %stof = addrspacecast i32 addrspace(3)* %ptr to i32* - store volatile i32 7, i32* %stof +define void @use_group_to_flat_addrspacecast_func(ptr addrspace(3) %ptr) #0 { + %stof = addrspacecast ptr addrspace(3) %ptr to ptr + store volatile i32 7, ptr %stof ret void } @@ -88,9 +88,9 @@ define void @use_group_to_flat_addrspacecast_func(i32 addrspace(3)* %ptr) #0 { ; HSA: flat_store_dword v[[[LO]]:[[HI]]], [[K]] ; HSA: NumSgprs: {{[0-9]+}} -define amdgpu_kernel void @use_private_to_flat_addrspacecast(i32 addrspace(5)* %ptr) #0 { - %stof = addrspacecast i32 addrspace(5)* %ptr to i32* - store volatile i32 7, i32* %stof +define amdgpu_kernel void @use_private_to_flat_addrspacecast(ptr addrspace(5) %ptr) #0 { + %stof = addrspacecast ptr addrspace(5) %ptr to ptr + store volatile i32 7, ptr %stof ret void } @@ -103,9 +103,9 @@ define amdgpu_kernel void @use_private_to_flat_addrspacecast(i32 addrspace(5)* % ; HSA-DAG: v_mov_b32_e32 v[[VPTRHI:[0-9]+]], s[[PTRHI]] ; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7 ; HSA: flat_store_dword v[[[VPTRLO]]:[[VPTRHI]]], [[K]] -define amdgpu_kernel void @use_global_to_flat_addrspacecast(i32 addrspace(1)* %ptr) #0 { - %stof = addrspacecast i32 addrspace(1)* %ptr to i32* - store volatile i32 7, i32* %stof +define amdgpu_kernel void @use_global_to_flat_addrspacecast(ptr addrspace(1) %ptr) #0 { + %stof = addrspacecast ptr addrspace(1) %ptr to ptr + store volatile i32 7, ptr %stof ret void } @@ -115,9 +115,9 @@ define amdgpu_kernel void @use_global_to_flat_addrspacecast(i32 addrspace(1)* %p ; HSA-DAG: v_mov_b32_e32 v[[VPTRLO:[0-9]+]], s[[PTRLO]] ; HSA-DAG: v_mov_b32_e32 v[[VPTRHI:[0-9]+]], s[[PTRHI]] ; HSA: flat_load_dword v{{[0-9]+}}, v[[[VPTRLO]]:[[VPTRHI]]] -define amdgpu_kernel void @use_constant_to_flat_addrspacecast(i32 addrspace(4)* %ptr) #0 { - %stof = addrspacecast i32 addrspace(4)* %ptr to i32* - %ld = load volatile i32, i32* %stof +define amdgpu_kernel void @use_constant_to_flat_addrspacecast(ptr addrspace(4) %ptr) #0 { + %stof = addrspacecast ptr addrspace(4) %ptr to ptr + %ld = load volatile i32, ptr %stof ret void } @@ -129,9 +129,9 @@ define amdgpu_kernel void @use_constant_to_flat_addrspacecast(i32 addrspace(4)* ; GFX9: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}} ; GFX9: global_load_dword v{{[0-9]+}}, [[ZERO:v[0-9]+]], s[[[PTRLO]]:[[PTRHI]]] -define amdgpu_kernel void @use_constant_to_global_addrspacecast(i32 addrspace(4)* %ptr) #0 { - %stof = addrspacecast i32 addrspace(4)* %ptr to i32 addrspace(1)* - %ld = load volatile i32, i32 addrspace(1)* %stof +define amdgpu_kernel void @use_constant_to_global_addrspacecast(ptr addrspace(4) %ptr) #0 { + %stof = addrspacecast ptr addrspace(4) %ptr to ptr addrspace(1) + %ld = load volatile i32, ptr addrspace(1) %stof ret void } @@ -151,9 +151,9 @@ define amdgpu_kernel void @use_constant_to_global_addrspacecast(i32 addrspace(4) ; GFX9-DAG: 
v_mov_b32_e32 [[CASTPTR:v[0-9]+]], s[[PTR_LO]] ; CI-DAG: ds_write_b32 [[VCASTPTR]], v[[K]] ; GFX9-DAG: ds_write_b32 [[CASTPTR]], v[[K]] -define amdgpu_kernel void @use_flat_to_group_addrspacecast(i32* %ptr) #0 { - %ftos = addrspacecast i32* %ptr to i32 addrspace(3)* - store volatile i32 0, i32 addrspace(3)* %ftos +define amdgpu_kernel void @use_flat_to_group_addrspacecast(ptr %ptr) #0 { + %ftos = addrspacecast ptr %ptr to ptr addrspace(3) + store volatile i32 0, ptr addrspace(3) %ftos ret void } @@ -176,9 +176,9 @@ define amdgpu_kernel void @use_flat_to_group_addrspacecast(i32* %ptr) #0 { ; GFX9-DAG: v_mov_b32_e32 [[CASTPTR:v[0-9]+]], s[[PTR_LO]] ; CI: buffer_store_dword v[[K]], [[VCASTPTR]], s{{\[[0-9]+:[0-9]+\]}}, 0 offen{{$}} ; GFX9: buffer_store_dword v[[K]], [[CASTPTR]], s{{\[[0-9]+:[0-9]+\]}}, 0 offen{{$}} -define amdgpu_kernel void @use_flat_to_private_addrspacecast(i32* %ptr) #0 { - %ftos = addrspacecast i32* %ptr to i32 addrspace(5)* - store volatile i32 0, i32 addrspace(5)* %ftos +define amdgpu_kernel void @use_flat_to_private_addrspacecast(ptr %ptr) #0 { + %ftos = addrspacecast ptr %ptr to ptr addrspace(5) + store volatile i32 0, ptr addrspace(5) %ftos ret void } @@ -193,9 +193,9 @@ define amdgpu_kernel void @use_flat_to_private_addrspacecast(i32* %ptr) #0 { ; GFX9: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0 ; GFX9: global_store_dword [[ZERO]], [[ZERO]], s[[[PTRLO]]:[[PTRHI]]{{\]$}} -define amdgpu_kernel void @use_flat_to_global_addrspacecast(i32* %ptr) #0 { - %ftos = addrspacecast i32* %ptr to i32 addrspace(1)* - store volatile i32 0, i32 addrspace(1)* %ftos +define amdgpu_kernel void @use_flat_to_global_addrspacecast(ptr %ptr) #0 { + %ftos = addrspacecast ptr %ptr to ptr addrspace(1) + store volatile i32 0, ptr addrspace(1) %ftos ret void } @@ -204,9 +204,9 @@ define amdgpu_kernel void @use_flat_to_global_addrspacecast(i32* %ptr) #0 { ; HSA: s_load_dwordx2 s[[[PTRLO:[0-9]+]]:[[PTRHI:[0-9]+]]], s[4:5], 0x0 ; HSA: s_load_dword s{{[0-9]+}}, s[[[PTRLO]]:[[PTRHI]]], 0x0 -define amdgpu_kernel void @use_flat_to_constant_addrspacecast(i32* %ptr) #0 { - %ftos = addrspacecast i32* %ptr to i32 addrspace(4)* - load volatile i32, i32 addrspace(4)* %ftos +define amdgpu_kernel void @use_flat_to_constant_addrspacecast(ptr %ptr) #0 { + %ftos = addrspacecast ptr %ptr to ptr addrspace(4) + load volatile i32, ptr addrspace(4) %ftos ret void } @@ -223,8 +223,8 @@ define amdgpu_kernel void @use_flat_to_constant_addrspacecast(i32* %ptr) #0 { ; HSA-DAG: v_mov_b32_e32 v[[K:[0-9]+]], 7{{$}} ; HSA: {{flat|global}}_store_dword v[[[LO]]:[[HI]]], v[[K]] define amdgpu_kernel void @cast_0_group_to_flat_addrspacecast() #0 { - %cast = addrspacecast i32 addrspace(3)* null to i32* - store volatile i32 7, i32* %cast + %cast = addrspacecast ptr addrspace(3) null to ptr + store volatile i32 7, ptr %cast ret void } @@ -233,8 +233,8 @@ define amdgpu_kernel void @cast_0_group_to_flat_addrspacecast() #0 { ; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7{{$}} ; HSA: ds_write_b32 [[PTR]], [[K]] define amdgpu_kernel void @cast_0_flat_to_group_addrspacecast() #0 { - %cast = addrspacecast i32* null to i32 addrspace(3)* - store volatile i32 7, i32 addrspace(3)* %cast + %cast = addrspacecast ptr null to ptr addrspace(3) + store volatile i32 7, ptr addrspace(3) %cast ret void } @@ -244,8 +244,8 @@ define amdgpu_kernel void @cast_0_flat_to_group_addrspacecast() #0 { ; HSA-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}} ; HSA: {{flat|global}}_store_dword v[[[LO]]:[[HI]]], v[[K]] define amdgpu_kernel void @cast_neg1_group_to_flat_addrspacecast() #0 { 
- %cast = addrspacecast i32 addrspace(3)* inttoptr (i32 -1 to i32 addrspace(3)*) to i32* - store volatile i32 7, i32* %cast + %cast = addrspacecast ptr addrspace(3) inttoptr (i32 -1 to ptr addrspace(3)) to ptr + store volatile i32 7, ptr %cast ret void } @@ -254,8 +254,8 @@ define amdgpu_kernel void @cast_neg1_group_to_flat_addrspacecast() #0 { ; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7{{$}} ; HSA: ds_write_b32 [[PTR]], [[K]] define amdgpu_kernel void @cast_neg1_flat_to_group_addrspacecast() #0 { - %cast = addrspacecast i32* inttoptr (i64 -1 to i32*) to i32 addrspace(3)* - store volatile i32 7, i32 addrspace(3)* %cast + %cast = addrspacecast ptr inttoptr (i64 -1 to ptr) to ptr addrspace(3) + store volatile i32 7, ptr addrspace(3) %cast ret void } @@ -273,8 +273,8 @@ define amdgpu_kernel void @cast_neg1_flat_to_group_addrspacecast() #0 { ; HSA-DAG: v_mov_b32_e32 v[[K:[0-9]+]], 7{{$}} ; HSA: {{flat|global}}_store_dword v[[[LO]]:[[HI]]], v[[K]] define amdgpu_kernel void @cast_0_private_to_flat_addrspacecast() #0 { - %cast = addrspacecast i32 addrspace(5)* null to i32* - store volatile i32 7, i32* %cast + %cast = addrspacecast ptr addrspace(5) null to ptr + store volatile i32 7, ptr %cast ret void } @@ -283,8 +283,8 @@ define amdgpu_kernel void @cast_0_private_to_flat_addrspacecast() #0 { ; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7{{$}} ; HSA: buffer_store_dword [[K]], [[PTR]], s{{\[[0-9]+:[0-9]+\]}}, 0 define amdgpu_kernel void @cast_0_flat_to_private_addrspacecast() #0 { - %cast = addrspacecast i32* null to i32 addrspace(5)* - store volatile i32 7, i32 addrspace(5)* %cast + %cast = addrspacecast ptr null to ptr addrspace(5) + store volatile i32 7, ptr addrspace(5) %cast ret void } @@ -298,8 +298,8 @@ define amdgpu_kernel void @cast_0_flat_to_private_addrspacecast() #0 { ; HSA-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}} ; HSA: {{flat|global}}_store_dword v[[[LO]]:[[HI]]], v[[K]] define amdgpu_kernel void @cast_neg1_private_to_flat_addrspacecast() #0 { - %cast = addrspacecast i32 addrspace(5)* inttoptr (i32 -1 to i32 addrspace(5)*) to i32* - store volatile i32 7, i32* %cast + %cast = addrspacecast ptr addrspace(5) inttoptr (i32 -1 to ptr addrspace(5)) to ptr + store volatile i32 7, ptr %cast ret void } @@ -308,8 +308,8 @@ define amdgpu_kernel void @cast_neg1_private_to_flat_addrspacecast() #0 { ; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7{{$}} ; HSA: buffer_store_dword [[K]], [[PTR]], s{{\[[0-9]+:[0-9]+\]}}, 0 define amdgpu_kernel void @cast_neg1_flat_to_private_addrspacecast() #0 { - %cast = addrspacecast i32* inttoptr (i64 -1 to i32*) to i32 addrspace(5)* - store volatile i32 7, i32 addrspace(5)* %cast + %cast = addrspacecast ptr inttoptr (i64 -1 to ptr) to ptr addrspace(5) + store volatile i32 7, ptr addrspace(5) %cast ret void } @@ -320,24 +320,24 @@ define amdgpu_kernel void @cast_neg1_flat_to_private_addrspacecast() #0 { ; HSA-LABEL: {{^}}branch_use_flat_i32: ; HSA: {{flat|global}}_store_dword {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} ; HSA: s_endpgm -define amdgpu_kernel void @branch_use_flat_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* %gptr, i32 addrspace(3)* %lptr, i32 %x, i32 %c) #0 { +define amdgpu_kernel void @branch_use_flat_i32(ptr addrspace(1) noalias %out, ptr addrspace(1) %gptr, ptr addrspace(3) %lptr, i32 %x, i32 %c) #0 { entry: %cmp = icmp ne i32 %c, 0 br i1 %cmp, label %local, label %global local: - %flat_local = addrspacecast i32 addrspace(3)* %lptr to i32* + %flat_local = addrspacecast ptr addrspace(3) %lptr to ptr br label %end global: - %flat_global = addrspacecast i32 
addrspace(1)* %gptr to i32* + %flat_global = addrspacecast ptr addrspace(1) %gptr to ptr br label %end end: - %fptr = phi i32* [ %flat_local, %local ], [ %flat_global, %global ] - store volatile i32 %x, i32* %fptr, align 4 -; %val = load i32, i32* %fptr, align 4 -; store i32 %val, i32 addrspace(1)* %out, align 4 + %fptr = phi ptr [ %flat_local, %local ], [ %flat_global, %global ] + store volatile i32 %x, ptr %fptr, align 4 +; %val = load i32, ptr %fptr, align 4 +; store i32 %val, ptr addrspace(1) %out, align 4 ret void } @@ -353,16 +353,16 @@ end: ; HSA: {{flat|global}}_store_dword ; HSA: s_barrier ; HSA: {{flat|global}}_load_dword -define amdgpu_kernel void @store_flat_scratch(i32 addrspace(1)* noalias %out, i32) #0 { +define amdgpu_kernel void @store_flat_scratch(ptr addrspace(1) noalias %out, i32) #0 { %alloca = alloca i32, i32 9, align 4, addrspace(5) %x = call i32 @llvm.amdgcn.workitem.id.x() #2 - %pptr = getelementptr i32, i32 addrspace(5)* %alloca, i32 %x - %fptr = addrspacecast i32 addrspace(5)* %pptr to i32* - store volatile i32 %x, i32* %fptr + %pptr = getelementptr i32, ptr addrspace(5) %alloca, i32 %x + %fptr = addrspacecast ptr addrspace(5) %pptr to ptr + store volatile i32 %x, ptr %fptr ; Dummy call call void @llvm.amdgcn.s.barrier() #1 - %reload = load volatile i32, i32* %fptr, align 4 - store volatile i32 %reload, i32 addrspace(1)* %out, align 4 + %reload = load volatile i32, ptr %fptr, align 4 + store volatile i32 %reload, ptr addrspace(1) %out, align 4 ret void } @@ -373,12 +373,11 @@ define amdgpu_kernel void @store_flat_scratch(i32 addrspace(1)* noalias %out, i3 ; GFX9: s_mov_b32 s[[PTR_HI]], 0{{$}} ; GFX9: s_add_i32 s[[PTR_LO]], s[[PTR_LO]], [[OFFSET]] ; GFX9: s_load_dword s{{[0-9]+}}, s[[[PTR_LO]]:[[PTR_HI]]], 0x0{{$}} -define amdgpu_kernel void @use_constant_to_constant32_addrspacecast(i8 addrspace(4)* addrspace(4)* %ptr.ptr, i32 %offset) #0 { - %ptr = load volatile i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* %ptr.ptr - %addrspacecast = addrspacecast i8 addrspace(4)* %ptr to i8 addrspace(6)* - %gep = getelementptr i8, i8 addrspace(6)* %addrspacecast, i32 %offset - %ptr.cast = bitcast i8 addrspace(6)* %gep to i32 addrspace(6)* - %load = load volatile i32, i32 addrspace(6)* %ptr.cast, align 4 +define amdgpu_kernel void @use_constant_to_constant32_addrspacecast(ptr addrspace(4) %ptr.ptr, i32 %offset) #0 { + %ptr = load volatile ptr addrspace(4), ptr addrspace(4) %ptr.ptr + %addrspacecast = addrspacecast ptr addrspace(4) %ptr to ptr addrspace(6) + %gep = getelementptr i8, ptr addrspace(6) %addrspacecast, i32 %offset + %load = load volatile i32, ptr addrspace(6) %gep, align 4 ret void } @@ -389,12 +388,11 @@ define amdgpu_kernel void @use_constant_to_constant32_addrspacecast(i8 addrspace ; GFX9: s_mov_b32 s[[PTR_HI]], 0{{$}} ; GFX9: s_add_i32 s[[PTR_LO]], s[[PTR_LO]], [[OFFSET]] ; GFX9: s_load_dword s{{[0-9]+}}, s[[[PTR_LO]]:[[PTR_HI]]], 0x0{{$}} -define amdgpu_kernel void @use_global_to_constant32_addrspacecast(i8 addrspace(1)* addrspace(4)* %ptr.ptr, i32 %offset) #0 { - %ptr = load volatile i8 addrspace(1)*, i8 addrspace(1)* addrspace(4)* %ptr.ptr - %addrspacecast = addrspacecast i8 addrspace(1)* %ptr to i8 addrspace(6)* - %gep = getelementptr i8, i8 addrspace(6)* %addrspacecast, i32 %offset - %ptr.cast = bitcast i8 addrspace(6)* %gep to i32 addrspace(6)* - %load = load volatile i32, i32 addrspace(6)* %ptr.cast, align 4 +define amdgpu_kernel void @use_global_to_constant32_addrspacecast(ptr addrspace(4) %ptr.ptr, i32 %offset) #0 { + %ptr = load volatile ptr 
addrspace(1), ptr addrspace(4) %ptr.ptr + %addrspacecast = addrspacecast ptr addrspace(1) %ptr to ptr addrspace(6) + %gep = getelementptr i8, ptr addrspace(6) %addrspacecast, i32 %offset + %load = load volatile i32, ptr addrspace(6) %gep, align 4 ret void } @@ -403,9 +401,9 @@ define amdgpu_kernel void @use_global_to_constant32_addrspacecast(i8 addrspace(1 ; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], 0 ; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], [[PTR]] ; GCN: flat_load_dword v{{[0-9]+}}, v[[[LO]]:[[HI]]] -define amdgpu_kernel void @use_constant32bit_to_flat_addrspacecast_0(i32 addrspace(6)* %ptr) #0 { - %stof = addrspacecast i32 addrspace(6)* %ptr to i32* - %load = load volatile i32, i32* %stof +define amdgpu_kernel void @use_constant32bit_to_flat_addrspacecast_0(ptr addrspace(6) %ptr) #0 { + %stof = addrspacecast ptr addrspace(6) %ptr to ptr + %load = load volatile i32, ptr %stof ret void } @@ -414,9 +412,9 @@ define amdgpu_kernel void @use_constant32bit_to_flat_addrspacecast_0(i32 addrspa ; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], 0xffff8000 ; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], [[PTR]] ; GCN: flat_load_dword v{{[0-9]+}}, v[[[LO]]:[[HI]]] -define amdgpu_kernel void @use_constant32bit_to_flat_addrspacecast_1(i32 addrspace(6)* %ptr) #3 { - %stof = addrspacecast i32 addrspace(6)* %ptr to i32* - %load = load volatile i32, i32* %stof +define amdgpu_kernel void @use_constant32bit_to_flat_addrspacecast_1(ptr addrspace(6) %ptr) #3 { + %stof = addrspacecast ptr addrspace(6) %ptr to ptr + %load = load volatile i32, ptr %stof ret void } diff --git a/llvm/test/CodeGen/AMDGPU/any_extend_vector_inreg.ll b/llvm/test/CodeGen/AMDGPU/any_extend_vector_inreg.ll index 2e5c32c..dc7fc9f 100644 --- a/llvm/test/CodeGen/AMDGPU/any_extend_vector_inreg.ll +++ b/llvm/test/CodeGen/AMDGPU/any_extend_vector_inreg.ll @@ -24,20 +24,17 @@ ; GCN: {{buffer|flat}}_store_byte ; GCN: {{buffer|flat}}_store_byte ; GCN: {{buffer|flat}}_store_byte -define amdgpu_kernel void @any_extend_vector_inreg_v16i8_to_v4i32(<8 x i8> addrspace(1)* nocapture readonly %arg, <16 x i8> addrspace(1)* %arg1) local_unnamed_addr #0 { +define amdgpu_kernel void @any_extend_vector_inreg_v16i8_to_v4i32(ptr addrspace(1) nocapture readonly %arg, ptr addrspace(1) %arg1) local_unnamed_addr #0 { bb: - %tmp = bitcast <8 x i8> addrspace(1)* %arg to <16 x i8> addrspace(1)* - %tmp2 = load <16 x i8>, <16 x i8> addrspace(1)* %tmp, align 16 + %tmp2 = load <16 x i8>, ptr addrspace(1) %arg, align 16 %tmp3 = extractelement <16 x i8> %tmp2, i64 4 %tmp6 = extractelement <16 x i8> %tmp2, i64 11 - %tmp10 = getelementptr inbounds <8 x i8>, <8 x i8> addrspace(1)* %arg, i64 2 - %tmp11 = bitcast <8 x i8> addrspace(1)* %tmp10 to <16 x i8> addrspace(1)* - %tmp12 = load <16 x i8>, <16 x i8> addrspace(1)* %tmp11, align 16 + %tmp10 = getelementptr inbounds <8 x i8>, ptr addrspace(1) %arg, i64 2 + %tmp12 = load <16 x i8>, ptr addrspace(1) %tmp10, align 16 %tmp13 = extractelement <16 x i8> %tmp12, i64 7 %tmp17 = extractelement <16 x i8> %tmp12, i64 12 - %tmp21 = getelementptr inbounds <8 x i8>, <8 x i8> addrspace(1)* %arg, i64 4 - %tmp22 = bitcast <8 x i8> addrspace(1)* %tmp21 to <16 x i8> addrspace(1)* - %tmp23 = load <16 x i8>, <16 x i8> addrspace(1)* %tmp22, align 16 + %tmp21 = getelementptr inbounds <8 x i8>, ptr addrspace(1) %arg, i64 4 + %tmp23 = load <16 x i8>, ptr addrspace(1) %tmp21, align 16 %tmp24 = extractelement <16 x i8> %tmp23, i64 3 %tmp1 = insertelement <16 x i8> undef, i8 %tmp3, i32 2 %tmp4 = insertelement <16 x i8> %tmp1, i8 0, i32 3 @@ -50,7 +47,7 @@ bb: %tmp16 = insertelement <16 x 
i8> %tmp15, i8 0, i32 10 %tmp18 = insertelement <16 x i8> %tmp16, i8 0, i32 11 %tmp19 = insertelement <16 x i8> %tmp18, i8 %tmp24, i32 12 - store <16 x i8> %tmp19, <16 x i8> addrspace(1)* %arg1, align 1 + store <16 x i8> %tmp19, ptr addrspace(1) %arg1, align 1 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/anyext.ll b/llvm/test/CodeGen/AMDGPU/anyext.ll index 34c35ab..68a8b0a 100644 --- a/llvm/test/CodeGen/AMDGPU/anyext.ll +++ b/llvm/test/CodeGen/AMDGPU/anyext.ll @@ -6,7 +6,7 @@ declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone declare i32 @llvm.amdgcn.workitem.id.y() nounwind readnone -define amdgpu_kernel void @anyext_i1_i32(i32 addrspace(1)* %out, i32 %cond) #0 { +define amdgpu_kernel void @anyext_i1_i32(ptr addrspace(1) %out, i32 %cond) #0 { ; GCN-LABEL: anyext_i1_i32: ; GCN: ; %bb.0: ; %entry ; GCN-NEXT: s_load_dword s4, s[0:1], 0xb @@ -55,11 +55,11 @@ entry: %tmp2 = xor i8 %tmp1, -1 %tmp3 = and i8 %tmp2, 1 %tmp4 = zext i8 %tmp3 to i32 - store i32 %tmp4, i32 addrspace(1)* %out + store i32 %tmp4, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @s_anyext_i16_i32(i32 addrspace(1)* %out, i16 addrspace(1)* %a, i16 addrspace(1)* %b) #0 { +define amdgpu_kernel void @s_anyext_i16_i32(ptr addrspace(1) %out, ptr addrspace(1) %a, ptr addrspace(1) %b) #0 { ; GCN-LABEL: s_anyext_i16_i32: ; GCN: ; %bb.0: ; %entry ; GCN-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 @@ -132,16 +132,16 @@ define amdgpu_kernel void @s_anyext_i16_i32(i32 addrspace(1)* %out, i16 addrspac entry: %tid.x = call i32 @llvm.amdgcn.workitem.id.x() %tid.y = call i32 @llvm.amdgcn.workitem.id.y() - %a.ptr = getelementptr i16, i16 addrspace(1)* %a, i32 %tid.x - %b.ptr = getelementptr i16, i16 addrspace(1)* %b, i32 %tid.y - %a.l = load i16, i16 addrspace(1)* %a.ptr - %b.l = load i16, i16 addrspace(1)* %b.ptr + %a.ptr = getelementptr i16, ptr addrspace(1) %a, i32 %tid.x + %b.ptr = getelementptr i16, ptr addrspace(1) %b, i32 %tid.y + %a.l = load i16, ptr addrspace(1) %a.ptr + %b.l = load i16, ptr addrspace(1) %b.ptr %tmp = add i16 %a.l, %b.l %tmp1 = trunc i16 %tmp to i8 %tmp2 = xor i8 %tmp1, -1 %tmp3 = and i8 %tmp2, 1 %tmp4 = zext i8 %tmp3 to i32 - store i32 %tmp4, i32 addrspace(1)* %out + store i32 %tmp4, ptr addrspace(1) %out ret void } @@ -186,7 +186,7 @@ define amdgpu_kernel void @anyext_v2i16_to_v2i32() #0 { ; GFX9-NEXT: buffer_store_byte v0, off, s[0:3], 0 ; GFX9-NEXT: s_endpgm bb: - %tmp = load i16, i16 addrspace(1)* undef, align 2 + %tmp = load i16, ptr addrspace(1) undef, align 2 %tmp2 = insertelement <2 x i16> undef, i16 %tmp, i32 1 %tmp4 = and <2 x i16> %tmp2, %tmp5 = zext <2 x i16> %tmp4 to <2 x i32> @@ -196,7 +196,7 @@ bb: %tmp10 = fcmp oeq <2 x float> %tmp8, zeroinitializer %tmp11 = zext <2 x i1> %tmp10 to <2 x i8> %tmp12 = extractelement <2 x i8> %tmp11, i32 1 - store i8 %tmp12, i8 addrspace(1)* undef, align 1 + store i8 %tmp12, ptr addrspace(1) undef, align 1 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/codegen-prepare-addrmode-sext.ll b/llvm/test/CodeGen/AMDGPU/codegen-prepare-addrmode-sext.ll index 155de53..31e1ace 100644 --- a/llvm/test/CodeGen/AMDGPU/codegen-prepare-addrmode-sext.ll +++ b/llvm/test/CodeGen/AMDGPU/codegen-prepare-addrmode-sext.ll @@ -8,11 +8,11 @@ ; SI-LLC-LABEL: {{^}}test: ; SI-LLC: s_mul_i32 ; SI-LLC-NOT: mul -define amdgpu_kernel void @test(i8 addrspace(1)* nocapture readonly %in, i32 %a, i8 %b) { +define amdgpu_kernel void @test(ptr addrspace(1) nocapture readonly %in, i32 %a, i8 %b) { entry: %0 = mul nsw i32 %a, 3 %1 = sext i32 %0 to i64 - %2 = getelementptr i8, i8 
addrspace(1)* %in, i64 %1 - store i8 %b, i8 addrspace(1)* %2 + %2 = getelementptr i8, ptr addrspace(1) %in, i64 %1 + store i8 %b, ptr addrspace(1) %2 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/combine-and-sext-bool.ll b/llvm/test/CodeGen/AMDGPU/combine-and-sext-bool.ll index cd4ac4d..e85dfed 100644 --- a/llvm/test/CodeGen/AMDGPU/combine-and-sext-bool.ll +++ b/llvm/test/CodeGen/AMDGPU/combine-and-sext-bool.ll @@ -7,16 +7,16 @@ ; GCN-NOT: v_cndmask_b32_e64 v{{[0-9]+}}, {{0|-1}}, {{0|-1}} ; GCN-NOT: v_and_b32_e32 -define amdgpu_kernel void @and_i1_sext_bool(i32 addrspace(1)* nocapture %arg) { +define amdgpu_kernel void @and_i1_sext_bool(ptr addrspace(1) nocapture %arg) { bb: %x = tail call i32 @llvm.amdgcn.workitem.id.x() %y = tail call i32 @llvm.amdgcn.workitem.id.y() - %gep = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %x - %v = load i32, i32 addrspace(1)* %gep, align 4 + %gep = getelementptr inbounds i32, ptr addrspace(1) %arg, i32 %x + %v = load i32, ptr addrspace(1) %gep, align 4 %cmp = icmp ugt i32 %x, %y %ext = sext i1 %cmp to i32 %and = and i32 %v, %ext - store i32 %and, i32 addrspace(1)* %gep, align 4 + store i32 %and, ptr addrspace(1) %gep, align 4 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/fp16_to_fp32.ll b/llvm/test/CodeGen/AMDGPU/fp16_to_fp32.ll index ce04136..971f2e3 100644 --- a/llvm/test/CodeGen/AMDGPU/fp16_to_fp32.ll +++ b/llvm/test/CodeGen/AMDGPU/fp16_to_fp32.ll @@ -14,9 +14,9 @@ declare float @llvm.convert.from.fp16.f32(i16) nounwind readnone ; CM: MEM_RAT_CACHELESS STORE_DWORD [[RES:T[0-9]+\.[XYZW]]] ; EGCM: VTX_READ_16 [[VAL:T[0-9]+\.[XYZW]]] ; EGCM: FLT16_TO_FLT32{{[ *]*}}[[RES]], [[VAL]] -define amdgpu_kernel void @test_convert_fp16_to_fp32(float addrspace(1)* noalias %out, i16 addrspace(1)* noalias %in) nounwind { - %val = load i16, i16 addrspace(1)* %in, align 2 +define amdgpu_kernel void @test_convert_fp16_to_fp32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind { + %val = load i16, ptr addrspace(1) %in, align 2 %cvt = call float @llvm.convert.from.fp16.f32(i16 %val) nounwind readnone - store float %cvt, float addrspace(1)* %out, align 4 + store float %cvt, ptr addrspace(1) %out, align 4 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/fp16_to_fp64.ll b/llvm/test/CodeGen/AMDGPU/fp16_to_fp64.ll index 70f0c0c..5bb6841 100644 --- a/llvm/test/CodeGen/AMDGPU/fp16_to_fp64.ll +++ b/llvm/test/CodeGen/AMDGPU/fp16_to_fp64.ll @@ -8,9 +8,9 @@ declare double @llvm.convert.from.fp16.f64(i16) nounwind readnone ; GCN: v_cvt_f32_f16_e32 [[RESULT32:v[0-9]+]], [[VAL]] ; GCN: v_cvt_f64_f32_e32 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[RESULT32]] ; GCN: buffer_store_dwordx2 [[RESULT]] -define amdgpu_kernel void @test_convert_fp16_to_fp64(double addrspace(1)* noalias %out, i16 addrspace(1)* noalias %in) nounwind { - %val = load i16, i16 addrspace(1)* %in, align 2 +define amdgpu_kernel void @test_convert_fp16_to_fp64(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind { + %val = load i16, ptr addrspace(1) %in, align 2 %cvt = call double @llvm.convert.from.fp16.f64(i16 %val) nounwind readnone - store double %cvt, double addrspace(1)* %out, align 4 + store double %cvt, ptr addrspace(1) %out, align 4 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/fp32_to_fp16.ll b/llvm/test/CodeGen/AMDGPU/fp32_to_fp16.ll index 579a145..1786fea 100644 --- a/llvm/test/CodeGen/AMDGPU/fp32_to_fp16.ll +++ b/llvm/test/CodeGen/AMDGPU/fp32_to_fp16.ll @@ -12,9 +12,9 @@ declare i16 @llvm.convert.to.fp16.f32(float) nounwind readnone ; EG: MEM_RAT MSKOR ; EG: 
VTX_READ_32 ; EG: FLT32_TO_FLT16 -define amdgpu_kernel void @test_convert_fp32_to_fp16(i16 addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind { - %val = load float, float addrspace(1)* %in, align 4 +define amdgpu_kernel void @test_convert_fp32_to_fp16(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind { + %val = load float, ptr addrspace(1) %in, align 4 %cvt = call i16 @llvm.convert.to.fp16.f32(float %val) nounwind readnone - store i16 %cvt, i16 addrspace(1)* %out, align 2 + store i16 %cvt, ptr addrspace(1) %out, align 2 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/fp_to_sint.f64.ll b/llvm/test/CodeGen/AMDGPU/fp_to_sint.f64.ll index a602a04..2c318d7 100644 --- a/llvm/test/CodeGen/AMDGPU/fp_to_sint.f64.ll +++ b/llvm/test/CodeGen/AMDGPU/fp_to_sint.f64.ll @@ -6,18 +6,18 @@ declare double @llvm.fabs.f64(double) #1 ; FUNC-LABEL: @fp_to_sint_f64_i32 ; SI: v_cvt_i32_f64_e32 -define amdgpu_kernel void @fp_to_sint_f64_i32(i32 addrspace(1)* %out, double %in) { +define amdgpu_kernel void @fp_to_sint_f64_i32(ptr addrspace(1) %out, double %in) { %result = fptosi double %in to i32 - store i32 %result, i32 addrspace(1)* %out + store i32 %result, ptr addrspace(1) %out ret void } ; FUNC-LABEL: @fp_to_sint_v2f64_v2i32 ; SI: v_cvt_i32_f64_e32 ; SI: v_cvt_i32_f64_e32 -define amdgpu_kernel void @fp_to_sint_v2f64_v2i32(<2 x i32> addrspace(1)* %out, <2 x double> %in) { +define amdgpu_kernel void @fp_to_sint_v2f64_v2i32(ptr addrspace(1) %out, <2 x double> %in) { %result = fptosi <2 x double> %in to <2 x i32> - store <2 x i32> %result, <2 x i32> addrspace(1)* %out + store <2 x i32> %result, ptr addrspace(1) %out ret void } @@ -26,9 +26,9 @@ define amdgpu_kernel void @fp_to_sint_v2f64_v2i32(<2 x i32> addrspace(1)* %out, ; SI: v_cvt_i32_f64_e32 ; SI: v_cvt_i32_f64_e32 ; SI: v_cvt_i32_f64_e32 -define amdgpu_kernel void @fp_to_sint_v4f64_v4i32(<4 x i32> addrspace(1)* %out, <4 x double> %in) { +define amdgpu_kernel void @fp_to_sint_v4f64_v4i32(ptr addrspace(1) %out, <4 x double> %in) { %result = fptosi <4 x double> %in to <4 x i32> - store <4 x i32> %result, <4 x i32> addrspace(1)* %out + store <4 x i32> %result, ptr addrspace(1) %out ret void } @@ -47,29 +47,29 @@ define amdgpu_kernel void @fp_to_sint_v4f64_v4i32(<4 x i32> addrspace(1)* %out, ; CI-DAG: v_cvt_u32_f64_e32 v[[LO:[0-9]+]], [[FMA]] ; CI-DAG: v_cvt_i32_f64_e32 v[[HI:[0-9]+]], [[FLOOR]] ; CI: buffer_store_dwordx2 v[[[LO]]:[[HI]]] -define amdgpu_kernel void @fp_to_sint_i64_f64(i64 addrspace(1)* %out, double addrspace(1)* %in) { +define amdgpu_kernel void @fp_to_sint_i64_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) { %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone - %gep = getelementptr double, double addrspace(1)* %in, i32 %tid - %val = load double, double addrspace(1)* %gep, align 8 + %gep = getelementptr double, ptr addrspace(1) %in, i32 %tid + %val = load double, ptr addrspace(1) %gep, align 8 %cast = fptosi double %val to i64 - store i64 %cast, i64 addrspace(1)* %out, align 8 + store i64 %cast, ptr addrspace(1) %out, align 8 ret void } ; FUNC-LABEL: {{^}}fp_to_sint_f64_to_i1: ; SI: v_cmp_eq_f64_e64 s{{\[[0-9]+:[0-9]+\]}}, -1.0, s{{\[[0-9]+:[0-9]+\]}} -define amdgpu_kernel void @fp_to_sint_f64_to_i1(i1 addrspace(1)* %out, double %in) #0 { +define amdgpu_kernel void @fp_to_sint_f64_to_i1(ptr addrspace(1) %out, double %in) #0 { %conv = fptosi double %in to i1 - store i1 %conv, i1 addrspace(1)* %out + store i1 %conv, ptr addrspace(1) %out ret void } ; FUNC-LABEL: {{^}}fp_to_sint_fabs_f64_to_i1: ; 
SI: v_cmp_eq_f64_e64 s{{\[[0-9]+:[0-9]+\]}}, -1.0, |s{{\[[0-9]+:[0-9]+\]}}| -define amdgpu_kernel void @fp_to_sint_fabs_f64_to_i1(i1 addrspace(1)* %out, double %in) #0 { +define amdgpu_kernel void @fp_to_sint_fabs_f64_to_i1(ptr addrspace(1) %out, double %in) #0 { %in.fabs = call double @llvm.fabs.f64(double %in) %conv = fptosi double %in.fabs to i1 - store i1 %conv, i1 addrspace(1)* %out + store i1 %conv, ptr addrspace(1) %out ret void } diff --git a/llvm/test/CodeGen/AMDGPU/fp_to_sint.ll b/llvm/test/CodeGen/AMDGPU/fp_to_sint.ll index 18b0fe3..e32d5d7 100644 --- a/llvm/test/CodeGen/AMDGPU/fp_to_sint.ll +++ b/llvm/test/CodeGen/AMDGPU/fp_to_sint.ll @@ -5,7 +5,7 @@ declare float @llvm.fabs.f32(float) #1 -define amdgpu_kernel void @fp_to_sint_i32(i32 addrspace(1)* %out, float %in) { +define amdgpu_kernel void @fp_to_sint_i32(ptr addrspace(1) %out, float %in) { ; SI-LABEL: fp_to_sint_i32: ; SI: ; %bb.0: ; SI-NEXT: s_load_dword s4, s[0:1], 0xb @@ -40,11 +40,11 @@ define amdgpu_kernel void @fp_to_sint_i32(i32 addrspace(1)* %out, float %in) { ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) %conv = fptosi float %in to i32 - store i32 %conv, i32 addrspace(1)* %out + store i32 %conv, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @fp_to_sint_i32_fabs(i32 addrspace(1)* %out, float %in) { +define amdgpu_kernel void @fp_to_sint_i32_fabs(ptr addrspace(1) %out, float %in) { ; SI-LABEL: fp_to_sint_i32_fabs: ; SI: ; %bb.0: ; SI-NEXT: s_load_dword s4, s[0:1], 0xb @@ -80,11 +80,11 @@ define amdgpu_kernel void @fp_to_sint_i32_fabs(i32 addrspace(1)* %out, float %in ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) %in.fabs = call float @llvm.fabs.f32(float %in) %conv = fptosi float %in.fabs to i32 - store i32 %conv, i32 addrspace(1)* %out + store i32 %conv, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @fp_to_sint_v2i32(<2 x i32> addrspace(1)* %out, <2 x float> %in) { +define amdgpu_kernel void @fp_to_sint_v2i32(ptr addrspace(1) %out, <2 x float> %in) { ; SI-LABEL: fp_to_sint_v2i32: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -125,11 +125,11 @@ define amdgpu_kernel void @fp_to_sint_v2i32(<2 x i32> addrspace(1)* %out, <2 x f ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) %result = fptosi <2 x float> %in to <2 x i32> - store <2 x i32> %result, <2 x i32> addrspace(1)* %out + store <2 x i32> %result, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @fp_to_sint_v4i32(<4 x i32> addrspace(1)* %out, <4 x float> addrspace(1)* %in) { +define amdgpu_kernel void @fp_to_sint_v4i32(ptr addrspace(1) %out, ptr addrspace(1) %in) { ; SI-LABEL: fp_to_sint_v4i32: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -183,14 +183,14 @@ define amdgpu_kernel void @fp_to_sint_v4i32(<4 x i32> addrspace(1)* %out, <4 x f ; EG-NEXT: FLT_TO_INT T0.X, PV.W, ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) - %value = load <4 x float>, <4 x float> addrspace(1) * %in + %value = load <4 x float>, ptr addrspace(1) %in %result = fptosi <4 x float> %value to <4 x i32> - store <4 x i32> %result, <4 x i32> addrspace(1)* %out + store <4 x i32> %result, ptr addrspace(1) %out ret void } ; Check that the compiler doesn't crash with a "cannot select" error -define amdgpu_kernel void @fp_to_sint_i64 (i64 addrspace(1)* %out, float %in) { +define amdgpu_kernel void @fp_to_sint_i64 (ptr addrspace(1) %out, float %in) { ; SI-LABEL: fp_to_sint_i64: ; SI: ; %bb.0: ; %entry ; 
SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 @@ -287,11 +287,11 @@ define amdgpu_kernel void @fp_to_sint_i64 (i64 addrspace(1)* %out, float %in) { ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) entry: %0 = fptosi float %in to i64 - store i64 %0, i64 addrspace(1)* %out + store i64 %0, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @fp_to_sint_v2i64(<2 x i64> addrspace(1)* %out, <2 x float> %x) { +define amdgpu_kernel void @fp_to_sint_v2i64(ptr addrspace(1) %out, <2 x float> %x) { ; SI-LABEL: fp_to_sint_v2i64: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -446,11 +446,11 @@ define amdgpu_kernel void @fp_to_sint_v2i64(<2 x i64> addrspace(1)* %out, <2 x f ; EG-NEXT: LSHR * T0.X, KC0[2].Y, literal.x, ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) %conv = fptosi <2 x float> %x to <2 x i64> - store <2 x i64> %conv, <2 x i64> addrspace(1)* %out + store <2 x i64> %conv, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @fp_to_sint_v4i64(<4 x i64> addrspace(1)* %out, <4 x float> %x) { +define amdgpu_kernel void @fp_to_sint_v4i64(ptr addrspace(1) %out, <4 x float> %x) { ; SI-LABEL: fp_to_sint_v4i64: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 @@ -733,11 +733,11 @@ define amdgpu_kernel void @fp_to_sint_v4i64(<4 x i64> addrspace(1)* %out, <4 x f ; EG-NEXT: LSHR * T0.X, PV.W, literal.x, ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) %conv = fptosi <4 x float> %x to <4 x i64> - store <4 x i64> %conv, <4 x i64> addrspace(1)* %out + store <4 x i64> %conv, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @fp_to_uint_f32_to_i1(i1 addrspace(1)* %out, float %in) #0 { +define amdgpu_kernel void @fp_to_uint_f32_to_i1(ptr addrspace(1) %out, float %in) #0 { ; SI-LABEL: fp_to_uint_f32_to_i1: ; SI: ; %bb.0: ; SI-NEXT: s_load_dword s4, s[0:1], 0xb @@ -783,11 +783,11 @@ define amdgpu_kernel void @fp_to_uint_f32_to_i1(i1 addrspace(1)* %out, float %in ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) %conv = fptosi float %in to i1 - store i1 %conv, i1 addrspace(1)* %out + store i1 %conv, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @fp_to_uint_fabs_f32_to_i1(i1 addrspace(1)* %out, float %in) #0 { +define amdgpu_kernel void @fp_to_uint_fabs_f32_to_i1(ptr addrspace(1) %out, float %in) #0 { ; SI-LABEL: fp_to_uint_fabs_f32_to_i1: ; SI: ; %bb.0: ; SI-NEXT: s_load_dword s4, s[0:1], 0xb @@ -834,11 +834,11 @@ define amdgpu_kernel void @fp_to_uint_fabs_f32_to_i1(i1 addrspace(1)* %out, floa ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) %in.fabs = call float @llvm.fabs.f32(float %in) %conv = fptosi float %in.fabs to i1 - store i1 %conv, i1 addrspace(1)* %out + store i1 %conv, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @fp_to_sint_f32_i16(i16 addrspace(1)* %out, float %in) #0 { +define amdgpu_kernel void @fp_to_sint_f32_i16(ptr addrspace(1) %out, float %in) #0 { ; SI-LABEL: fp_to_sint_f32_i16: ; SI: ; %bb.0: ; SI-NEXT: s_load_dword s4, s[0:1], 0xb @@ -883,7 +883,7 @@ define amdgpu_kernel void @fp_to_sint_f32_i16(i16 addrspace(1)* %out, float %in) ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) %sint = fptosi float %in to i16 - store i16 %sint, i16 addrspace(1)* %out + store i16 %sint, ptr addrspace(1) %out ret void } diff --git a/llvm/test/CodeGen/AMDGPU/fp_to_uint.f64.ll b/llvm/test/CodeGen/AMDGPU/fp_to_uint.f64.ll index 8a86446..ba23dd0 100644 --- a/llvm/test/CodeGen/AMDGPU/fp_to_uint.f64.ll +++ b/llvm/test/CodeGen/AMDGPU/fp_to_uint.f64.ll @@ -6,18 +6,18 @@ 
declare double @llvm.fabs.f64(double) #1 ; SI-LABEL: {{^}}fp_to_uint_i32_f64: ; SI: v_cvt_u32_f64_e32 -define amdgpu_kernel void @fp_to_uint_i32_f64(i32 addrspace(1)* %out, double %in) { +define amdgpu_kernel void @fp_to_uint_i32_f64(ptr addrspace(1) %out, double %in) { %cast = fptoui double %in to i32 - store i32 %cast, i32 addrspace(1)* %out, align 4 + store i32 %cast, ptr addrspace(1) %out, align 4 ret void } ; SI-LABEL: @fp_to_uint_v2i32_v2f64 ; SI: v_cvt_u32_f64_e32 ; SI: v_cvt_u32_f64_e32 -define amdgpu_kernel void @fp_to_uint_v2i32_v2f64(<2 x i32> addrspace(1)* %out, <2 x double> %in) { +define amdgpu_kernel void @fp_to_uint_v2i32_v2f64(ptr addrspace(1) %out, <2 x double> %in) { %cast = fptoui <2 x double> %in to <2 x i32> - store <2 x i32> %cast, <2 x i32> addrspace(1)* %out, align 8 + store <2 x i32> %cast, ptr addrspace(1) %out, align 8 ret void } @@ -26,9 +26,9 @@ define amdgpu_kernel void @fp_to_uint_v2i32_v2f64(<2 x i32> addrspace(1)* %out, ; SI: v_cvt_u32_f64_e32 ; SI: v_cvt_u32_f64_e32 ; SI: v_cvt_u32_f64_e32 -define amdgpu_kernel void @fp_to_uint_v4i32_v4f64(<4 x i32> addrspace(1)* %out, <4 x double> %in) { +define amdgpu_kernel void @fp_to_uint_v4i32_v4f64(ptr addrspace(1) %out, <4 x double> %in) { %cast = fptoui <4 x double> %in to <4 x i32> - store <4 x i32> %cast, <4 x i32> addrspace(1)* %out, align 8 + store <4 x i32> %cast, ptr addrspace(1) %out, align 8 ret void } @@ -47,43 +47,43 @@ define amdgpu_kernel void @fp_to_uint_v4i32_v4f64(<4 x i32> addrspace(1)* %out, ; CI-DAG: v_cvt_u32_f64_e32 v[[LO:[0-9]+]], [[FMA]] ; CI-DAG: v_cvt_u32_f64_e32 v[[HI:[0-9]+]], [[FLOOR]] ; CI: buffer_store_dwordx2 v[[[LO]]:[[HI]]] -define amdgpu_kernel void @fp_to_uint_i64_f64(i64 addrspace(1)* %out, double addrspace(1)* %in) { +define amdgpu_kernel void @fp_to_uint_i64_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) { %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone - %gep = getelementptr double, double addrspace(1)* %in, i32 %tid - %val = load double, double addrspace(1)* %gep, align 8 + %gep = getelementptr double, ptr addrspace(1) %in, i32 %tid + %val = load double, ptr addrspace(1) %gep, align 8 %cast = fptoui double %val to i64 - store i64 %cast, i64 addrspace(1)* %out, align 4 + store i64 %cast, ptr addrspace(1) %out, align 4 ret void } ; SI-LABEL: @fp_to_uint_v2i64_v2f64 -define amdgpu_kernel void @fp_to_uint_v2i64_v2f64(<2 x i64> addrspace(1)* %out, <2 x double> %in) { +define amdgpu_kernel void @fp_to_uint_v2i64_v2f64(ptr addrspace(1) %out, <2 x double> %in) { %cast = fptoui <2 x double> %in to <2 x i64> - store <2 x i64> %cast, <2 x i64> addrspace(1)* %out, align 16 + store <2 x i64> %cast, ptr addrspace(1) %out, align 16 ret void } ; SI-LABEL: @fp_to_uint_v4i64_v4f64 -define amdgpu_kernel void @fp_to_uint_v4i64_v4f64(<4 x i64> addrspace(1)* %out, <4 x double> %in) { +define amdgpu_kernel void @fp_to_uint_v4i64_v4f64(ptr addrspace(1) %out, <4 x double> %in) { %cast = fptoui <4 x double> %in to <4 x i64> - store <4 x i64> %cast, <4 x i64> addrspace(1)* %out, align 32 + store <4 x i64> %cast, ptr addrspace(1) %out, align 32 ret void } ; FUNC-LABEL: {{^}}fp_to_uint_f64_to_i1: ; SI: v_cmp_eq_f64_e64 s{{\[[0-9]+:[0-9]+\]}}, 1.0, s{{\[[0-9]+:[0-9]+\]}} -define amdgpu_kernel void @fp_to_uint_f64_to_i1(i1 addrspace(1)* %out, double %in) #0 { +define amdgpu_kernel void @fp_to_uint_f64_to_i1(ptr addrspace(1) %out, double %in) #0 { %conv = fptoui double %in to i1 - store i1 %conv, i1 addrspace(1)* %out + store i1 %conv, ptr addrspace(1) %out ret void } ; FUNC-LABEL: 
{{^}}fp_to_uint_fabs_f64_to_i1: ; SI: v_cmp_eq_f64_e64 s{{\[[0-9]+:[0-9]+\]}}, 1.0, |s{{\[[0-9]+:[0-9]+\]}}| -define amdgpu_kernel void @fp_to_uint_fabs_f64_to_i1(i1 addrspace(1)* %out, double %in) #0 { +define amdgpu_kernel void @fp_to_uint_fabs_f64_to_i1(ptr addrspace(1) %out, double %in) #0 { %in.fabs = call double @llvm.fabs.f64(double %in) %conv = fptoui double %in.fabs to i1 - store i1 %conv, i1 addrspace(1)* %out + store i1 %conv, ptr addrspace(1) %out ret void } diff --git a/llvm/test/CodeGen/AMDGPU/fp_to_uint.ll b/llvm/test/CodeGen/AMDGPU/fp_to_uint.ll index ab90330..ca64382 100644 --- a/llvm/test/CodeGen/AMDGPU/fp_to_uint.ll +++ b/llvm/test/CodeGen/AMDGPU/fp_to_uint.ll @@ -5,7 +5,7 @@ declare float @llvm.fabs.f32(float) #1 -define amdgpu_kernel void @fp_to_uint_f32_to_i32 (i32 addrspace(1)* %out, float %in) { +define amdgpu_kernel void @fp_to_uint_f32_to_i32 (ptr addrspace(1) %out, float %in) { ; SI-LABEL: fp_to_uint_f32_to_i32: ; SI: ; %bb.0: ; SI-NEXT: s_load_dword s4, s[0:1], 0xb @@ -40,11 +40,11 @@ define amdgpu_kernel void @fp_to_uint_f32_to_i32 (i32 addrspace(1)* %out, float ; EG-NEXT: FLT_TO_UINT * T1.X, PV.W, ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) %conv = fptoui float %in to i32 - store i32 %conv, i32 addrspace(1)* %out + store i32 %conv, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @fp_to_uint_v2f32_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x float> %in) { +define amdgpu_kernel void @fp_to_uint_v2f32_to_v2i32(ptr addrspace(1) %out, <2 x float> %in) { ; SI-LABEL: fp_to_uint_v2f32_to_v2i32: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -85,11 +85,11 @@ define amdgpu_kernel void @fp_to_uint_v2f32_to_v2i32(<2 x i32> addrspace(1)* %ou ; EG-NEXT: FLT_TO_UINT * T0.X, T1.W, ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) %result = fptoui <2 x float> %in to <2 x i32> - store <2 x i32> %result, <2 x i32> addrspace(1)* %out + store <2 x i32> %result, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @fp_to_uint_v4f32_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x float> addrspace(1)* %in) { +define amdgpu_kernel void @fp_to_uint_v4f32_to_v4i32(ptr addrspace(1) %out, ptr addrspace(1) %in) { ; SI-LABEL: fp_to_uint_v4f32_to_v4i32: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -143,13 +143,13 @@ define amdgpu_kernel void @fp_to_uint_v4f32_to_v4i32(<4 x i32> addrspace(1)* %ou ; EG-NEXT: LSHR T1.X, KC0[2].Y, literal.x, ; EG-NEXT: FLT_TO_UINT * T0.X, PV.W, ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) - %value = load <4 x float>, <4 x float> addrspace(1) * %in + %value = load <4 x float>, ptr addrspace(1) %in %result = fptoui <4 x float> %value to <4 x i32> - store <4 x i32> %result, <4 x i32> addrspace(1)* %out + store <4 x i32> %result, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @fp_to_uint_f32_to_i64(i64 addrspace(1)* %out, float %x) { +define amdgpu_kernel void @fp_to_uint_f32_to_i64(ptr addrspace(1) %out, float %x) { ; SI-LABEL: fp_to_uint_f32_to_i64: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 @@ -233,11 +233,11 @@ define amdgpu_kernel void @fp_to_uint_f32_to_i64(i64 addrspace(1)* %out, float % ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) %conv = fptoui float %x to i64 - store i64 %conv, i64 addrspace(1)* %out + store i64 %conv, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @fp_to_uint_v2f32_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x float> %x) { +define amdgpu_kernel void @fp_to_uint_v2f32_to_v2i64(ptr addrspace(1) %out, <2 x float> 
%x) { ; SI-LABEL: fp_to_uint_v2f32_to_v2i64: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -370,11 +370,11 @@ define amdgpu_kernel void @fp_to_uint_v2f32_to_v2i64(<2 x i64> addrspace(1)* %ou ; EG-NEXT: LSHR * T0.X, KC0[2].Y, literal.x, ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) %conv = fptoui <2 x float> %x to <2 x i64> - store <2 x i64> %conv, <2 x i64> addrspace(1)* %out + store <2 x i64> %conv, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @fp_to_uint_v4f32_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x float> %x) { +define amdgpu_kernel void @fp_to_uint_v4f32_to_v4i64(ptr addrspace(1) %out, <4 x float> %x) { ; SI-LABEL: fp_to_uint_v4f32_to_v4i64: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 @@ -615,11 +615,11 @@ define amdgpu_kernel void @fp_to_uint_v4f32_to_v4i64(<4 x i64> addrspace(1)* %ou ; EG-NEXT: LSHR * T0.X, PV.W, literal.x, ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) %conv = fptoui <4 x float> %x to <4 x i64> - store <4 x i64> %conv, <4 x i64> addrspace(1)* %out + store <4 x i64> %conv, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @fp_to_uint_f32_to_i1(i1 addrspace(1)* %out, float %in) #0 { +define amdgpu_kernel void @fp_to_uint_f32_to_i1(ptr addrspace(1) %out, float %in) #0 { ; SI-LABEL: fp_to_uint_f32_to_i1: ; SI: ; %bb.0: ; SI-NEXT: s_load_dword s4, s[0:1], 0xb @@ -665,11 +665,11 @@ define amdgpu_kernel void @fp_to_uint_f32_to_i1(i1 addrspace(1)* %out, float %in ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) %conv = fptoui float %in to i1 - store i1 %conv, i1 addrspace(1)* %out + store i1 %conv, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @fp_to_uint_fabs_f32_to_i1(i1 addrspace(1)* %out, float %in) #0 { +define amdgpu_kernel void @fp_to_uint_fabs_f32_to_i1(ptr addrspace(1) %out, float %in) #0 { ; SI-LABEL: fp_to_uint_fabs_f32_to_i1: ; SI: ; %bb.0: ; SI-NEXT: s_load_dword s4, s[0:1], 0xb @@ -716,11 +716,11 @@ define amdgpu_kernel void @fp_to_uint_fabs_f32_to_i1(i1 addrspace(1)* %out, floa ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) %in.fabs = call float @llvm.fabs.f32(float %in) %conv = fptoui float %in.fabs to i1 - store i1 %conv, i1 addrspace(1)* %out + store i1 %conv, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @fp_to_uint_f32_to_i16(i16 addrspace(1)* %out, float %in) #0 { +define amdgpu_kernel void @fp_to_uint_f32_to_i16(ptr addrspace(1) %out, float %in) #0 { ; SI-LABEL: fp_to_uint_f32_to_i16: ; SI: ; %bb.0: ; SI-NEXT: s_load_dword s4, s[0:1], 0xb @@ -764,7 +764,7 @@ define amdgpu_kernel void @fp_to_uint_f32_to_i16(i16 addrspace(1)* %out, float % ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) %uint = fptoui float %in to i16 - store i16 %uint, i16 addrspace(1)* %out + store i16 %uint, ptr addrspace(1) %out ret void } diff --git a/llvm/test/CodeGen/AMDGPU/fpext.f16.ll b/llvm/test/CodeGen/AMDGPU/fpext.f16.ll index 91dac92..0572f9a 100644 --- a/llvm/test/CodeGen/AMDGPU/fpext.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/fpext.f16.ll @@ -8,12 +8,12 @@ ; GCN: buffer_store_dword v[[R_F32]] ; GCN: s_endpgm define amdgpu_kernel void @fpext_f16_to_f32( - float addrspace(1)* %r, - half addrspace(1)* %a) #0 { + ptr addrspace(1) %r, + ptr addrspace(1) %a) #0 { entry: - %a.val = load half, half addrspace(1)* %a + %a.val = load half, ptr addrspace(1) %a %r.val = fpext half %a.val to float - store float %r.val, float addrspace(1)* %r + store float %r.val, ptr addrspace(1) %r ret void } @@ -24,12 +24,12 @@ entry: ; GCN: 
buffer_store_dwordx2 v[[[R_F64_0]]:[[R_F64_1]]] ; GCN: s_endpgm define amdgpu_kernel void @fpext_f16_to_f64( - double addrspace(1)* %r, - half addrspace(1)* %a) #0 { + ptr addrspace(1) %r, + ptr addrspace(1) %a) #0 { entry: - %a.val = load half, half addrspace(1)* %a + %a.val = load half, ptr addrspace(1) %a %r.val = fpext half %a.val to double - store double %r.val, double addrspace(1)* %r + store double %r.val, ptr addrspace(1) %r ret void } @@ -43,12 +43,12 @@ entry: ; GCN: s_endpgm define amdgpu_kernel void @fpext_v2f16_to_v2f32( - <2 x float> addrspace(1)* %r, - <2 x half> addrspace(1)* %a) #0 { + ptr addrspace(1) %r, + ptr addrspace(1) %a) #0 { entry: - %a.val = load <2 x half>, <2 x half> addrspace(1)* %a + %a.val = load <2 x half>, ptr addrspace(1) %a %r.val = fpext <2 x half> %a.val to <2 x float> - store <2 x float> %r.val, <2 x float> addrspace(1)* %r + store <2 x float> %r.val, ptr addrspace(1) %r ret void } @@ -65,23 +65,23 @@ entry: ; GCN: s_endpgm define amdgpu_kernel void @fpext_v2f16_to_v2f64( - <2 x double> addrspace(1)* %r, - <2 x half> addrspace(1)* %a) { + ptr addrspace(1) %r, + ptr addrspace(1) %a) { entry: - %a.val = load <2 x half>, <2 x half> addrspace(1)* %a + %a.val = load <2 x half>, ptr addrspace(1) %a %r.val = fpext <2 x half> %a.val to <2 x double> - store <2 x double> %r.val, <2 x double> addrspace(1)* %r + store <2 x double> %r.val, ptr addrspace(1) %r ret void } ; GCN-LABEL: {{^}}s_fneg_fpext_f16_to_f32: ; GCN: v_cvt_f32_f16_e32 v{{[0-9]+}}, s{{[0-9]+}} -define amdgpu_kernel void @s_fneg_fpext_f16_to_f32(float addrspace(1)* %r, i32 %a) { +define amdgpu_kernel void @s_fneg_fpext_f16_to_f32(ptr addrspace(1) %r, i32 %a) { entry: %a.trunc = trunc i32 %a to i16 %a.val = bitcast i16 %a.trunc to half %r.val = fpext half %a.val to float - store float %r.val, float addrspace(1)* %r + store float %r.val, ptr addrspace(1) %r ret void } @@ -89,13 +89,13 @@ entry: ; GCN: {{buffer|flat}}_load_ushort [[A:v[0-9]+]] ; GCN: v_cvt_f32_f16_e64 v{{[0-9]+}}, -[[A]] define amdgpu_kernel void @fneg_fpext_f16_to_f32( - float addrspace(1)* %r, - half addrspace(1)* %a) { + ptr addrspace(1) %r, + ptr addrspace(1) %a) { entry: - %a.val = load half, half addrspace(1)* %a + %a.val = load half, ptr addrspace(1) %a %a.neg = fsub half -0.0, %a.val %r.val = fpext half %a.neg to float - store float %r.val, float addrspace(1)* %r + store float %r.val, ptr addrspace(1) %r ret void } @@ -103,13 +103,13 @@ entry: ; GCN: {{buffer|flat}}_load_ushort [[A:v[0-9]+]] ; GCN: v_cvt_f32_f16_e64 v{{[0-9]+}}, |[[A]]| define amdgpu_kernel void @fabs_fpext_f16_to_f32( - float addrspace(1)* %r, - half addrspace(1)* %a) { + ptr addrspace(1) %r, + ptr addrspace(1) %a) { entry: - %a.val = load half, half addrspace(1)* %a + %a.val = load half, ptr addrspace(1) %a %a.fabs = call half @llvm.fabs.f16(half %a.val) %r.val = fpext half %a.fabs to float - store float %r.val, float addrspace(1)* %r + store float %r.val, ptr addrspace(1) %r ret void } @@ -117,14 +117,14 @@ entry: ; GCN: {{buffer|flat}}_load_ushort [[A:v[0-9]+]] ; GCN: v_cvt_f32_f16_e64 v{{[0-9]+}}, -|[[A]]| define amdgpu_kernel void @fneg_fabs_fpext_f16_to_f32( - float addrspace(1)* %r, - half addrspace(1)* %a) { + ptr addrspace(1) %r, + ptr addrspace(1) %a) { entry: - %a.val = load half, half addrspace(1)* %a + %a.val = load half, ptr addrspace(1) %a %a.fabs = call half @llvm.fabs.f16(half %a.val) %a.fneg.fabs = fsub half -0.0, %a.fabs %r.val = fpext half %a.fneg.fabs to float - store float %r.val, float addrspace(1)* %r + store float %r.val, ptr 
addrspace(1) %r ret void } @@ -139,14 +139,14 @@ entry: ; GCN: store_dword [[CVT]] ; GCN: store_short [[XOR]] define amdgpu_kernel void @fneg_multi_use_fpext_f16_to_f32( - float addrspace(1)* %r, - half addrspace(1)* %a) { + ptr addrspace(1) %r, + ptr addrspace(1) %a) { entry: - %a.val = load half, half addrspace(1)* %a + %a.val = load half, ptr addrspace(1) %a %a.neg = fsub half -0.0, %a.val %r.val = fpext half %a.neg to float - store volatile float %r.val, float addrspace(1)* %r - store volatile half %a.neg, half addrspace(1)* undef + store volatile float %r.val, ptr addrspace(1) %r + store volatile half %a.neg, ptr addrspace(1) undef ret void } @@ -163,15 +163,15 @@ entry: ; GCN: buffer_store_dword [[CVTA_NEG]] ; GCN: buffer_store_short [[MUL]] define amdgpu_kernel void @fneg_multi_foldable_use_fpext_f16_to_f32( - float addrspace(1)* %r, - half addrspace(1)* %a) { + ptr addrspace(1) %r, + ptr addrspace(1) %a) { entry: - %a.val = load half, half addrspace(1)* %a + %a.val = load half, ptr addrspace(1) %a %a.neg = fsub half -0.0, %a.val %r.val = fpext half %a.neg to float %mul = fmul half %a.neg, %a.val - store volatile float %r.val, float addrspace(1)* %r - store volatile half %mul, half addrspace(1)* undef + store volatile float %r.val, ptr addrspace(1) %r + store volatile half %mul, ptr addrspace(1) undef ret void } @@ -185,14 +185,14 @@ entry: ; GCN: store_dword [[CVT]] ; GCN: store_short [[XOR]] define amdgpu_kernel void @fabs_multi_use_fpext_f16_to_f32( - float addrspace(1)* %r, - half addrspace(1)* %a) { + ptr addrspace(1) %r, + ptr addrspace(1) %a) { entry: - %a.val = load half, half addrspace(1)* %a + %a.val = load half, ptr addrspace(1) %a %a.fabs = call half @llvm.fabs.f16(half %a.val) %r.val = fpext half %a.fabs to float - store volatile float %r.val, float addrspace(1)* %r - store volatile half %a.fabs, half addrspace(1)* undef + store volatile float %r.val, ptr addrspace(1) %r + store volatile half %a.fabs, ptr addrspace(1) undef ret void } @@ -209,15 +209,15 @@ entry: ; GCN: buffer_store_dword [[ABS_A]] ; GCN: buffer_store_short [[MUL]] define amdgpu_kernel void @fabs_multi_foldable_use_fpext_f16_to_f32( - float addrspace(1)* %r, - half addrspace(1)* %a) { + ptr addrspace(1) %r, + ptr addrspace(1) %a) { entry: - %a.val = load half, half addrspace(1)* %a + %a.val = load half, ptr addrspace(1) %a %a.fabs = call half @llvm.fabs.f16(half %a.val) %r.val = fpext half %a.fabs to float %mul = fmul half %a.fabs, %a.val - store volatile float %r.val, float addrspace(1)* %r - store volatile half %mul, half addrspace(1)* undef + store volatile float %r.val, ptr addrspace(1) %r + store volatile half %mul, ptr addrspace(1) undef ret void } @@ -231,15 +231,15 @@ entry: ; GCN: buffer_store_dword [[CVT]] ; GCN: buffer_store_short [[OR]] define amdgpu_kernel void @fabs_fneg_multi_use_fpext_f16_to_f32( - float addrspace(1)* %r, - half addrspace(1)* %a) { + ptr addrspace(1) %r, + ptr addrspace(1) %a) { entry: - %a.val = load half, half addrspace(1)* %a + %a.val = load half, ptr addrspace(1) %a %a.fabs = call half @llvm.fabs.f16(half %a.val) %a.fneg.fabs = fsub half -0.0, %a.fabs %r.val = fpext half %a.fneg.fabs to float - store volatile float %r.val, float addrspace(1)* %r - store volatile half %a.fneg.fabs, half addrspace(1)* undef + store volatile float %r.val, ptr addrspace(1) %r + store volatile half %a.fneg.fabs, ptr addrspace(1) undef ret void } @@ -256,16 +256,16 @@ entry: ; GCN: buffer_store_dword [[FABS_FNEG]] ; GCN: buffer_store_short [[MUL]] define amdgpu_kernel void 
@fabs_fneg_multi_foldable_use_fpext_f16_to_f32( - float addrspace(1)* %r, - half addrspace(1)* %a) { + ptr addrspace(1) %r, + ptr addrspace(1) %a) { entry: - %a.val = load half, half addrspace(1)* %a + %a.val = load half, ptr addrspace(1) %a %a.fabs = call half @llvm.fabs.f16(half %a.val) %a.fneg.fabs = fsub half -0.0, %a.fabs %r.val = fpext half %a.fneg.fabs to float %mul = fmul half %a.fneg.fabs, %a.val - store volatile float %r.val, float addrspace(1)* %r - store volatile half %mul, half addrspace(1)* undef + store volatile float %r.val, ptr addrspace(1) %r + store volatile half %mul, ptr addrspace(1) undef ret void } diff --git a/llvm/test/CodeGen/AMDGPU/fpext.ll b/llvm/test/CodeGen/AMDGPU/fpext.ll index b11e2ea..cc5291c 100644 --- a/llvm/test/CodeGen/AMDGPU/fpext.ll +++ b/llvm/test/CodeGen/AMDGPU/fpext.ll @@ -3,18 +3,18 @@ ; FUNC-LABEL: {{^}}fpext_f32_to_f64: ; SI: v_cvt_f64_f32_e32 {{v\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} -define amdgpu_kernel void @fpext_f32_to_f64(double addrspace(1)* %out, float %in) { +define amdgpu_kernel void @fpext_f32_to_f64(ptr addrspace(1) %out, float %in) { %result = fpext float %in to double - store double %result, double addrspace(1)* %out + store double %result, ptr addrspace(1) %out ret void } ; FUNC-LABEL: {{^}}fpext_v2f32_to_v2f64: ; SI: v_cvt_f64_f32_e32 ; SI: v_cvt_f64_f32_e32 -define amdgpu_kernel void @fpext_v2f32_to_v2f64(<2 x double> addrspace(1)* %out, <2 x float> %in) { +define amdgpu_kernel void @fpext_v2f32_to_v2f64(ptr addrspace(1) %out, <2 x float> %in) { %result = fpext <2 x float> %in to <2 x double> - store <2 x double> %result, <2 x double> addrspace(1)* %out + store <2 x double> %result, ptr addrspace(1) %out ret void } @@ -22,9 +22,9 @@ define amdgpu_kernel void @fpext_v2f32_to_v2f64(<2 x double> addrspace(1)* %out, ; SI: v_cvt_f64_f32_e32 ; SI: v_cvt_f64_f32_e32 ; SI: v_cvt_f64_f32_e32 -define amdgpu_kernel void @fpext_v3f32_to_v3f64(<3 x double> addrspace(1)* %out, <3 x float> %in) { +define amdgpu_kernel void @fpext_v3f32_to_v3f64(ptr addrspace(1) %out, <3 x float> %in) { %result = fpext <3 x float> %in to <3 x double> - store <3 x double> %result, <3 x double> addrspace(1)* %out + store <3 x double> %result, ptr addrspace(1) %out ret void } @@ -33,9 +33,9 @@ define amdgpu_kernel void @fpext_v3f32_to_v3f64(<3 x double> addrspace(1)* %out, ; SI: v_cvt_f64_f32_e32 ; SI: v_cvt_f64_f32_e32 ; SI: v_cvt_f64_f32_e32 -define amdgpu_kernel void @fpext_v4f32_to_v4f64(<4 x double> addrspace(1)* %out, <4 x float> %in) { +define amdgpu_kernel void @fpext_v4f32_to_v4f64(ptr addrspace(1) %out, <4 x float> %in) { %result = fpext <4 x float> %in to <4 x double> - store <4 x double> %result, <4 x double> addrspace(1)* %out + store <4 x double> %result, ptr addrspace(1) %out ret void } @@ -48,8 +48,8 @@ define amdgpu_kernel void @fpext_v4f32_to_v4f64(<4 x double> addrspace(1)* %out, ; SI: v_cvt_f64_f32_e32 ; SI: v_cvt_f64_f32_e32 ; SI: v_cvt_f64_f32_e32 -define amdgpu_kernel void @fpext_v8f32_to_v8f64(<8 x double> addrspace(1)* %out, <8 x float> %in) { +define amdgpu_kernel void @fpext_v8f32_to_v8f64(ptr addrspace(1) %out, <8 x float> %in) { %result = fpext <8 x float> %in to <8 x double> - store <8 x double> %result, <8 x double> addrspace(1)* %out + store <8 x double> %result, ptr addrspace(1) %out ret void } diff --git a/llvm/test/CodeGen/AMDGPU/fptosi.f16.ll b/llvm/test/CodeGen/AMDGPU/fptosi.f16.ll index 6fc9b7f..03792ca 100644 --- a/llvm/test/CodeGen/AMDGPU/fptosi.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/fptosi.f16.ll @@ -9,12 +9,12 @@ ; GCN: 
buffer_store_short v[[R_I16]] ; GCN: s_endpgm define amdgpu_kernel void @fptosi_f16_to_i16( - i16 addrspace(1)* %r, - half addrspace(1)* %a) { + ptr addrspace(1) %r, + ptr addrspace(1) %a) { entry: - %a.val = load half, half addrspace(1)* %a + %a.val = load half, ptr addrspace(1) %a %r.val = fptosi half %a.val to i16 - store i16 %r.val, i16 addrspace(1)* %r + store i16 %r.val, ptr addrspace(1) %r ret void } @@ -25,12 +25,12 @@ entry: ; GCN: buffer_store_dword v[[R_I32]] ; GCN: s_endpgm define amdgpu_kernel void @fptosi_f16_to_i32( - i32 addrspace(1)* %r, - half addrspace(1)* %a) { + ptr addrspace(1) %r, + ptr addrspace(1) %a) { entry: - %a.val = load half, half addrspace(1)* %a + %a.val = load half, ptr addrspace(1) %a %r.val = fptosi half %a.val to i32 - store i32 %r.val, i32 addrspace(1)* %r + store i32 %r.val, ptr addrspace(1) %r ret void } @@ -45,12 +45,12 @@ entry: ; GCN: buffer_store_dwordx2 v[[[R_I64_Low]]{{\:}}[[R_I64_High]]] ; GCN: s_endpgm define amdgpu_kernel void @fptosi_f16_to_i64( - i64 addrspace(1)* %r, - half addrspace(1)* %a) { + ptr addrspace(1) %r, + ptr addrspace(1) %a) { entry: - %a.val = load half, half addrspace(1)* %a + %a.val = load half, ptr addrspace(1) %a %r.val = fptosi half %a.val to i64 - store i64 %r.val, i64 addrspace(1)* %r + store i64 %r.val, ptr addrspace(1) %r ret void } @@ -74,12 +74,12 @@ entry: ; GCN: s_endpgm define amdgpu_kernel void @fptosi_v2f16_to_v2i16( - <2 x i16> addrspace(1)* %r, - <2 x half> addrspace(1)* %a) { + ptr addrspace(1) %r, + ptr addrspace(1) %a) { entry: - %a.val = load <2 x half>, <2 x half> addrspace(1)* %a + %a.val = load <2 x half>, ptr addrspace(1) %a %r.val = fptosi <2 x half> %a.val to <2 x i16> - store <2 x i16> %r.val, <2 x i16> addrspace(1)* %r + store <2 x i16> %r.val, ptr addrspace(1) %r ret void } @@ -93,12 +93,12 @@ entry: ; GCN: buffer_store_dwordx2 ; GCN: s_endpgm define amdgpu_kernel void @fptosi_v2f16_to_v2i32( - <2 x i32> addrspace(1)* %r, - <2 x half> addrspace(1)* %a) { + ptr addrspace(1) %r, + ptr addrspace(1) %a) { entry: - %a.val = load <2 x half>, <2 x half> addrspace(1)* %a + %a.val = load <2 x half>, ptr addrspace(1) %a %r.val = fptosi <2 x half> %a.val to <2 x i32> - store <2 x i32> %r.val, <2 x i32> addrspace(1)* %r + store <2 x i32> %r.val, ptr addrspace(1) %r ret void } @@ -124,12 +124,12 @@ entry: ; GCN: buffer_store_dwordx4 v[[[R_I64_0_Low]]{{\:}}[[R_I64_1_High]]] ; GCN: s_endpgm define amdgpu_kernel void @fptosi_v2f16_to_v2i64( - <2 x i64> addrspace(1)* %r, - <2 x half> addrspace(1)* %a) { + ptr addrspace(1) %r, + ptr addrspace(1) %a) { entry: - %a.val = load <2 x half>, <2 x half> addrspace(1)* %a + %a.val = load <2 x half>, ptr addrspace(1) %a %r.val = fptosi <2 x half> %a.val to <2 x i64> - store <2 x i64> %r.val, <2 x i64> addrspace(1)* %r + store <2 x i64> %r.val, ptr addrspace(1) %r ret void } @@ -139,9 +139,9 @@ entry: ; SI: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, vcc ; VI: v_cmp_eq_f16_e64 s{{\[[0-9]+:[0-9]+\]}}, 0xbc00, s{{[0-9]+}} ; VI: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, s[4:5] -define amdgpu_kernel void @fptosi_f16_to_i1(i1 addrspace(1)* %out, half %in) { +define amdgpu_kernel void @fptosi_f16_to_i1(ptr addrspace(1) %out, half %in) { entry: %conv = fptosi half %in to i1 - store i1 %conv, i1 addrspace(1)* %out + store i1 %conv, ptr addrspace(1) %out ret void } diff --git a/llvm/test/CodeGen/AMDGPU/fptoui.f16.ll b/llvm/test/CodeGen/AMDGPU/fptoui.f16.ll index bf8677b..48b76eb 100644 --- a/llvm/test/CodeGen/AMDGPU/fptoui.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/fptoui.f16.ll @@ -9,12 +9,12 @@ 
; GCN: buffer_store_short v[[R_I16]] ; GCN: s_endpgm define amdgpu_kernel void @fptoui_f16_to_i16( - i16 addrspace(1)* %r, - half addrspace(1)* %a) { + ptr addrspace(1) %r, + ptr addrspace(1) %a) { entry: - %a.val = load half, half addrspace(1)* %a + %a.val = load half, ptr addrspace(1) %a %r.val = fptoui half %a.val to i16 - store i16 %r.val, i16 addrspace(1)* %r + store i16 %r.val, ptr addrspace(1) %r ret void } @@ -25,12 +25,12 @@ entry: ; GCN: buffer_store_dword v[[R_I32]] ; GCN: s_endpgm define amdgpu_kernel void @fptoui_f16_to_i32( - i32 addrspace(1)* %r, - half addrspace(1)* %a) { + ptr addrspace(1) %r, + ptr addrspace(1) %a) { entry: - %a.val = load half, half addrspace(1)* %a + %a.val = load half, ptr addrspace(1) %a %r.val = fptoui half %a.val to i32 - store i32 %r.val, i32 addrspace(1)* %r + store i32 %r.val, ptr addrspace(1) %r ret void } @@ -45,12 +45,12 @@ entry: ; GCN: buffer_store_dwordx2 v[[[R_I64_Low]]{{\:}}[[R_I64_High]]] ; GCN: s_endpgm define amdgpu_kernel void @fptoui_f16_to_i64( - i64 addrspace(1)* %r, - half addrspace(1)* %a) { + ptr addrspace(1) %r, + ptr addrspace(1) %a) { entry: - %a.val = load half, half addrspace(1)* %a + %a.val = load half, ptr addrspace(1) %a %r.val = fptoui half %a.val to i64 - store i64 %r.val, i64 addrspace(1)* %r + store i64 %r.val, ptr addrspace(1) %r ret void } @@ -73,12 +73,12 @@ entry: ; GCN: s_endpgm define amdgpu_kernel void @fptoui_v2f16_to_v2i16( - <2 x i16> addrspace(1)* %r, - <2 x half> addrspace(1)* %a) { + ptr addrspace(1) %r, + ptr addrspace(1) %a) { entry: - %a.val = load <2 x half>, <2 x half> addrspace(1)* %a + %a.val = load <2 x half>, ptr addrspace(1) %a %r.val = fptoui <2 x half> %a.val to <2 x i16> - store <2 x i16> %r.val, <2 x i16> addrspace(1)* %r + store <2 x i16> %r.val, ptr addrspace(1) %r ret void } @@ -92,12 +92,12 @@ entry: ; GCN: buffer_store_dwordx2 ; GCN: s_endpgm define amdgpu_kernel void @fptoui_v2f16_to_v2i32( - <2 x i32> addrspace(1)* %r, - <2 x half> addrspace(1)* %a) { + ptr addrspace(1) %r, + ptr addrspace(1) %a) { entry: - %a.val = load <2 x half>, <2 x half> addrspace(1)* %a + %a.val = load <2 x half>, ptr addrspace(1) %a %r.val = fptoui <2 x half> %a.val to <2 x i32> - store <2 x i32> %r.val, <2 x i32> addrspace(1)* %r + store <2 x i32> %r.val, ptr addrspace(1) %r ret void } @@ -120,12 +120,12 @@ entry: ; GCN: buffer_store_dwordx4 v[[[R_I64_0_Low]]{{\:}}[[R_I64_1_High]]] ; GCN: s_endpgm define amdgpu_kernel void @fptoui_v2f16_to_v2i64( - <2 x i64> addrspace(1)* %r, - <2 x half> addrspace(1)* %a) { + ptr addrspace(1) %r, + ptr addrspace(1) %a) { entry: - %a.val = load <2 x half>, <2 x half> addrspace(1)* %a + %a.val = load <2 x half>, ptr addrspace(1) %a %r.val = fptoui <2 x half> %a.val to <2 x i64> - store <2 x i64> %r.val, <2 x i64> addrspace(1)* %r + store <2 x i64> %r.val, ptr addrspace(1) %r ret void } @@ -135,9 +135,9 @@ entry: ; SI: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, vcc ; VI: v_cmp_eq_f16_e64 s{{\[[0-9]+:[0-9]+\]}}, 1.0, s{{[0-9]+}} ; VI: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, s[4:5] -define amdgpu_kernel void @fptoui_f16_to_i1(i1 addrspace(1)* %out, half %in) { +define amdgpu_kernel void @fptoui_f16_to_i1(ptr addrspace(1) %out, half %in) { entry: %conv = fptoui half %in to i1 - store i1 %conv, i1 addrspace(1)* %out + store i1 %conv, ptr addrspace(1) %out ret void } diff --git a/llvm/test/CodeGen/AMDGPU/i8-to-double-to-float.ll b/llvm/test/CodeGen/AMDGPU/i8-to-double-to-float.ll index d501be5..8300436 100644 --- a/llvm/test/CodeGen/AMDGPU/i8-to-double-to-float.ll +++ 
b/llvm/test/CodeGen/AMDGPU/i8-to-double-to-float.ll @@ -2,10 +2,10 @@ ;CHECK: UINT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -define amdgpu_kernel void @test(float addrspace(1)* %out, i8 addrspace(1)* %in) { - %1 = load i8, i8 addrspace(1)* %in +define amdgpu_kernel void @test(ptr addrspace(1) %out, ptr addrspace(1) %in) { + %1 = load i8, ptr addrspace(1) %in %2 = uitofp i8 %1 to double %3 = fptrunc double %2 to float - store float %3, float addrspace(1)* %out + store float %3, ptr addrspace(1) %out ret void } diff --git a/llvm/test/CodeGen/AMDGPU/setcc-sext.ll b/llvm/test/CodeGen/AMDGPU/setcc-sext.ll index eadce22..0c2d669 100644 --- a/llvm/test/CodeGen/AMDGPU/setcc-sext.ll +++ b/llvm/test/CodeGen/AMDGPU/setcc-sext.ll @@ -5,7 +5,7 @@ ; GCN-NEXT: s_and_saveexec_b64 {{[^,]+}}, [[CC]] ; GCN-NOT: v_cndmask_ -define amdgpu_kernel void @setcc_sgt_true_sext(i32 addrspace(1)* nocapture %arg) { +define amdgpu_kernel void @setcc_sgt_true_sext(ptr addrspace(1) nocapture %arg) { bb: %x = tail call i32 @llvm.amdgcn.workitem.id.x() %y = tail call i32 @llvm.amdgcn.workitem.id.y() @@ -15,7 +15,7 @@ bb: br i1 %cond, label %then, label %endif then: - store i32 1, i32 addrspace(1)* %arg, align 4 + store i32 1, ptr addrspace(1) %arg, align 4 br label %endif endif: @@ -27,7 +27,7 @@ endif: ; GCN-NEXT: s_and_saveexec_b64 {{[^,]+}}, [[CC]] ; GCN-NOT: v_cndmask_ -define amdgpu_kernel void @setcc_sgt_true_sext_swap(i32 addrspace(1)* nocapture %arg) { +define amdgpu_kernel void @setcc_sgt_true_sext_swap(ptr addrspace(1) nocapture %arg) { bb: %x = tail call i32 @llvm.amdgcn.workitem.id.x() %y = tail call i32 @llvm.amdgcn.workitem.id.y() @@ -37,7 +37,7 @@ bb: br i1 %cond, label %then, label %endif then: - store i32 1, i32 addrspace(1)* %arg, align 4 + store i32 1, ptr addrspace(1) %arg, align 4 br label %endif endif: @@ -49,7 +49,7 @@ endif: ; GCN-NEXT: s_and_saveexec_b64 {{[^,]+}}, [[CC]] ; GCN-NOT: v_cndmask_ -define amdgpu_kernel void @setcc_ne_true_sext(i32 addrspace(1)* nocapture %arg) { +define amdgpu_kernel void @setcc_ne_true_sext(ptr addrspace(1) nocapture %arg) { bb: %x = tail call i32 @llvm.amdgcn.workitem.id.x() %y = tail call i32 @llvm.amdgcn.workitem.id.y() @@ -59,7 +59,7 @@ bb: br i1 %cond, label %then, label %endif then: - store i32 1, i32 addrspace(1)* %arg, align 4 + store i32 1, ptr addrspace(1) %arg, align 4 br label %endif endif: @@ -71,7 +71,7 @@ endif: ; GCN-NEXT: s_and_saveexec_b64 {{[^,]+}}, [[CC]] ; GCN-NOT: v_cndmask_ -define amdgpu_kernel void @setcc_ult_true_sext(i32 addrspace(1)* nocapture %arg) { +define amdgpu_kernel void @setcc_ult_true_sext(ptr addrspace(1) nocapture %arg) { bb: %x = tail call i32 @llvm.amdgcn.workitem.id.x() %y = tail call i32 @llvm.amdgcn.workitem.id.y() @@ -81,7 +81,7 @@ bb: br i1 %cond, label %then, label %endif then: - store i32 1, i32 addrspace(1)* %arg, align 4 + store i32 1, ptr addrspace(1) %arg, align 4 br label %endif endif: @@ -93,7 +93,7 @@ endif: ; GCN-NEXT: s_and_saveexec_b64 {{[^,]+}}, [[CC]] ; GCN-NOT: v_cndmask_ -define amdgpu_kernel void @setcc_eq_true_sext(i32 addrspace(1)* nocapture %arg) { +define amdgpu_kernel void @setcc_eq_true_sext(ptr addrspace(1) nocapture %arg) { bb: %x = tail call i32 @llvm.amdgcn.workitem.id.x() %y = tail call i32 @llvm.amdgcn.workitem.id.y() @@ -103,7 +103,7 @@ bb: br i1 %cond, label %then, label %endif then: - store i32 1, i32 addrspace(1)* %arg, align 4 + store i32 1, ptr addrspace(1) %arg, align 4 br label %endif endif: @@ -115,7 +115,7 @@ endif: ; GCN-NEXT: s_and_saveexec_b64 {{[^,]+}}, [[CC]] ; GCN-NOT: 
v_cndmask_ -define amdgpu_kernel void @setcc_sle_true_sext(i32 addrspace(1)* nocapture %arg) { +define amdgpu_kernel void @setcc_sle_true_sext(ptr addrspace(1) nocapture %arg) { bb: %x = tail call i32 @llvm.amdgcn.workitem.id.x() %y = tail call i32 @llvm.amdgcn.workitem.id.y() @@ -125,7 +125,7 @@ bb: br i1 %cond, label %then, label %endif then: - store i32 1, i32 addrspace(1)* %arg, align 4 + store i32 1, ptr addrspace(1) %arg, align 4 br label %endif endif: @@ -137,7 +137,7 @@ endif: ; GCN-NEXT: s_and_saveexec_b64 {{[^,]+}}, [[CC]] ; GCN-NOT: v_cndmask_ -define amdgpu_kernel void @setcc_uge_true_sext(i32 addrspace(1)* nocapture %arg) { +define amdgpu_kernel void @setcc_uge_true_sext(ptr addrspace(1) nocapture %arg) { bb: %x = tail call i32 @llvm.amdgcn.workitem.id.x() %y = tail call i32 @llvm.amdgcn.workitem.id.y() @@ -147,7 +147,7 @@ bb: br i1 %cond, label %then, label %endif then: - store i32 1, i32 addrspace(1)* %arg, align 4 + store i32 1, ptr addrspace(1) %arg, align 4 br label %endif endif: @@ -159,7 +159,7 @@ endif: ; GCN-NEXT: s_and_saveexec_b64 {{[^,]+}}, [[CC]] ; GCN-NOT: v_cndmask_ -define amdgpu_kernel void @setcc_eq_false_sext(i32 addrspace(1)* nocapture %arg) { +define amdgpu_kernel void @setcc_eq_false_sext(ptr addrspace(1) nocapture %arg) { bb: %x = tail call i32 @llvm.amdgcn.workitem.id.x() %y = tail call i32 @llvm.amdgcn.workitem.id.y() @@ -169,7 +169,7 @@ bb: br i1 %cond, label %then, label %endif then: - store i32 1, i32 addrspace(1)* %arg, align 4 + store i32 1, ptr addrspace(1) %arg, align 4 br label %endif endif: @@ -181,7 +181,7 @@ endif: ; GCN-NEXT: s_and_saveexec_b64 {{[^,]+}}, [[CC]] ; GCN-NOT: v_cndmask_ -define amdgpu_kernel void @setcc_sge_false_sext(i32 addrspace(1)* nocapture %arg) { +define amdgpu_kernel void @setcc_sge_false_sext(ptr addrspace(1) nocapture %arg) { bb: %x = tail call i32 @llvm.amdgcn.workitem.id.x() %y = tail call i32 @llvm.amdgcn.workitem.id.y() @@ -191,7 +191,7 @@ bb: br i1 %cond, label %then, label %endif then: - store i32 1, i32 addrspace(1)* %arg, align 4 + store i32 1, ptr addrspace(1) %arg, align 4 br label %endif endif: @@ -203,7 +203,7 @@ endif: ; GCN-NEXT: s_and_saveexec_b64 {{[^,]+}}, [[CC]] ; GCN-NOT: v_cndmask_ -define amdgpu_kernel void @setcc_ule_false_sext(i32 addrspace(1)* nocapture %arg) { +define amdgpu_kernel void @setcc_ule_false_sext(ptr addrspace(1) nocapture %arg) { bb: %x = tail call i32 @llvm.amdgcn.workitem.id.x() %y = tail call i32 @llvm.amdgcn.workitem.id.y() @@ -213,7 +213,7 @@ bb: br i1 %cond, label %then, label %endif then: - store i32 1, i32 addrspace(1)* %arg, align 4 + store i32 1, ptr addrspace(1) %arg, align 4 br label %endif endif: @@ -225,7 +225,7 @@ endif: ; GCN-NEXT: s_and_saveexec_b64 {{[^,]+}}, [[CC]] ; GCN-NOT: v_cndmask_ -define amdgpu_kernel void @setcc_ne_false_sext(i32 addrspace(1)* nocapture %arg) { +define amdgpu_kernel void @setcc_ne_false_sext(ptr addrspace(1) nocapture %arg) { bb: %x = tail call i32 @llvm.amdgcn.workitem.id.x() %y = tail call i32 @llvm.amdgcn.workitem.id.y() @@ -235,7 +235,7 @@ bb: br i1 %cond, label %then, label %endif then: - store i32 1, i32 addrspace(1)* %arg, align 4 + store i32 1, ptr addrspace(1) %arg, align 4 br label %endif endif: @@ -246,7 +246,7 @@ endif: ; GCN-NEXT: s_and_saveexec_b64 {{[^,]+}}, [[CC]] ; GCN-NOT: v_cndmask_ -define amdgpu_kernel void @setcc_ugt_false_sext(i32 addrspace(1)* nocapture %arg) { +define amdgpu_kernel void @setcc_ugt_false_sext(ptr addrspace(1) nocapture %arg) { bb: %x = tail call i32 @llvm.amdgcn.workitem.id.x() %y = tail call i32 
@llvm.amdgcn.workitem.id.y() @@ -256,7 +256,7 @@ bb: br i1 %cond, label %then, label %endif then: - store i32 1, i32 addrspace(1)* %arg, align 4 + store i32 1, ptr addrspace(1) %arg, align 4 br label %endif endif: @@ -267,7 +267,7 @@ endif: ; GCN-NEXT: s_and_saveexec_b64 {{[^,]+}}, [[CC]] ; GCN-NOT: v_cndmask_ -define amdgpu_kernel void @setcc_slt_false_sext(i32 addrspace(1)* nocapture %arg) { +define amdgpu_kernel void @setcc_slt_false_sext(ptr addrspace(1) nocapture %arg) { bb: %x = tail call i32 @llvm.amdgcn.workitem.id.x() %y = tail call i32 @llvm.amdgcn.workitem.id.y() @@ -277,7 +277,7 @@ bb: br i1 %cond, label %then, label %endif then: - store i32 1, i32 addrspace(1)* %arg, align 4 + store i32 1, ptr addrspace(1) %arg, align 4 br label %endif endif: diff --git a/llvm/test/CodeGen/AMDGPU/sext-eliminate.ll b/llvm/test/CodeGen/AMDGPU/sext-eliminate.ll index 0b780af..91b418a 100644 --- a/llvm/test/CodeGen/AMDGPU/sext-eliminate.ll +++ b/llvm/test/CodeGen/AMDGPU/sext-eliminate.ll @@ -6,10 +6,10 @@ ; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+\.[XYZW]]], [[ADDR:T[0-9]+.[XYZW]]] ; EG: SUB_INT {{[* ]*}}[[RES]] ; EG-NOT: BFE -define amdgpu_kernel void @sext_in_reg_i1_i32_add(i32 addrspace(1)* %out, i1 %a, i32 %b) { +define amdgpu_kernel void @sext_in_reg_i1_i32_add(ptr addrspace(1) %out, i1 %a, i32 %b) { %sext = sext i1 %a to i32 %res = add i32 %b, %sext - store i32 %res, i32 addrspace(1)* %out + store i32 %res, ptr addrspace(1) %out ret void } @@ -18,9 +18,9 @@ define amdgpu_kernel void @sext_in_reg_i1_i32_add(i32 addrspace(1)* %out, i1 %a, ; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+\.[XYZW]]], [[ADDR:T[0-9]+.[XYZW]]] ; EG: ADD_INT {{[* ]*}}[[RES]] ; EG-NOT: BFE -define amdgpu_kernel void @sext_in_reg_i1_i32_sub(i32 addrspace(1)* %out, i1 %a, i32 %b) { +define amdgpu_kernel void @sext_in_reg_i1_i32_sub(ptr addrspace(1) %out, i1 %a, i32 %b) { %sext = sext i1 %a to i32 %res = sub i32 %b, %sext - store i32 %res, i32 addrspace(1)* %out + store i32 %res, ptr addrspace(1) %out ret void } diff --git a/llvm/test/CodeGen/AMDGPU/sext-in-reg.ll b/llvm/test/CodeGen/AMDGPU/sext-in-reg.ll index 30487eac..6376833 100644 --- a/llvm/test/CodeGen/AMDGPU/sext-in-reg.ll +++ b/llvm/test/CodeGen/AMDGPU/sext-in-reg.ll @@ -15,10 +15,10 @@ ; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+\.[XYZW]]], [[ADDR:T[0-9]+.[XYZW]]] ; EG: LSHR * [[ADDR]] ; EG: BFE_INT * [[RES]], {{.*}}, 0.0, 1 -define amdgpu_kernel void @sext_in_reg_i1_i32(i32 addrspace(1)* %out, i32 %in) #0 { +define amdgpu_kernel void @sext_in_reg_i1_i32(ptr addrspace(1) %out, i32 %in) #0 { %shl = shl i32 %in, 31 %sext = ashr i32 %shl, 31 - store i32 %sext, i32 addrspace(1)* %out + store i32 %sext, ptr addrspace(1) %out ret void } @@ -32,11 +32,11 @@ define amdgpu_kernel void @sext_in_reg_i1_i32(i32 addrspace(1)* %out, i32 %in) # ; EG: ADD_INT ; EG-NEXT: BFE_INT [[RES]], {{.*}}, 0.0, literal ; EG-NEXT: LSHR * [[ADDR]] -define amdgpu_kernel void @sext_in_reg_i8_to_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 { +define amdgpu_kernel void @sext_in_reg_i8_to_i32(ptr addrspace(1) %out, i32 %a, i32 %b) #0 { %c = add i32 %a, %b ; add to prevent folding into extload %shl = shl i32 %c, 24 %ashr = ashr i32 %shl, 24 - store i32 %ashr, i32 addrspace(1)* %out, align 4 + store i32 %ashr, ptr addrspace(1) %out, align 4 ret void } @@ -50,11 +50,11 @@ define amdgpu_kernel void @sext_in_reg_i8_to_i32(i32 addrspace(1)* %out, i32 %a, ; EG: ADD_INT ; EG-NEXT: BFE_INT [[RES]], {{.*}}, 0.0, literal ; EG-NEXT: LSHR * [[ADDR]] -define amdgpu_kernel void @sext_in_reg_i16_to_i32(i32 
addrspace(1)* %out, i32 %a, i32 %b) #0 { +define amdgpu_kernel void @sext_in_reg_i16_to_i32(ptr addrspace(1) %out, i32 %a, i32 %b) #0 { %c = add i32 %a, %b ; add to prevent folding into extload %shl = shl i32 %c, 16 %ashr = ashr i32 %shl, 16 - store i32 %ashr, i32 addrspace(1)* %out, align 4 + store i32 %ashr, ptr addrspace(1) %out, align 4 ret void } @@ -68,11 +68,11 @@ define amdgpu_kernel void @sext_in_reg_i16_to_i32(i32 addrspace(1)* %out, i32 %a ; EG: ADD_INT ; EG-NEXT: BFE_INT [[RES]], {{.*}}, 0.0, literal ; EG-NEXT: LSHR * [[ADDR]] -define amdgpu_kernel void @sext_in_reg_i8_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i32> %a, <1 x i32> %b) #0 { +define amdgpu_kernel void @sext_in_reg_i8_to_v1i32(ptr addrspace(1) %out, <1 x i32> %a, <1 x i32> %b) #0 { %c = add <1 x i32> %a, %b ; add to prevent folding into extload %shl = shl <1 x i32> %c, %ashr = ashr <1 x i32> %shl, - store <1 x i32> %ashr, <1 x i32> addrspace(1)* %out, align 4 + store <1 x i32> %ashr, ptr addrspace(1) %out, align 4 ret void } @@ -82,11 +82,11 @@ define amdgpu_kernel void @sext_in_reg_i8_to_v1i32(<1 x i32> addrspace(1)* %out, ; GCN-DAG: v_mov_b32_e32 v[[VLO:[0-9]+]], s[[SLO]] ; GCN-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], s[[SHI]] ; GCN: buffer_store_dwordx2 v[[[VLO]]:[[VHI]]] -define amdgpu_kernel void @sext_in_reg_i1_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) #0 { +define amdgpu_kernel void @sext_in_reg_i1_to_i64(ptr addrspace(1) %out, i64 %a, i64 %b) #0 { %c = shl i64 %a, %b %shl = shl i64 %c, 63 %ashr = ashr i64 %shl, 63 - store i64 %ashr, i64 addrspace(1)* %out, align 8 + store i64 %ashr, ptr addrspace(1) %out, align 8 ret void } @@ -96,11 +96,11 @@ define amdgpu_kernel void @sext_in_reg_i1_to_i64(i64 addrspace(1)* %out, i64 %a, ; GCN-DAG: v_mov_b32_e32 v[[VLO:[0-9]+]], s[[SLO]] ; GCN-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], s[[SHI]] ; GCN: buffer_store_dwordx2 v[[[VLO]]:[[VHI]]] -define amdgpu_kernel void @sext_in_reg_i8_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) #0 { +define amdgpu_kernel void @sext_in_reg_i8_to_i64(ptr addrspace(1) %out, i64 %a, i64 %b) #0 { %c = shl i64 %a, %b %shl = shl i64 %c, 56 %ashr = ashr i64 %shl, 56 - store i64 %ashr, i64 addrspace(1)* %out, align 8 + store i64 %ashr, ptr addrspace(1) %out, align 8 ret void } @@ -111,11 +111,11 @@ define amdgpu_kernel void @sext_in_reg_i8_to_i64(i64 addrspace(1)* %out, i64 %a, ; GCN-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], s[[SHI]] ; GCN: buffer_store_dwordx2 v[[[VLO]]:[[VHI]]] -define amdgpu_kernel void @sext_in_reg_i16_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) #0 { +define amdgpu_kernel void @sext_in_reg_i16_to_i64(ptr addrspace(1) %out, i64 %a, i64 %b) #0 { %c = shl i64 %a, %b %shl = shl i64 %c, 48 %ashr = ashr i64 %shl, 48 - store i64 %ashr, i64 addrspace(1)* %out, align 8 + store i64 %ashr, ptr addrspace(1) %out, align 8 ret void } @@ -125,11 +125,11 @@ define amdgpu_kernel void @sext_in_reg_i16_to_i64(i64 addrspace(1)* %out, i64 %a ; GCN-DAG: v_mov_b32_e32 v[[VLO:[0-9]+]], s[[SLO]] ; GCN-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], s[[SHI]] ; GCN: buffer_store_dwordx2 v[[[VLO]]:[[VHI]]] -define amdgpu_kernel void @sext_in_reg_i32_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) #0 { +define amdgpu_kernel void @sext_in_reg_i32_to_i64(ptr addrspace(1) %out, i64 %a, i64 %b) #0 { %c = shl i64 %a, %b %shl = shl i64 %c, 32 %ashr = ashr i64 %shl, 32 - store i64 %ashr, i64 addrspace(1)* %out, align 8 + store i64 %ashr, ptr addrspace(1) %out, align 8 ret void } @@ -140,11 +140,11 @@ define amdgpu_kernel void @sext_in_reg_i32_to_i64(i64 addrspace(1)* %out, i64 %a ; 
XGCN: buffer_store_dword ; XEG: BFE_INT ; XEG: ASHR -; define amdgpu_kernel void @sext_in_reg_i8_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i64> %a, <1 x i64> %b) #0 { +; define amdgpu_kernel void @sext_in_reg_i8_to_v1i64(ptr addrspace(1) %out, <1 x i64> %a, <1 x i64> %b) #0 { ; %c = add <1 x i64> %a, %b ; %shl = shl <1 x i64> %c, ; %ashr = ashr <1 x i64> %shl, -; store <1 x i64> %ashr, <1 x i64> addrspace(1)* %out, align 8 +; store <1 x i64> %ashr, ptr addrspace(1) %out, align 8 ; ret void ; } @@ -160,18 +160,18 @@ define amdgpu_kernel void @sext_in_reg_i32_to_i64(i64 addrspace(1)* %out, i64 %a ; SI: buffer_store_dwordx2 v[[[LO]]:[[HI]]] ; GFX89: {{flat|global}}_store_dwordx2 v{{.+}}, v[[[LO]]:[[HI]]] -define amdgpu_kernel void @v_sext_in_reg_i1_to_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) #0 { +define amdgpu_kernel void @v_sext_in_reg_i1_to_i64(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() - %a.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid - %b.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid - %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %tid - %a = load i64, i64 addrspace(1)* %a.gep, align 8 - %b = load i64, i64 addrspace(1)* %b.gep, align 8 + %a.gep = getelementptr i64, ptr addrspace(1) %aptr, i32 %tid + %b.gep = getelementptr i64, ptr addrspace(1) %aptr, i32 %tid + %out.gep = getelementptr i64, ptr addrspace(1) %out, i32 %tid + %a = load i64, ptr addrspace(1) %a.gep, align 8 + %b = load i64, ptr addrspace(1) %b.gep, align 8 %c = shl i64 %a, %b %shl = shl i64 %c, 63 %ashr = ashr i64 %shl, 63 - store i64 %ashr, i64 addrspace(1)* %out.gep, align 8 + store i64 %ashr, ptr addrspace(1) %out.gep, align 8 ret void } @@ -187,18 +187,18 @@ define amdgpu_kernel void @v_sext_in_reg_i1_to_i64(i64 addrspace(1)* %out, i64 a ; SI: buffer_store_dwordx2 v[[[LO]]:[[HI]]] ; GFX89: {{flat|global}}_store_dwordx2 v{{.+}}, v[[[LO]]:[[HI]]] -define amdgpu_kernel void @v_sext_in_reg_i8_to_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) #0 { +define amdgpu_kernel void @v_sext_in_reg_i8_to_i64(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() - %a.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid - %b.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid - %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %tid - %a = load i64, i64 addrspace(1)* %a.gep, align 8 - %b = load i64, i64 addrspace(1)* %b.gep, align 8 + %a.gep = getelementptr i64, ptr addrspace(1) %aptr, i32 %tid + %b.gep = getelementptr i64, ptr addrspace(1) %aptr, i32 %tid + %out.gep = getelementptr i64, ptr addrspace(1) %out, i32 %tid + %a = load i64, ptr addrspace(1) %a.gep, align 8 + %b = load i64, ptr addrspace(1) %b.gep, align 8 %c = shl i64 %a, %b %shl = shl i64 %c, 56 %ashr = ashr i64 %shl, 56 - store i64 %ashr, i64 addrspace(1)* %out.gep, align 8 + store i64 %ashr, ptr addrspace(1) %out.gep, align 8 ret void } @@ -214,18 +214,18 @@ define amdgpu_kernel void @v_sext_in_reg_i8_to_i64(i64 addrspace(1)* %out, i64 a ; SI: buffer_store_dwordx2 v[[[LO]]:[[HI]]] ; GFX89: {{flat|global}}_store_dwordx2 v{{.+}}, v[[[LO]]:[[HI]]] -define amdgpu_kernel void @v_sext_in_reg_i16_to_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) #0 { +define amdgpu_kernel void @v_sext_in_reg_i16_to_i64(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr) #0 { 
%tid = call i32 @llvm.amdgcn.workitem.id.x() - %a.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid - %b.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid - %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %tid - %a = load i64, i64 addrspace(1)* %a.gep, align 8 - %b = load i64, i64 addrspace(1)* %b.gep, align 8 + %a.gep = getelementptr i64, ptr addrspace(1) %aptr, i32 %tid + %b.gep = getelementptr i64, ptr addrspace(1) %aptr, i32 %tid + %out.gep = getelementptr i64, ptr addrspace(1) %out, i32 %tid + %a = load i64, ptr addrspace(1) %a.gep, align 8 + %b = load i64, ptr addrspace(1) %b.gep, align 8 %c = shl i64 %a, %b %shl = shl i64 %c, 48 %ashr = ashr i64 %shl, 48 - store i64 %ashr, i64 addrspace(1)* %out.gep, align 8 + store i64 %ashr, ptr addrspace(1) %out.gep, align 8 ret void } @@ -238,18 +238,18 @@ define amdgpu_kernel void @v_sext_in_reg_i16_to_i64(i64 addrspace(1)* %out, i64 ; GCN: v_ashrrev_i32_e32 v[[SHR:[0-9]+]], 31, v[[LO]] ; GFX89: {{flat|global}}_store_dwordx2 v{{.+}}, v[[[LO]]:[[SHR]]] -define amdgpu_kernel void @v_sext_in_reg_i32_to_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) #0 { +define amdgpu_kernel void @v_sext_in_reg_i32_to_i64(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() - %a.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid - %b.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid - %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %tid - %a = load i64, i64 addrspace(1)* %a.gep, align 8 - %b = load i64, i64 addrspace(1)* %b.gep, align 8 + %a.gep = getelementptr i64, ptr addrspace(1) %aptr, i32 %tid + %b.gep = getelementptr i64, ptr addrspace(1) %aptr, i32 %tid + %out.gep = getelementptr i64, ptr addrspace(1) %out, i32 %tid + %a = load i64, ptr addrspace(1) %a.gep, align 8 + %b = load i64, ptr addrspace(1) %b.gep, align 8 %c = shl i64 %a, %b %shl = shl i64 %c, 32 %ashr = ashr i64 %shl, 32 - store i64 %ashr, i64 addrspace(1)* %out.gep, align 8 + store i64 %ashr, ptr addrspace(1) %out.gep, align 8 ret void } @@ -264,11 +264,11 @@ define amdgpu_kernel void @v_sext_in_reg_i32_to_i64(i64 addrspace(1)* %out, i64 ; EG: LSHL ; EG: ASHR [[RES]] ; EG: LSHR {{\*?}} [[ADDR]] -define amdgpu_kernel void @sext_in_reg_i1_in_i32_other_amount(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 { +define amdgpu_kernel void @sext_in_reg_i1_in_i32_other_amount(ptr addrspace(1) %out, i32 %a, i32 %b) #0 { %c = add i32 %a, %b %x = shl i32 %c, 6 %y = ashr i32 %x, 7 - store i32 %y, i32 addrspace(1)* %out + store i32 %y, ptr addrspace(1) %out ret void } @@ -287,11 +287,11 @@ define amdgpu_kernel void @sext_in_reg_i1_in_i32_other_amount(i32 addrspace(1)* ; EG: LSHL ; EG: ASHR [[RES]] ; EG: LSHR {{\*?}} [[ADDR]] -define amdgpu_kernel void @sext_in_reg_v2i1_in_v2i32_other_amount(<2 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b) #0 { +define amdgpu_kernel void @sext_in_reg_v2i1_in_v2i32_other_amount(ptr addrspace(1) %out, <2 x i32> %a, <2 x i32> %b) #0 { %c = add <2 x i32> %a, %b %x = shl <2 x i32> %c, %y = ashr <2 x i32> %x, - store <2 x i32> %y, <2 x i32> addrspace(1)* %out + store <2 x i32> %y, ptr addrspace(1) %out ret void } @@ -305,11 +305,11 @@ define amdgpu_kernel void @sext_in_reg_v2i1_in_v2i32_other_amount(<2 x i32> addr ; EG: BFE_INT [[RES]] ; EG: BFE_INT [[RES]] ; EG: LSHR {{\*?}} [[ADDR]] -define amdgpu_kernel void @sext_in_reg_v2i1_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b) #0 { +define amdgpu_kernel void 
@sext_in_reg_v2i1_to_v2i32(ptr addrspace(1) %out, <2 x i32> %a, <2 x i32> %b) #0 { %c = add <2 x i32> %a, %b ; add to prevent folding into extload %shl = shl <2 x i32> %c, %ashr = ashr <2 x i32> %shl, - store <2 x i32> %ashr, <2 x i32> addrspace(1)* %out, align 8 + store <2 x i32> %ashr, ptr addrspace(1) %out, align 8 ret void } @@ -326,11 +326,11 @@ define amdgpu_kernel void @sext_in_reg_v2i1_to_v2i32(<2 x i32> addrspace(1)* %ou ; EG: BFE_INT [[RES]] ; EG: BFE_INT [[RES]] ; EG: LSHR {{\*?}} [[ADDR]] -define amdgpu_kernel void @sext_in_reg_v4i1_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %a, <4 x i32> %b) #0 { +define amdgpu_kernel void @sext_in_reg_v4i1_to_v4i32(ptr addrspace(1) %out, <4 x i32> %a, <4 x i32> %b) #0 { %c = add <4 x i32> %a, %b ; add to prevent folding into extload %shl = shl <4 x i32> %c, %ashr = ashr <4 x i32> %shl, - store <4 x i32> %ashr, <4 x i32> addrspace(1)* %out, align 8 + store <4 x i32> %ashr, ptr addrspace(1) %out, align 8 ret void } @@ -343,11 +343,11 @@ define amdgpu_kernel void @sext_in_reg_v4i1_to_v4i32(<4 x i32> addrspace(1)* %ou ; EG: BFE_INT [[RES]] ; EG: BFE_INT [[RES]] ; EG: LSHR {{\*?}} [[ADDR]] -define amdgpu_kernel void @sext_in_reg_v2i8_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b) #0 { +define amdgpu_kernel void @sext_in_reg_v2i8_to_v2i32(ptr addrspace(1) %out, <2 x i32> %a, <2 x i32> %b) #0 { %c = add <2 x i32> %a, %b ; add to prevent folding into extload %shl = shl <2 x i32> %c, %ashr = ashr <2 x i32> %shl, - store <2 x i32> %ashr, <2 x i32> addrspace(1)* %out, align 8 + store <2 x i32> %ashr, ptr addrspace(1) %out, align 8 ret void } @@ -364,11 +364,11 @@ define amdgpu_kernel void @sext_in_reg_v2i8_to_v2i32(<2 x i32> addrspace(1)* %ou ; EG: BFE_INT [[RES]] ; EG: BFE_INT [[RES]] ; EG: LSHR {{\*?}} [[ADDR]] -define amdgpu_kernel void @sext_in_reg_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %a, <4 x i32> %b) #0 { +define amdgpu_kernel void @sext_in_reg_v4i8_to_v4i32(ptr addrspace(1) %out, <4 x i32> %a, <4 x i32> %b) #0 { %c = add <4 x i32> %a, %b ; add to prevent folding into extload %shl = shl <4 x i32> %c, %ashr = ashr <4 x i32> %shl, - store <4 x i32> %ashr, <4 x i32> addrspace(1)* %out, align 8 + store <4 x i32> %ashr, ptr addrspace(1) %out, align 8 ret void } @@ -381,35 +381,35 @@ define amdgpu_kernel void @sext_in_reg_v4i8_to_v4i32(<4 x i32> addrspace(1)* %ou ; EG: BFE_INT [[RES]] ; EG: BFE_INT [[RES]] ; EG: LSHR {{\*?}} [[ADDR]] -define amdgpu_kernel void @sext_in_reg_v2i16_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b) #0 { +define amdgpu_kernel void @sext_in_reg_v2i16_to_v2i32(ptr addrspace(1) %out, <2 x i32> %a, <2 x i32> %b) #0 { %c = add <2 x i32> %a, %b ; add to prevent folding into extload %shl = shl <2 x i32> %c, %ashr = ashr <2 x i32> %shl, - store <2 x i32> %ashr, <2 x i32> addrspace(1)* %out, align 8 + store <2 x i32> %ashr, ptr addrspace(1) %out, align 8 ret void } ; FUNC-LABEL: {{^}}testcase: -define amdgpu_kernel void @testcase(i8 addrspace(1)* %out, i8 %a) #0 { +define amdgpu_kernel void @testcase(ptr addrspace(1) %out, i8 %a) #0 { %and_a_1 = and i8 %a, 1 %cmp_eq = icmp eq i8 %and_a_1, 0 %cmp_slt = icmp slt i8 %a, 0 %sel0 = select i1 %cmp_slt, i8 0, i8 %a %sel1 = select i1 %cmp_eq, i8 0, i8 %a %xor = xor i8 %sel0, %sel1 - store i8 %xor, i8 addrspace(1)* %out + store i8 %xor, ptr addrspace(1) %out ret void } ; FUNC-LABEL: {{^}}testcase_3: -define amdgpu_kernel void @testcase_3(i8 addrspace(1)* %out, i8 %a) #0 { +define amdgpu_kernel void @testcase_3(ptr addrspace(1) %out, 
i8 %a) #0 { %and_a_1 = and i8 %a, 1 %cmp_eq = icmp eq i8 %and_a_1, 0 %cmp_slt = icmp slt i8 %a, 0 %sel0 = select i1 %cmp_slt, i8 0, i8 %a %sel1 = select i1 %cmp_eq, i8 0, i8 %a %xor = xor i8 %sel0, %sel1 - store i8 %xor, i8 addrspace(1)* %out + store i8 %xor, ptr addrspace(1) %out ret void } @@ -418,26 +418,26 @@ define amdgpu_kernel void @testcase_3(i8 addrspace(1)* %out, i8 %a) #0 { ; GCN: v_bfe_i32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 8 ; GCN: v_bfe_i32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 8 ; GCN: v_bfe_i32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 8 -define amdgpu_kernel void @vgpr_sext_in_reg_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %a, <4 x i32> addrspace(1)* %b) #0 { - %loada = load <4 x i32>, <4 x i32> addrspace(1)* %a, align 16 - %loadb = load <4 x i32>, <4 x i32> addrspace(1)* %b, align 16 +define amdgpu_kernel void @vgpr_sext_in_reg_v4i8_to_v4i32(ptr addrspace(1) %out, ptr addrspace(1) %a, ptr addrspace(1) %b) #0 { + %loada = load <4 x i32>, ptr addrspace(1) %a, align 16 + %loadb = load <4 x i32>, ptr addrspace(1) %b, align 16 %c = add <4 x i32> %loada, %loadb ; add to prevent folding into extload %shl = shl <4 x i32> %c, %ashr = ashr <4 x i32> %shl, - store <4 x i32> %ashr, <4 x i32> addrspace(1)* %out, align 8 + store <4 x i32> %ashr, ptr addrspace(1) %out, align 8 ret void } ; FUNC-LABEL: {{^}}vgpr_sext_in_reg_v4i16_to_v4i32: ; GCN: v_bfe_i32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 16 ; GCN: v_bfe_i32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 16 -define amdgpu_kernel void @vgpr_sext_in_reg_v4i16_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %a, <4 x i32> addrspace(1)* %b) #0 { - %loada = load <4 x i32>, <4 x i32> addrspace(1)* %a, align 16 - %loadb = load <4 x i32>, <4 x i32> addrspace(1)* %b, align 16 +define amdgpu_kernel void @vgpr_sext_in_reg_v4i16_to_v4i32(ptr addrspace(1) %out, ptr addrspace(1) %a, ptr addrspace(1) %b) #0 { + %loada = load <4 x i32>, ptr addrspace(1) %a, align 16 + %loadb = load <4 x i32>, ptr addrspace(1) %b, align 16 %c = add <4 x i32> %loada, %loadb ; add to prevent folding into extload %shl = shl <4 x i32> %c, %ashr = ashr <4 x i32> %shl, - store <4 x i32> %ashr, <4 x i32> addrspace(1)* %out, align 8 + store <4 x i32> %ashr, ptr addrspace(1) %out, align 8 ret void } @@ -446,14 +446,14 @@ define amdgpu_kernel void @vgpr_sext_in_reg_v4i16_to_v4i32(<4 x i32> addrspace(1 ; GCN: v_max_i32 ; GCN-NOT: bfe ; GCN: buffer_store_short -define amdgpu_kernel void @sext_in_reg_to_illegal_type(i16 addrspace(1)* nocapture %out, i8 addrspace(1)* nocapture %src) #0 { - %tmp5 = load i8, i8 addrspace(1)* %src, align 1 +define amdgpu_kernel void @sext_in_reg_to_illegal_type(ptr addrspace(1) nocapture %out, ptr addrspace(1) nocapture %src) #0 { + %tmp5 = load i8, ptr addrspace(1) %src, align 1 %tmp2 = sext i8 %tmp5 to i32 %tmp2.5 = icmp sgt i32 %tmp2, 0 %tmp3 = select i1 %tmp2.5, i32 %tmp2, i32 0 %tmp4 = trunc i32 %tmp3 to i8 %tmp6 = sext i8 %tmp4 to i16 - store i16 %tmp6, i16 addrspace(1)* %out, align 2 + store i16 %tmp6, ptr addrspace(1) %out, align 2 ret void } @@ -472,20 +472,20 @@ define amdgpu_kernel void @sext_in_reg_to_illegal_type(i16 addrspace(1)* nocaptu ; GCN-DAG: v_and_b32_e32 v[[RESULT_HI:[0-9]+]], s{{[0-9]+}}, v[[HI]] ; SI: buffer_store_dwordx2 v[[[RESULT_LO]]:[[RESULT_HI]]] ; GFX89: {{flat|global}}_store_dwordx2 v{{.+}}, v[[[RESULT_LO]]:[[RESULT_HI]]] -define amdgpu_kernel void @v_sext_in_reg_i1_to_i64_move_use(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr, i64 %s.val) #0 { +define amdgpu_kernel 
void @v_sext_in_reg_i1_to_i64_move_use(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, i64 %s.val) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() - %a.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid - %b.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid - %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %tid - %a = load i64, i64 addrspace(1)* %a.gep, align 8 - %b = load i64, i64 addrspace(1)* %b.gep, align 8 + %a.gep = getelementptr i64, ptr addrspace(1) %aptr, i32 %tid + %b.gep = getelementptr i64, ptr addrspace(1) %aptr, i32 %tid + %out.gep = getelementptr i64, ptr addrspace(1) %out, i32 %tid + %a = load i64, ptr addrspace(1) %a.gep, align 8 + %b = load i64, ptr addrspace(1) %b.gep, align 8 %c = shl i64 %a, %b %shl = shl i64 %c, 63 %ashr = ashr i64 %shl, 63 %and = and i64 %ashr, %s.val - store i64 %and, i64 addrspace(1)* %out.gep, align 8 + store i64 %and, ptr addrspace(1) %out.gep, align 8 ret void } @@ -502,19 +502,19 @@ define amdgpu_kernel void @v_sext_in_reg_i1_to_i64_move_use(i64 addrspace(1)* %o ; SI: buffer_store_dwordx2 v[[[RESULT_LO]]:[[RESULT_HI]]] ; GFX89: {{flat|global}}_store_dwordx2 v{{.+}}, v[[[RESULT_LO]]:[[RESULT_HI]]] -define amdgpu_kernel void @v_sext_in_reg_i32_to_i64_move_use(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr, i64 %s.val) #0 { +define amdgpu_kernel void @v_sext_in_reg_i32_to_i64_move_use(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, i64 %s.val) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() - %a.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid - %b.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid - %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %tid - %a = load i64, i64 addrspace(1)* %a.gep, align 8 - %b = load i64, i64 addrspace(1)* %b.gep, align 8 + %a.gep = getelementptr i64, ptr addrspace(1) %aptr, i32 %tid + %b.gep = getelementptr i64, ptr addrspace(1) %aptr, i32 %tid + %out.gep = getelementptr i64, ptr addrspace(1) %out, i32 %tid + %a = load i64, ptr addrspace(1) %a.gep, align 8 + %b = load i64, ptr addrspace(1) %b.gep, align 8 %c = shl i64 %a, %b %shl = shl i64 %c, 32 %ashr = ashr i64 %shl, 32 %and = and i64 %ashr, %s.val - store i64 %and, i64 addrspace(1)* %out.gep, align 8 + store i64 %and, ptr addrspace(1) %out.gep, align 8 ret void } @@ -528,12 +528,12 @@ define amdgpu_kernel void @v_sext_in_reg_i32_to_i64_move_use(i64 addrspace(1)* % ; GFX89: s_lshl_b32 s{{[0-9]+}}, s{{[0-9]+}}, 15 ; GFX89: s_sext_i32_i16 s{{[0-9]+}}, s{{[0-9]+}} ; GFX89: s_lshr_b32 s{{[0-9]+}}, s{{[0-9]+}}, 15 -define amdgpu_kernel void @s_sext_in_reg_i1_i16(i16 addrspace(1)* %out, i32 addrspace(4)* %ptr) #0 { - %ld = load i32, i32 addrspace(4)* %ptr +define amdgpu_kernel void @s_sext_in_reg_i1_i16(ptr addrspace(1) %out, ptr addrspace(4) %ptr) #0 { + %ld = load i32, ptr addrspace(4) %ptr %in = trunc i32 %ld to i16 %shl = shl i16 %in, 15 %sext = ashr i16 %shl, 15 - store i16 %sext, i16 addrspace(1)* %out + store i16 %sext, ptr addrspace(1) %out ret void } @@ -547,12 +547,12 @@ define amdgpu_kernel void @s_sext_in_reg_i1_i16(i16 addrspace(1)* %out, i32 addr ; GFX89: s_lshl_b32 s{{[0-9]+}}, s{{[0-9]+}}, 14 ; GFX89: s_sext_i32_i16 s{{[0-9]+}}, s{{[0-9]+}} ; GFX89: s_lshr_b32 s{{[0-9]+}}, s{{[0-9]+}}, 14 -define amdgpu_kernel void @s_sext_in_reg_i2_i16(i16 addrspace(1)* %out, i32 addrspace(4)* %ptr) #0 { - %ld = load i32, i32 addrspace(4)* %ptr +define amdgpu_kernel void @s_sext_in_reg_i2_i16(ptr addrspace(1) %out, ptr addrspace(4) %ptr) #0 
{ + %ld = load i32, ptr addrspace(4) %ptr %in = trunc i32 %ld to i16 %shl = shl i16 %in, 14 %sext = ashr i16 %shl, 14 - store i16 %sext, i16 addrspace(1)* %out + store i16 %sext, ptr addrspace(1) %out ret void } @@ -561,15 +561,15 @@ define amdgpu_kernel void @s_sext_in_reg_i2_i16(i16 addrspace(1)* %out, i32 addr ; GCN: v_bfe_i32 [[BFE:v[0-9]+]], [[VAL]], 0, 1{{$}} ; GCN: ds_write_b16 v{{[0-9]+}}, [[BFE]] -define amdgpu_kernel void @v_sext_in_reg_i1_i16(i16 addrspace(3)* %out, i16 addrspace(1)* %ptr) #0 { +define amdgpu_kernel void @v_sext_in_reg_i1_i16(ptr addrspace(3) %out, ptr addrspace(1) %ptr) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() - %gep = getelementptr i16, i16 addrspace(1)* %ptr, i32 %tid - %out.gep = getelementptr i16, i16 addrspace(3)* %out, i32 %tid + %gep = getelementptr i16, ptr addrspace(1) %ptr, i32 %tid + %out.gep = getelementptr i16, ptr addrspace(3) %out, i32 %tid - %in = load i16, i16 addrspace(1)* %gep + %in = load i16, ptr addrspace(1) %gep %shl = shl i16 %in, 15 %sext = ashr i16 %shl, 15 - store i16 %sext, i16 addrspace(3)* %out.gep + store i16 %sext, ptr addrspace(3) %out.gep ret void } @@ -582,19 +582,19 @@ define amdgpu_kernel void @v_sext_in_reg_i1_i16(i16 addrspace(3)* %out, i16 addr ; GCN: v_bfe_i32 [[BFE:v[0-9]+]], [[REG]], 0, 1{{$}} ; GCN: ds_write_b16 v{{[0-9]+}}, [[BFE]] -define amdgpu_kernel void @v_sext_in_reg_i1_i16_nonload(i16 addrspace(3)* %out, i16 addrspace(1)* %aptr, i16 addrspace(1)* %bptr, i16 %s.val) nounwind { +define amdgpu_kernel void @v_sext_in_reg_i1_i16_nonload(ptr addrspace(3) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, i16 %s.val) nounwind { %tid = call i32 @llvm.amdgcn.workitem.id.x() - %a.gep = getelementptr i16, i16 addrspace(1)* %aptr, i32 %tid - %b.gep = getelementptr i16, i16 addrspace(1)* %bptr, i32 %tid - %out.gep = getelementptr i16, i16 addrspace(3)* %out, i32 %tid - %a = load volatile i16, i16 addrspace(1)* %a.gep, align 2 - %b = load volatile i16, i16 addrspace(1)* %b.gep, align 2 + %a.gep = getelementptr i16, ptr addrspace(1) %aptr, i32 %tid + %b.gep = getelementptr i16, ptr addrspace(1) %bptr, i32 %tid + %out.gep = getelementptr i16, ptr addrspace(3) %out, i32 %tid + %a = load volatile i16, ptr addrspace(1) %a.gep, align 2 + %b = load volatile i16, ptr addrspace(1) %b.gep, align 2 %c = shl i16 %a, %b %shl = shl i16 %c, 15 %ashr = ashr i16 %shl, 15 - store i16 %ashr, i16 addrspace(3)* %out.gep, align 2 + store i16 %ashr, ptr addrspace(3) %out.gep, align 2 ret void } @@ -608,10 +608,10 @@ define amdgpu_kernel void @v_sext_in_reg_i1_i16_nonload(i16 addrspace(3)* %out, ; GFX89: s_lshl_b32 s{{[0-9]+}}, s{{[0-9]+}}, 14{{$}} ; GFX89: s_sext_i32_i16 s{{[0-9]+}}, s{{[0-9]+}} ; GFX89: s_lshr_b32 s{{[0-9]+}}, s{{[0-9]+}}, 14{{$}} -define amdgpu_kernel void @s_sext_in_reg_i2_i16_arg(i16 addrspace(1)* %out, i16 %in) #0 { +define amdgpu_kernel void @s_sext_in_reg_i2_i16_arg(ptr addrspace(1) %out, i16 %in) #0 { %shl = shl i16 %in, 14 %sext = ashr i16 %shl, 14 - store i16 %sext, i16 addrspace(1)* %out + store i16 %sext, ptr addrspace(1) %out ret void } @@ -625,10 +625,10 @@ define amdgpu_kernel void @s_sext_in_reg_i2_i16_arg(i16 addrspace(1)* %out, i16 ; GFX89: s_lshl_b32 s{{[0-9]+}}, s{{[0-9]+}}, 8{{$}} ; GFX89: s_sext_i32_i16 s{{[0-9]+}}, s{{[0-9]+}} ; GFX89: s_lshr_b32 s{{[0-9]+}}, s{{[0-9]+}}, 8{{$}} -define amdgpu_kernel void @s_sext_in_reg_i8_i16_arg(i16 addrspace(1)* %out, i16 %in) #0 { +define amdgpu_kernel void @s_sext_in_reg_i8_i16_arg(ptr addrspace(1) %out, i16 %in) #0 { %shl = shl i16 %in, 8 %sext = ashr i16 
%shl, 8 - store i16 %sext, i16 addrspace(1)* %out + store i16 %sext, ptr addrspace(1) %out ret void } @@ -642,10 +642,10 @@ define amdgpu_kernel void @s_sext_in_reg_i8_i16_arg(i16 addrspace(1)* %out, i16 ; GFX89: s_lshl_b32 s{{[0-9]+}}, s{{[0-9]+}}, 1{{$}} ; GFX89: s_sext_i32_i16 s{{[0-9]+}}, s{{[0-9]+}} ; GFX89: s_lshr_b32 s{{[0-9]+}}, s{{[0-9]+}}, 1{{$}} -define amdgpu_kernel void @s_sext_in_reg_i15_i16_arg(i16 addrspace(1)* %out, i16 %in) #0 { +define amdgpu_kernel void @s_sext_in_reg_i15_i16_arg(ptr addrspace(1) %out, i16 %in) #0 { %shl = shl i16 %in, 1 %sext = ashr i16 %shl, 1 - store i16 %sext, i16 addrspace(1)* %out + store i16 %sext, ptr addrspace(1) %out ret void } @@ -653,11 +653,11 @@ define amdgpu_kernel void @s_sext_in_reg_i15_i16_arg(i16 addrspace(1)* %out, i16 ; GFX9: v_pk_add_u16 [[ADD:v[0-9]+]] ; GFX9: v_pk_lshlrev_b16 [[SHL:v[0-9]+]], 15, [[ADD]] ; GFX9: v_pk_ashrrev_i16 [[SRA:v[0-9]+]], 15, [[SHL]] -define amdgpu_kernel void @sext_in_reg_v2i1_to_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> %a, <2 x i16> %b) #0 { +define amdgpu_kernel void @sext_in_reg_v2i1_to_v2i16(ptr addrspace(1) %out, <2 x i16> %a, <2 x i16> %b) #0 { %c = add <2 x i16> %a, %b ; add to prevent folding into extload %shl = shl <2 x i16> %c, %ashr = ashr <2 x i16> %shl, - store <2 x i16> %ashr, <2 x i16> addrspace(1)* %out + store <2 x i16> %ashr, ptr addrspace(1) %out ret void } @@ -668,11 +668,11 @@ define amdgpu_kernel void @sext_in_reg_v2i1_to_v2i16(<2 x i16> addrspace(1)* %ou ; GFX9: v_pk_lshlrev_b16 v{{[0-9]+}}, 15, v{{[0-9]+}} ; GFX9: v_pk_ashrrev_i16 v{{[0-9]+}}, 15, v{{[0-9]+}} ; GFX9: v_pk_ashrrev_i16 v{{[0-9]+}}, 15, v{{[0-9]+}} -define amdgpu_kernel void @sext_in_reg_v3i1_to_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16> %a, <3 x i16> %b) #0 { +define amdgpu_kernel void @sext_in_reg_v3i1_to_v3i16(ptr addrspace(1) %out, <3 x i16> %a, <3 x i16> %b) #0 { %c = add <3 x i16> %a, %b ; add to prevent folding into extload %shl = shl <3 x i16> %c, %ashr = ashr <3 x i16> %shl, - store <3 x i16> %ashr, <3 x i16> addrspace(1)* %out + store <3 x i16> %ashr, ptr addrspace(1) %out ret void } @@ -680,11 +680,11 @@ define amdgpu_kernel void @sext_in_reg_v3i1_to_v3i16(<3 x i16> addrspace(1)* %ou ; GFX9: v_pk_add_u16 [[ADD:v[0-9]+]] ; GFX9: v_pk_lshlrev_b16 [[SHL:v[0-9]+]], 14, [[ADD]] ; GFX9: v_pk_ashrrev_i16 [[SRA:v[0-9]+]], 14, [[SHL]] -define amdgpu_kernel void @sext_in_reg_v2i2_to_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> %a, <2 x i16> %b) #0 { +define amdgpu_kernel void @sext_in_reg_v2i2_to_v2i16(ptr addrspace(1) %out, <2 x i16> %a, <2 x i16> %b) #0 { %c = add <2 x i16> %a, %b ; add to prevent folding into extload %shl = shl <2 x i16> %c, %ashr = ashr <2 x i16> %shl, - store <2 x i16> %ashr, <2 x i16> addrspace(1)* %out + store <2 x i16> %ashr, ptr addrspace(1) %out ret void } @@ -692,11 +692,11 @@ define amdgpu_kernel void @sext_in_reg_v2i2_to_v2i16(<2 x i16> addrspace(1)* %ou ; GFX9: v_pk_add_u16 [[ADD:v[0-9]+]] ; GFX9: v_pk_lshlrev_b16 [[SHL:v[0-9]+]], 8, [[ADD]] ; GFX9: v_pk_ashrrev_i16 [[SRA:v[0-9]+]], 8, [[SHL]] -define amdgpu_kernel void @sext_in_reg_v2i8_to_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> %a, <2 x i16> %b) #0 { +define amdgpu_kernel void @sext_in_reg_v2i8_to_v2i16(ptr addrspace(1) %out, <2 x i16> %a, <2 x i16> %b) #0 { %c = add <2 x i16> %a, %b ; add to prevent folding into extload %shl = shl <2 x i16> %c, %ashr = ashr <2 x i16> %shl, - store <2 x i16> %ashr, <2 x i16> addrspace(1)* %out + store <2 x i16> %ashr, ptr addrspace(1) %out ret void } @@ -707,11 +707,11 @@ define 
amdgpu_kernel void @sext_in_reg_v2i8_to_v2i16(<2 x i16> addrspace(1)* %ou ; GFX9: v_pk_lshlrev_b16 v{{[0-9]+}}, 8, v{{[0-9]+}} ; GFX9: v_pk_ashrrev_i16 v{{[0-9]+}}, 8, v{{[0-9]+}} ; GFX9: v_pk_ashrrev_i16 v{{[0-9]+}}, 8, v{{[0-9]+}} -define amdgpu_kernel void @sext_in_reg_v3i8_to_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16> %a, <3 x i16> %b) #0 { +define amdgpu_kernel void @sext_in_reg_v3i8_to_v3i16(ptr addrspace(1) %out, <3 x i16> %a, <3 x i16> %b) #0 { %c = add <3 x i16> %a, %b ; add to prevent folding into extload %shl = shl <3 x i16> %c, %ashr = ashr <3 x i16> %shl, - store <3 x i16> %ashr, <3 x i16> addrspace(1)* %out + store <3 x i16> %ashr, ptr addrspace(1) %out ret void } diff --git a/llvm/test/CodeGen/AMDGPU/sign_extend.ll b/llvm/test/CodeGen/AMDGPU/sign_extend.ll index 5dd8bee..9a03d21 100644 --- a/llvm/test/CodeGen/AMDGPU/sign_extend.ll +++ b/llvm/test/CodeGen/AMDGPU/sign_extend.ll @@ -2,7 +2,7 @@ ; RUN: llc -mtriple=amdgcn-- -amdgpu-scalarize-global-loads=false -mcpu=tahiti -verify-machineinstrs < %s | FileCheck %s -allow-deprecated-dag-overlap -enable-var-scope --check-prefix=SI ; RUN: llc -mtriple=amdgcn-- -amdgpu-scalarize-global-loads=false -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck %s -allow-deprecated-dag-overlap -enable-var-scope --check-prefix=VI -define amdgpu_kernel void @s_sext_i1_to_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind { +define amdgpu_kernel void @s_sext_i1_to_i32(ptr addrspace(1) %out, i32 %a, i32 %b) nounwind { ; SI-LABEL: s_sext_i1_to_i32: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -32,11 +32,11 @@ define amdgpu_kernel void @s_sext_i1_to_i32(i32 addrspace(1)* %out, i32 %a, i32 ; VI-NEXT: s_endpgm %cmp = icmp eq i32 %a, %b %sext = sext i1 %cmp to i32 - store i32 %sext, i32 addrspace(1)* %out, align 4 + store i32 %sext, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @test_s_sext_i32_to_i64(i64 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) nounwind { +define amdgpu_kernel void @test_s_sext_i32_to_i64(ptr addrspace(1) %out, i32 %a, i32 %b, i32 %c) nounwind { ; SI-LABEL: test_s_sext_i32_to_i64: ; SI: ; %bb.0: ; %entry ; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 @@ -74,11 +74,11 @@ entry: %mul = mul i32 %a, %b %add = add i32 %mul, %c %sext = sext i32 %add to i64 - store i64 %sext, i64 addrspace(1)* %out, align 8 + store i64 %sext, ptr addrspace(1) %out, align 8 ret void } -define amdgpu_kernel void @s_sext_i1_to_i64(i64 addrspace(1)* %out, i32 %a, i32 %b) nounwind { +define amdgpu_kernel void @s_sext_i1_to_i64(ptr addrspace(1) %out, i32 %a, i32 %b) nounwind { ; SI-LABEL: s_sext_i1_to_i64: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -110,11 +110,11 @@ define amdgpu_kernel void @s_sext_i1_to_i64(i64 addrspace(1)* %out, i32 %a, i32 ; VI-NEXT: s_endpgm %cmp = icmp eq i32 %a, %b %sext = sext i1 %cmp to i64 - store i64 %sext, i64 addrspace(1)* %out, align 8 + store i64 %sext, ptr addrspace(1) %out, align 8 ret void } -define amdgpu_kernel void @s_sext_i32_to_i64(i64 addrspace(1)* %out, i32 %a) nounwind { +define amdgpu_kernel void @s_sext_i32_to_i64(ptr addrspace(1) %out, i32 %a) nounwind { ; SI-LABEL: s_sext_i32_to_i64: ; SI: ; %bb.0: ; SI-NEXT: s_load_dword s4, s[0:1], 0xb @@ -141,11 +141,11 @@ define amdgpu_kernel void @s_sext_i32_to_i64(i64 addrspace(1)* %out, i32 %a) nou ; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 ; VI-NEXT: s_endpgm %sext = sext i32 %a to i64 - store i64 %sext, i64 addrspace(1)* %out, align 8 + store i64 %sext, ptr addrspace(1) 
%out, align 8 ret void } -define amdgpu_kernel void @v_sext_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind { +define amdgpu_kernel void @v_sext_i32_to_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind { ; SI-LABEL: v_sext_i32_to_i64: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -181,13 +181,13 @@ define amdgpu_kernel void @v_sext_i32_to_i64(i64 addrspace(1)* %out, i32 addrspa ; VI-NEXT: v_ashrrev_i32_e32 v1, 31, v0 ; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 ; VI-NEXT: s_endpgm - %val = load i32, i32 addrspace(1)* %in, align 4 + %val = load i32, ptr addrspace(1) %in, align 4 %sext = sext i32 %val to i64 - store i64 %sext, i64 addrspace(1)* %out, align 8 + store i64 %sext, ptr addrspace(1) %out, align 8 ret void } -define amdgpu_kernel void @s_sext_i16_to_i64(i64 addrspace(1)* %out, i16 %a) nounwind { +define amdgpu_kernel void @s_sext_i16_to_i64(ptr addrspace(1) %out, i16 %a) nounwind { ; SI-LABEL: s_sext_i16_to_i64: ; SI: ; %bb.0: ; SI-NEXT: s_load_dword s4, s[0:1], 0xb @@ -214,11 +214,11 @@ define amdgpu_kernel void @s_sext_i16_to_i64(i64 addrspace(1)* %out, i16 %a) nou ; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 ; VI-NEXT: s_endpgm %sext = sext i16 %a to i64 - store i64 %sext, i64 addrspace(1)* %out, align 8 + store i64 %sext, ptr addrspace(1) %out, align 8 ret void } -define amdgpu_kernel void @s_sext_i1_to_i16(i16 addrspace(1)* %out, i32 %a, i32 %b) nounwind { +define amdgpu_kernel void @s_sext_i1_to_i16(ptr addrspace(1) %out, i32 %a, i32 %b) nounwind { ; SI-LABEL: s_sext_i1_to_i16: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -248,7 +248,7 @@ define amdgpu_kernel void @s_sext_i1_to_i16(i16 addrspace(1)* %out, i32 %a, i32 ; VI-NEXT: s_endpgm %cmp = icmp eq i32 %a, %b %sext = sext i1 %cmp to i16 - store i16 %sext, i16 addrspace(1)* %out + store i16 %sext, ptr addrspace(1) %out ret void } @@ -256,7 +256,7 @@ define amdgpu_kernel void @s_sext_i1_to_i16(i16 addrspace(1)* %out, i32 %a, i32 ; makes it all the way throught the legalizer/optimizer to make sure ; we select this correctly. In the s_sext_i1_to_i16, the sign_extend node ; is optimized to a select very early. 
-define amdgpu_kernel void @s_sext_i1_to_i16_with_and(i16 addrspace(1)* %out, i32 %a, i32 %b, i32 %c, i32 %d) nounwind { +define amdgpu_kernel void @s_sext_i1_to_i16_with_and(ptr addrspace(1) %out, i32 %a, i32 %b, i32 %c, i32 %d) nounwind { ; SI-LABEL: s_sext_i1_to_i16_with_and: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0xb @@ -292,11 +292,11 @@ define amdgpu_kernel void @s_sext_i1_to_i16_with_and(i16 addrspace(1)* %out, i32 %cmp1 = icmp eq i32 %c, %d %cmp = and i1 %cmp0, %cmp1 %sext = sext i1 %cmp to i16 - store i16 %sext, i16 addrspace(1)* %out + store i16 %sext, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @v_sext_i1_to_i16_with_and(i16 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) nounwind { +define amdgpu_kernel void @v_sext_i1_to_i16_with_and(ptr addrspace(1) %out, i32 %a, i32 %b, i32 %c) nounwind { ; SI-LABEL: v_sext_i1_to_i16_with_and: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 @@ -335,7 +335,7 @@ define amdgpu_kernel void @v_sext_i1_to_i16_with_and(i16 addrspace(1)* %out, i32 %cmp1 = icmp eq i32 %b, %c %cmp = and i1 %cmp0, %cmp1 %sext = sext i1 %cmp to i16 - store i16 %sext, i16 addrspace(1)* %out + store i16 %sext, ptr addrspace(1) %out ret void } @@ -347,7 +347,7 @@ define amdgpu_kernel void @v_sext_i1_to_i16_with_and(i16 addrspace(1)* %out, i32 ; t55: i16 = srl t29, Constant:i32<8> ; t63: i32 = any_extend t55 ; t64: i32 = sign_extend_inreg t63, ValueType:ch:i8 -define amdgpu_kernel void @s_sext_v4i8_to_v4i32(i32 addrspace(1)* %out, i32 %a) nounwind { +define amdgpu_kernel void @s_sext_v4i8_to_v4i32(ptr addrspace(1) %out, i32 %a) nounwind { ; SI-LABEL: s_sext_v4i8_to_v4i32: ; SI: ; %bb.0: ; SI-NEXT: s_load_dword s4, s[0:1], 0xb @@ -403,16 +403,16 @@ define amdgpu_kernel void @s_sext_v4i8_to_v4i32(i32 addrspace(1)* %out, i32 %a) %elt1 = extractelement <4 x i32> %ext, i32 1 %elt2 = extractelement <4 x i32> %ext, i32 2 %elt3 = extractelement <4 x i32> %ext, i32 3 - store volatile i32 %elt0, i32 addrspace(1)* %out - store volatile i32 %elt1, i32 addrspace(1)* %out - store volatile i32 %elt2, i32 addrspace(1)* %out - store volatile i32 %elt3, i32 addrspace(1)* %out + store volatile i32 %elt0, ptr addrspace(1) %out + store volatile i32 %elt1, ptr addrspace(1) %out + store volatile i32 %elt2, ptr addrspace(1) %out + store volatile i32 %elt3, ptr addrspace(1) %out ret void } ; FIXME: need to optimize same sequence as above test to avoid ; this shift. 
-define amdgpu_kernel void @v_sext_v4i8_to_v4i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind { +define amdgpu_kernel void @v_sext_v4i8_to_v4i32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind { ; SI-LABEL: v_sext_v4i8_to_v4i32: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -469,22 +469,22 @@ define amdgpu_kernel void @v_sext_v4i8_to_v4i32(i32 addrspace(1)* %out, i32 addr ; VI-NEXT: buffer_store_dword v2, off, s[4:7], 0 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: s_endpgm - %a = load i32, i32 addrspace(1)* %in + %a = load i32, ptr addrspace(1) %in %cast = bitcast i32 %a to <4 x i8> %ext = sext <4 x i8> %cast to <4 x i32> %elt0 = extractelement <4 x i32> %ext, i32 0 %elt1 = extractelement <4 x i32> %ext, i32 1 %elt2 = extractelement <4 x i32> %ext, i32 2 %elt3 = extractelement <4 x i32> %ext, i32 3 - store volatile i32 %elt0, i32 addrspace(1)* %out - store volatile i32 %elt1, i32 addrspace(1)* %out - store volatile i32 %elt2, i32 addrspace(1)* %out - store volatile i32 %elt3, i32 addrspace(1)* %out + store volatile i32 %elt0, ptr addrspace(1) %out + store volatile i32 %elt1, ptr addrspace(1) %out + store volatile i32 %elt2, ptr addrspace(1) %out + store volatile i32 %elt3, ptr addrspace(1) %out ret void } ; FIXME: s_bfe_i64, same on SI and VI -define amdgpu_kernel void @s_sext_v4i16_to_v4i32(i32 addrspace(1)* %out, i64 %a) nounwind { +define amdgpu_kernel void @s_sext_v4i16_to_v4i32(ptr addrspace(1) %out, i64 %a) nounwind { ; SI-LABEL: s_sext_v4i16_to_v4i32: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -542,14 +542,14 @@ define amdgpu_kernel void @s_sext_v4i16_to_v4i32(i32 addrspace(1)* %out, i64 %a) %elt1 = extractelement <4 x i32> %ext, i32 1 %elt2 = extractelement <4 x i32> %ext, i32 2 %elt3 = extractelement <4 x i32> %ext, i32 3 - store volatile i32 %elt0, i32 addrspace(1)* %out - store volatile i32 %elt1, i32 addrspace(1)* %out - store volatile i32 %elt2, i32 addrspace(1)* %out - store volatile i32 %elt3, i32 addrspace(1)* %out + store volatile i32 %elt0, ptr addrspace(1) %out + store volatile i32 %elt1, ptr addrspace(1) %out + store volatile i32 %elt2, ptr addrspace(1) %out + store volatile i32 %elt3, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @v_sext_v4i16_to_v4i32(i32 addrspace(1)* %out, i64 addrspace(1)* %in) nounwind { +define amdgpu_kernel void @v_sext_v4i16_to_v4i32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind { ; SI-LABEL: v_sext_v4i16_to_v4i32: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -605,17 +605,17 @@ define amdgpu_kernel void @v_sext_v4i16_to_v4i32(i32 addrspace(1)* %out, i64 add ; VI-NEXT: buffer_store_dword v2, off, s[4:7], 0 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: s_endpgm - %a = load i64, i64 addrspace(1)* %in + %a = load i64, ptr addrspace(1) %in %cast = bitcast i64 %a to <4 x i16> %ext = sext <4 x i16> %cast to <4 x i32> %elt0 = extractelement <4 x i32> %ext, i32 0 %elt1 = extractelement <4 x i32> %ext, i32 1 %elt2 = extractelement <4 x i32> %ext, i32 2 %elt3 = extractelement <4 x i32> %ext, i32 3 - store volatile i32 %elt0, i32 addrspace(1)* %out - store volatile i32 %elt1, i32 addrspace(1)* %out - store volatile i32 %elt2, i32 addrspace(1)* %out - store volatile i32 %elt3, i32 addrspace(1)* %out + store volatile i32 %elt0, ptr addrspace(1) %out + store volatile i32 %elt1, ptr addrspace(1) %out + store volatile i32 %elt2, ptr addrspace(1) %out + store volatile i32 %elt3, ptr addrspace(1) %out ret void } diff --git a/llvm/test/CodeGen/AMDGPU/sint_to_fp.f64.ll 
b/llvm/test/CodeGen/AMDGPU/sint_to_fp.f64.ll index f4ff6c1..d1f0535 100644 --- a/llvm/test/CodeGen/AMDGPU/sint_to_fp.f64.ll +++ b/llvm/test/CodeGen/AMDGPU/sint_to_fp.f64.ll @@ -4,7 +4,7 @@ declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone -define amdgpu_kernel void @sint_to_fp_i32_to_f64(double addrspace(1)* %out, i32 %in) { +define amdgpu_kernel void @sint_to_fp_i32_to_f64(ptr addrspace(1) %out, i32 %in) { ; CI-LABEL: sint_to_fp_i32_to_f64: ; CI: ; %bb.0: ; CI-NEXT: s_load_dword s2, s[4:5], 0x2 @@ -27,13 +27,13 @@ define amdgpu_kernel void @sint_to_fp_i32_to_f64(double addrspace(1)* %out, i32 ; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; VI-NEXT: s_endpgm %result = sitofp i32 %in to double - store double %result, double addrspace(1)* %out + store double %result, ptr addrspace(1) %out ret void } ; We can't fold the SGPRs into v_cndmask_b32_e64, because it already ; uses an SGPR (implicit vcc). -define amdgpu_kernel void @sint_to_fp_i1_f64(double addrspace(1)* %out, i32 %in) { +define amdgpu_kernel void @sint_to_fp_i1_f64(ptr addrspace(1) %out, i32 %in) { ; CI-LABEL: sint_to_fp_i1_f64: ; CI: ; %bb.0: ; CI-NEXT: s_load_dword s2, s[4:5], 0x2 @@ -63,11 +63,11 @@ define amdgpu_kernel void @sint_to_fp_i1_f64(double addrspace(1)* %out, i32 %in) ; VI-NEXT: s_endpgm %cmp = icmp eq i32 %in, 0 %fp = sitofp i1 %cmp to double - store double %fp, double addrspace(1)* %out, align 4 + store double %fp, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @sint_to_fp_i1_f64_load(double addrspace(1)* %out, i1 %in) { +define amdgpu_kernel void @sint_to_fp_i1_f64_load(ptr addrspace(1) %out, i1 %in) { ; CI-LABEL: sint_to_fp_i1_f64_load: ; CI: ; %bb.0: ; CI-NEXT: s_load_dword s2, s[4:5], 0x2 @@ -96,11 +96,11 @@ define amdgpu_kernel void @sint_to_fp_i1_f64_load(double addrspace(1)* %out, i1 ; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; VI-NEXT: s_endpgm %fp = sitofp i1 %in to double - store double %fp, double addrspace(1)* %out, align 8 + store double %fp, ptr addrspace(1) %out, align 8 ret void } -define amdgpu_kernel void @s_sint_to_fp_i64_to_f64(double addrspace(1)* %out, i64 %in) { +define amdgpu_kernel void @s_sint_to_fp_i64_to_f64(ptr addrspace(1) %out, i64 %in) { ; CI-LABEL: s_sint_to_fp_i64_to_f64: ; CI: ; %bb.0: ; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 @@ -127,11 +127,11 @@ define amdgpu_kernel void @s_sint_to_fp_i64_to_f64(double addrspace(1)* %out, i6 ; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; VI-NEXT: s_endpgm %result = sitofp i64 %in to double - store double %result, double addrspace(1)* %out + store double %result, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @v_sint_to_fp_i64_to_f64(double addrspace(1)* %out, i64 addrspace(1)* %in) { +define amdgpu_kernel void @v_sint_to_fp_i64_to_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) { ; CI-LABEL: v_sint_to_fp_i64_to_f64: ; CI: ; %bb.0: ; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 @@ -170,15 +170,15 @@ define amdgpu_kernel void @v_sint_to_fp_i64_to_f64(double addrspace(1)* %out, i6 ; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; VI-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone - %gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid - %val = load i64, i64 addrspace(1)* %gep, align 8 + %gep = getelementptr i64, ptr addrspace(1) %in, i32 %tid + %val = load i64, ptr addrspace(1) %gep, align 8 %result = sitofp i64 %val to double - store double %result, double addrspace(1)* %out + store double %result, ptr addrspace(1) %out ret void } ; FIXME: bfe and sext on VI+ -define 
amdgpu_kernel void @s_sint_to_fp_i8_to_f64(double addrspace(1)* %out, i8 %in) { +define amdgpu_kernel void @s_sint_to_fp_i8_to_f64(ptr addrspace(1) %out, i8 %in) { ; CI-LABEL: s_sint_to_fp_i8_to_f64: ; CI: ; %bb.0: ; CI-NEXT: s_load_dword s2, s[4:5], 0x2 @@ -204,7 +204,7 @@ define amdgpu_kernel void @s_sint_to_fp_i8_to_f64(double addrspace(1)* %out, i8 ; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; VI-NEXT: s_endpgm %fp = sitofp i8 %in to double - store double %fp, double addrspace(1)* %out + store double %fp, ptr addrspace(1) %out ret void } @@ -227,7 +227,7 @@ define double @v_sint_to_fp_i8_to_f64(i8 %in) { ret double %fp } -define amdgpu_kernel void @s_select_sint_to_fp_i1_vals_f64(double addrspace(1)* %out, i32 %in) { +define amdgpu_kernel void @s_select_sint_to_fp_i1_vals_f64(ptr addrspace(1) %out, i32 %in) { ; CI-LABEL: s_select_sint_to_fp_i1_vals_f64: ; CI: ; %bb.0: ; CI-NEXT: s_load_dword s2, s[4:5], 0x2 @@ -257,11 +257,11 @@ define amdgpu_kernel void @s_select_sint_to_fp_i1_vals_f64(double addrspace(1)* ; VI-NEXT: s_endpgm %cmp = icmp eq i32 %in, 0 %select = select i1 %cmp, double -1.0, double 0.0 - store double %select, double addrspace(1)* %out, align 8 + store double %select, ptr addrspace(1) %out, align 8 ret void } -define void @v_select_sint_to_fp_i1_vals_f64(double addrspace(1)* %out, i32 %in) { +define void @v_select_sint_to_fp_i1_vals_f64(ptr addrspace(1) %out, i32 %in) { ; GCN-LABEL: v_select_sint_to_fp_i1_vals_f64: ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -274,11 +274,11 @@ define void @v_select_sint_to_fp_i1_vals_f64(double addrspace(1)* %out, i32 %in) ; GCN-NEXT: s_setpc_b64 s[30:31] %cmp = icmp eq i32 %in, 0 %select = select i1 %cmp, double -1.0, double 0.0 - store double %select, double addrspace(1)* %out, align 8 + store double %select, ptr addrspace(1) %out, align 8 ret void } -define amdgpu_kernel void @s_select_sint_to_fp_i1_vals_i64(i64 addrspace(1)* %out, i32 %in) { +define amdgpu_kernel void @s_select_sint_to_fp_i1_vals_i64(ptr addrspace(1) %out, i32 %in) { ; CI-LABEL: s_select_sint_to_fp_i1_vals_i64: ; CI: ; %bb.0: ; CI-NEXT: s_load_dword s2, s[4:5], 0x2 @@ -308,11 +308,11 @@ define amdgpu_kernel void @s_select_sint_to_fp_i1_vals_i64(i64 addrspace(1)* %ou ; VI-NEXT: s_endpgm %cmp = icmp eq i32 %in, 0 %select = select i1 %cmp, i64 u0xbff0000000000000, i64 0 - store i64 %select, i64 addrspace(1)* %out, align 8 + store i64 %select, ptr addrspace(1) %out, align 8 ret void } -define void @v_select_sint_to_fp_i1_vals_i64(i64 addrspace(1)* %out, i32 %in) { +define void @v_select_sint_to_fp_i1_vals_i64(ptr addrspace(1) %out, i32 %in) { ; GCN-LABEL: v_select_sint_to_fp_i1_vals_i64: ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -325,12 +325,12 @@ define void @v_select_sint_to_fp_i1_vals_i64(i64 addrspace(1)* %out, i32 %in) { ; GCN-NEXT: s_setpc_b64 s[30:31] %cmp = icmp eq i32 %in, 0 %select = select i1 %cmp, i64 u0xbff0000000000000, i64 0 - store i64 %select, i64 addrspace(1)* %out, align 8 + store i64 %select, ptr addrspace(1) %out, align 8 ret void } ; TODO: This should swap the selected order / invert the compare and do it. 
-define void @v_swap_select_sint_to_fp_i1_vals_f64(double addrspace(1)* %out, i32 %in) { +define void @v_swap_select_sint_to_fp_i1_vals_f64(ptr addrspace(1) %out, i32 %in) { ; GCN-LABEL: v_swap_select_sint_to_fp_i1_vals_f64: ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -343,12 +343,12 @@ define void @v_swap_select_sint_to_fp_i1_vals_f64(double addrspace(1)* %out, i32 ; GCN-NEXT: s_setpc_b64 s[30:31] %cmp = icmp eq i32 %in, 0 %select = select i1 %cmp, double 0.0, double -1.0 - store double %select, double addrspace(1)* %out, align 8 + store double %select, ptr addrspace(1) %out, align 8 ret void } ; TODO: This should swap the selected order / invert the compare and do it. -define amdgpu_kernel void @s_swap_select_sint_to_fp_i1_vals_f64(double addrspace(1)* %out, i32 %in) { +define amdgpu_kernel void @s_swap_select_sint_to_fp_i1_vals_f64(ptr addrspace(1) %out, i32 %in) { ; CI-LABEL: s_swap_select_sint_to_fp_i1_vals_f64: ; CI: ; %bb.0: ; CI-NEXT: s_load_dword s2, s[4:5], 0x2 @@ -378,6 +378,6 @@ define amdgpu_kernel void @s_swap_select_sint_to_fp_i1_vals_f64(double addrspace ; VI-NEXT: s_endpgm %cmp = icmp eq i32 %in, 0 %select = select i1 %cmp, double 0.0, double -1.0 - store double %select, double addrspace(1)* %out, align 8 + store double %select, ptr addrspace(1) %out, align 8 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/sint_to_fp.i64.ll b/llvm/test/CodeGen/AMDGPU/sint_to_fp.i64.ll index 27bfcce..f7d57da 100644 --- a/llvm/test/CodeGen/AMDGPU/sint_to_fp.i64.ll +++ b/llvm/test/CodeGen/AMDGPU/sint_to_fp.i64.ll @@ -4,7 +4,7 @@ ; FIXME: This should be merged with sint_to_fp.ll, but s_sint_to_fp_v2i64 crashes on r600 -define amdgpu_kernel void @s_sint_to_fp_i64_to_f16(half addrspace(1)* %out, i64 %in) #0 { +define amdgpu_kernel void @s_sint_to_fp_i64_to_f16(ptr addrspace(1) %out, i64 %in) #0 { ; GFX6-LABEL: s_sint_to_fp_i64_to_f16: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -51,11 +51,11 @@ define amdgpu_kernel void @s_sint_to_fp_i64_to_f16(half addrspace(1)* %out, i64 ; GFX8-NEXT: flat_store_short v[0:1], v2 ; GFX8-NEXT: s_endpgm %result = sitofp i64 %in to half - store half %result, half addrspace(1)* %out + store half %result, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @v_sint_to_fp_i64_to_f16(half addrspace(1)* %out, i64 addrspace(1)* %in) #0 { +define amdgpu_kernel void @v_sint_to_fp_i64_to_f16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { ; GFX6-LABEL: v_sint_to_fp_i64_to_f16: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -115,15 +115,15 @@ define amdgpu_kernel void @v_sint_to_fp_i64_to_f16(half addrspace(1)* %out, i64 ; GFX8-NEXT: flat_store_short v[0:1], v3 ; GFX8-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() - %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid - %out.gep = getelementptr half, half addrspace(1)* %out, i32 %tid - %val = load i64, i64 addrspace(1)* %in.gep + %in.gep = getelementptr i64, ptr addrspace(1) %in, i32 %tid + %out.gep = getelementptr half, ptr addrspace(1) %out, i32 %tid + %val = load i64, ptr addrspace(1) %in.gep %result = sitofp i64 %val to half - store half %result, half addrspace(1)* %out.gep + store half %result, ptr addrspace(1) %out.gep ret void } -define amdgpu_kernel void @s_sint_to_fp_i64_to_f32(float addrspace(1)* %out, i64 %in) #0 { +define amdgpu_kernel void @s_sint_to_fp_i64_to_f32(ptr addrspace(1) %out, i64 %in) #0 { ; GFX6-LABEL: s_sint_to_fp_i64_to_f32: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ 
-168,11 +168,11 @@ define amdgpu_kernel void @s_sint_to_fp_i64_to_f32(float addrspace(1)* %out, i64 ; GFX8-NEXT: flat_store_dword v[0:1], v2 ; GFX8-NEXT: s_endpgm %result = sitofp i64 %in to float - store float %result, float addrspace(1)* %out + store float %result, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @v_sint_to_fp_i64_to_f32(float addrspace(1)* %out, i64 addrspace(1)* %in) #0 { +define amdgpu_kernel void @v_sint_to_fp_i64_to_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { ; GFX6-LABEL: v_sint_to_fp_i64_to_f32: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -230,15 +230,15 @@ define amdgpu_kernel void @v_sint_to_fp_i64_to_f32(float addrspace(1)* %out, i64 ; GFX8-NEXT: flat_store_dword v[0:1], v2 ; GFX8-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() - %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid - %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid - %val = load i64, i64 addrspace(1)* %in.gep + %in.gep = getelementptr i64, ptr addrspace(1) %in, i32 %tid + %out.gep = getelementptr float, ptr addrspace(1) %out, i32 %tid + %val = load i64, ptr addrspace(1) %in.gep %result = sitofp i64 %val to float - store float %result, float addrspace(1)* %out.gep + store float %result, ptr addrspace(1) %out.gep ret void } -define amdgpu_kernel void @s_sint_to_fp_v2i64_to_v2f32(<2 x float> addrspace(1)* %out, <2 x i64> %in) #0{ +define amdgpu_kernel void @s_sint_to_fp_v2i64_to_v2f32(ptr addrspace(1) %out, <2 x i64> %in) #0{ ; GFX6-LABEL: s_sint_to_fp_v2i64_to_v2f32: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0xd @@ -307,11 +307,11 @@ define amdgpu_kernel void @s_sint_to_fp_v2i64_to_v2f32(<2 x float> addrspace(1)* ; GFX8-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; GFX8-NEXT: s_endpgm %result = sitofp <2 x i64> %in to <2 x float> - store <2 x float> %result, <2 x float> addrspace(1)* %out + store <2 x float> %result, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @v_sint_to_fp_v4i64_to_v4f32(<4 x float> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) #0 { +define amdgpu_kernel void @v_sint_to_fp_v4i64_to_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { ; GFX6-LABEL: v_sint_to_fp_v4i64_to_v4f32: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -448,15 +448,15 @@ define amdgpu_kernel void @v_sint_to_fp_v4i64_to_v4f32(<4 x float> addrspace(1)* ; GFX8-NEXT: flat_store_dwordx4 v[9:10], v[0:3] ; GFX8-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() - %in.gep = getelementptr <4 x i64>, <4 x i64> addrspace(1)* %in, i32 %tid - %out.gep = getelementptr <4 x float>, <4 x float> addrspace(1)* %out, i32 %tid - %value = load <4 x i64>, <4 x i64> addrspace(1)* %in.gep + %in.gep = getelementptr <4 x i64>, ptr addrspace(1) %in, i32 %tid + %out.gep = getelementptr <4 x float>, ptr addrspace(1) %out, i32 %tid + %value = load <4 x i64>, ptr addrspace(1) %in.gep %result = sitofp <4 x i64> %value to <4 x float> - store <4 x float> %result, <4 x float> addrspace(1)* %out.gep + store <4 x float> %result, ptr addrspace(1) %out.gep ret void } -define amdgpu_kernel void @s_sint_to_fp_v2i64_to_v2f16(<2 x half> addrspace(1)* %out, <2 x i64> %in) #0{ +define amdgpu_kernel void @s_sint_to_fp_v2i64_to_v2f16(ptr addrspace(1) %out, <2 x i64> %in) #0{ ; GFX6-LABEL: s_sint_to_fp_v2i64_to_v2f16: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0xd @@ -532,11 +532,11 @@ define amdgpu_kernel void @s_sint_to_fp_v2i64_to_v2f16(<2 x half> addrspace(1)* ; GFX8-NEXT: 
flat_store_dword v[0:1], v2 ; GFX8-NEXT: s_endpgm %result = sitofp <2 x i64> %in to <2 x half> - store <2 x half> %result, <2 x half> addrspace(1)* %out + store <2 x half> %result, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @v_sint_to_fp_v4i64_to_v4f16(<4 x half> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) #0 { +define amdgpu_kernel void @v_sint_to_fp_v4i64_to_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { ; GFX6-LABEL: v_sint_to_fp_v4i64_to_v4f16: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -687,11 +687,11 @@ define amdgpu_kernel void @v_sint_to_fp_v4i64_to_v4f16(<4 x half> addrspace(1)* ; GFX8-NEXT: flat_store_dwordx2 v[0:1], v[2:3] ; GFX8-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() - %in.gep = getelementptr <4 x i64>, <4 x i64> addrspace(1)* %in, i32 %tid - %out.gep = getelementptr <4 x half>, <4 x half> addrspace(1)* %out, i32 %tid - %value = load <4 x i64>, <4 x i64> addrspace(1)* %in.gep + %in.gep = getelementptr <4 x i64>, ptr addrspace(1) %in, i32 %tid + %out.gep = getelementptr <4 x half>, ptr addrspace(1) %out, i32 %tid + %value = load <4 x i64>, ptr addrspace(1) %in.gep %result = sitofp <4 x i64> %value to <4 x half> - store <4 x half> %result, <4 x half> addrspace(1)* %out.gep + store <4 x half> %result, ptr addrspace(1) %out.gep ret void } diff --git a/llvm/test/CodeGen/AMDGPU/sint_to_fp.ll b/llvm/test/CodeGen/AMDGPU/sint_to_fp.ll index b62b0de..73f8140 100644 --- a/llvm/test/CodeGen/AMDGPU/sint_to_fp.ll +++ b/llvm/test/CodeGen/AMDGPU/sint_to_fp.ll @@ -6,9 +6,9 @@ ; SI: v_cvt_f32_i32_e32 {{v[0-9]+}}, {{s[0-9]+$}} ; R600: INT_TO_FLT * T{{[0-9]+\.[XYZW]}}, KC0[2].Z -define amdgpu_kernel void @s_sint_to_fp_i32_to_f32(float addrspace(1)* %out, i32 %in) #0 { +define amdgpu_kernel void @s_sint_to_fp_i32_to_f32(ptr addrspace(1) %out, i32 %in) #0 { %result = sitofp i32 %in to float - store float %result, float addrspace(1)* %out + store float %result, ptr addrspace(1) %out ret void } @@ -16,13 +16,13 @@ define amdgpu_kernel void @s_sint_to_fp_i32_to_f32(float addrspace(1)* %out, i32 ; SI: v_cvt_f32_i32_e32 {{v[0-9]+}}, {{v[0-9]+$}} ; R600: INT_TO_FLT -define amdgpu_kernel void @v_sint_to_fp_i32_to_f32(float addrspace(1)* %out, i32 addrspace(1)* %in) #0 { +define amdgpu_kernel void @v_sint_to_fp_i32_to_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() - %in.gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid - %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid - %val = load i32, i32 addrspace(1)* %in.gep + %in.gep = getelementptr i32, ptr addrspace(1) %in, i32 %tid + %out.gep = getelementptr float, ptr addrspace(1) %out, i32 %tid + %val = load i32, ptr addrspace(1) %in.gep %result = sitofp i32 %val to float - store float %result, float addrspace(1)* %out.gep + store float %result, ptr addrspace(1) %out.gep ret void } @@ -32,9 +32,9 @@ define amdgpu_kernel void @v_sint_to_fp_i32_to_f32(float addrspace(1)* %out, i32 ; R600-DAG: INT_TO_FLT * T{{[0-9]+\.[XYZW]}}, KC0[2].W ; R600-DAG: INT_TO_FLT * T{{[0-9]+\.[XYZW]}}, KC0[3].X -define amdgpu_kernel void @s_sint_to_fp_v2i32(<2 x float> addrspace(1)* %out, <2 x i32> %in) #0{ +define amdgpu_kernel void @s_sint_to_fp_v2i32(ptr addrspace(1) %out, <2 x i32> %in) #0{ %result = sitofp <2 x i32> %in to <2 x float> - store <2 x float> %result, <2 x float> addrspace(1)* %out + store <2 x float> %result, ptr addrspace(1) %out ret void } @@ -49,10 +49,10 @@ define amdgpu_kernel void @s_sint_to_fp_v2i32(<2 x float> 
addrspace(1)* %out, <2 ; R600: INT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ; R600: INT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ; R600: INT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -define amdgpu_kernel void @s_sint_to_fp_v4i32_to_v4f32(<4 x float> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) #0 { - %value = load <4 x i32>, <4 x i32> addrspace(1) * %in +define amdgpu_kernel void @s_sint_to_fp_v4i32_to_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { + %value = load <4 x i32>, ptr addrspace(1) %in %result = sitofp <4 x i32> %value to <4 x float> - store <4 x float> %result, <4 x float> addrspace(1)* %out + store <4 x float> %result, ptr addrspace(1) %out ret void } @@ -66,13 +66,13 @@ define amdgpu_kernel void @s_sint_to_fp_v4i32_to_v4f32(<4 x float> addrspace(1)* ; R600: INT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ; R600: INT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ; R600: INT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -define amdgpu_kernel void @v_sint_to_fp_v4i32(<4 x float> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) #0 { +define amdgpu_kernel void @v_sint_to_fp_v4i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() - %in.gep = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 %tid - %out.gep = getelementptr <4 x float>, <4 x float> addrspace(1)* %out, i32 %tid - %value = load <4 x i32>, <4 x i32> addrspace(1)* %in.gep + %in.gep = getelementptr <4 x i32>, ptr addrspace(1) %in, i32 %tid + %out.gep = getelementptr <4 x float>, ptr addrspace(1) %out, i32 %tid + %value = load <4 x i32>, ptr addrspace(1) %in.gep %result = sitofp <4 x i32> %value to <4 x float> - store <4 x float> %result, <4 x float> addrspace(1)* %out.gep + store <4 x float> %result, ptr addrspace(1) %out.gep ret void } @@ -82,10 +82,10 @@ define amdgpu_kernel void @v_sint_to_fp_v4i32(<4 x float> addrspace(1)* %out, <4 ; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1.0, [[CMP]] ; SI: buffer_store_dword [[RESULT]], ; SI: s_endpgm -define amdgpu_kernel void @s_sint_to_fp_i1_f32(float addrspace(1)* %out, i32 %in) #0 { +define amdgpu_kernel void @s_sint_to_fp_i1_f32(ptr addrspace(1) %out, i32 %in) #0 { %cmp = icmp eq i32 %in, 0 %fp = uitofp i1 %cmp to float - store float %fp, float addrspace(1)* %out + store float %fp, ptr addrspace(1) %out ret void } @@ -93,9 +93,9 @@ define amdgpu_kernel void @s_sint_to_fp_i1_f32(float addrspace(1)* %out, i32 %in ; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1.0 ; SI: buffer_store_dword [[RESULT]], ; SI: s_endpgm -define amdgpu_kernel void @s_sint_to_fp_i1_f32_load(float addrspace(1)* %out, i1 %in) #0 { +define amdgpu_kernel void @s_sint_to_fp_i1_f32_load(ptr addrspace(1) %out, i1 %in) #0 { %fp = sitofp i1 %in to float - store float %fp, float addrspace(1)* %out + store float %fp, ptr addrspace(1) %out ret void } @@ -106,13 +106,13 @@ define amdgpu_kernel void @s_sint_to_fp_i1_f32_load(float addrspace(1)* %out, i1 ; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1.0 ; SI: {{buffer|flat}}_store_dword {{.*}}[[RESULT]] ; SI: s_endpgm -define amdgpu_kernel void @v_sint_to_fp_i1_f32_load(float addrspace(1)* %out, i1 addrspace(1)* %in) #0 { +define amdgpu_kernel void @v_sint_to_fp_i1_f32_load(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() - %in.gep = getelementptr i1, i1 addrspace(1)* %in, i32 %tid - %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid - %val = load i1, i1 addrspace(1)* %in.gep + %in.gep = getelementptr i1, ptr 
addrspace(1) %in, i32 %tid + %out.gep = getelementptr float, ptr addrspace(1) %out, i32 %tid + %val = load i1, ptr addrspace(1) %in.gep %fp = sitofp i1 %val to float - store float %fp, float addrspace(1)* %out.gep + store float %fp, ptr addrspace(1) %out.gep ret void } diff --git a/llvm/test/CodeGen/AMDGPU/sitofp.f16.ll b/llvm/test/CodeGen/AMDGPU/sitofp.f16.ll index 144c693..aeeb2fd 100644 --- a/llvm/test/CodeGen/AMDGPU/sitofp.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/sitofp.f16.ll @@ -12,12 +12,12 @@ ; GCN: buffer_store_short v[[R_F16]] ; GCN: s_endpgm define amdgpu_kernel void @sitofp_i16_to_f16( - half addrspace(1)* %r, - i16 addrspace(1)* %a) { + ptr addrspace(1) %r, + ptr addrspace(1) %a) { entry: - %a.val = load i16, i16 addrspace(1)* %a + %a.val = load i16, ptr addrspace(1) %a %r.val = sitofp i16 %a.val to half - store half %r.val, half addrspace(1)* %r + store half %r.val, ptr addrspace(1) %r ret void } @@ -28,12 +28,12 @@ entry: ; GCN: buffer_store_short v[[R_F16]] ; GCN: s_endpgm define amdgpu_kernel void @sitofp_i32_to_f16( - half addrspace(1)* %r, - i32 addrspace(1)* %a) { + ptr addrspace(1) %r, + ptr addrspace(1) %a) { entry: - %a.val = load i32, i32 addrspace(1)* %a + %a.val = load i32, ptr addrspace(1) %a %r.val = sitofp i32 %a.val to half - store half %r.val, half addrspace(1)* %r + store half %r.val, ptr addrspace(1) %r ret void } @@ -57,12 +57,12 @@ entry: ; GCN: s_endpgm define amdgpu_kernel void @sitofp_v2i16_to_v2f16( - <2 x half> addrspace(1)* %r, - <2 x i16> addrspace(1)* %a) { + ptr addrspace(1) %r, + ptr addrspace(1) %a) { entry: - %a.val = load <2 x i16>, <2 x i16> addrspace(1)* %a + %a.val = load <2 x i16>, ptr addrspace(1) %a %r.val = sitofp <2 x i16> %a.val to <2 x half> - store <2 x half> %r.val, <2 x half> addrspace(1)* %r + store <2 x half> %r.val, ptr addrspace(1) %r ret void } @@ -85,12 +85,12 @@ entry: ; GCN: buffer_store_dword ; GCN: s_endpgm define amdgpu_kernel void @sitofp_v2i32_to_v2f16( - <2 x half> addrspace(1)* %r, - <2 x i32> addrspace(1)* %a) { + ptr addrspace(1) %r, + ptr addrspace(1) %a) { entry: - %a.val = load <2 x i32>, <2 x i32> addrspace(1)* %a + %a.val = load <2 x i32>, ptr addrspace(1) %a %r.val = sitofp <2 x i32> %a.val to <2 x half> - store <2 x half> %r.val, <2 x half> addrspace(1)* %r + store <2 x half> %r.val, ptr addrspace(1) %r ret void } @@ -102,14 +102,14 @@ entry: ; GCN-NEXT: v_cvt_f16_f32_e32 [[R_F16:v[0-9]+]], [[RESULT]] ; GCN: buffer_store_short ; GCN: s_endpgm -define amdgpu_kernel void @s_sint_to_fp_i1_to_f16(half addrspace(1)* %out, float addrspace(1)* %in0, float addrspace(1)* %in1) { - %a = load float, float addrspace(1) * %in0 - %b = load float, float addrspace(1) * %in1 +define amdgpu_kernel void @s_sint_to_fp_i1_to_f16(ptr addrspace(1) %out, ptr addrspace(1) %in0, ptr addrspace(1) %in1) { + %a = load float, ptr addrspace(1) %in0 + %b = load float, ptr addrspace(1) %in1 %acmp = fcmp oge float %a, 0.000000e+00 %bcmp = fcmp oge float %b, 1.000000e+00 %result = xor i1 %acmp, %bcmp %fp = sitofp i1 %result to half - store half %fp, half addrspace(1)* %out + store half %fp, ptr addrspace(1) %out ret void } diff --git a/llvm/test/CodeGen/AMDGPU/uint_to_fp.f64.ll b/llvm/test/CodeGen/AMDGPU/uint_to_fp.f64.ll index ddb9aef..ba52d70 100644 --- a/llvm/test/CodeGen/AMDGPU/uint_to_fp.f64.ll +++ b/llvm/test/CodeGen/AMDGPU/uint_to_fp.f64.ll @@ -4,7 +4,7 @@ declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone -define amdgpu_kernel void @v_uint_to_fp_i64_to_f64(double addrspace(1)* %out, i64 addrspace(1)* %in) { +define 
amdgpu_kernel void @v_uint_to_fp_i64_to_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) { ; SI-LABEL: v_uint_to_fp_i64_to_f64: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 @@ -43,14 +43,14 @@ define amdgpu_kernel void @v_uint_to_fp_i64_to_f64(double addrspace(1)* %out, i6 ; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; VI-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone - %gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid - %val = load i64, i64 addrspace(1)* %gep, align 8 + %gep = getelementptr i64, ptr addrspace(1) %in, i32 %tid + %val = load i64, ptr addrspace(1) %gep, align 8 %result = uitofp i64 %val to double - store double %result, double addrspace(1)* %out + store double %result, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @s_uint_to_fp_i64_to_f64(double addrspace(1)* %out, i64 %in) { +define amdgpu_kernel void @s_uint_to_fp_i64_to_f64(ptr addrspace(1) %out, i64 %in) { ; SI-LABEL: s_uint_to_fp_i64_to_f64: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 @@ -77,11 +77,11 @@ define amdgpu_kernel void @s_uint_to_fp_i64_to_f64(double addrspace(1)* %out, i6 ; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; VI-NEXT: s_endpgm %cast = uitofp i64 %in to double - store double %cast, double addrspace(1)* %out, align 8 + store double %cast, ptr addrspace(1) %out, align 8 ret void } -define amdgpu_kernel void @s_uint_to_fp_v2i64_to_v2f64(<2 x double> addrspace(1)* %out, <2 x i64> %in) { +define amdgpu_kernel void @s_uint_to_fp_v2i64_to_v2f64(ptr addrspace(1) %out, <2 x i64> %in) { ; SI-LABEL: s_uint_to_fp_v2i64_to_v2f64: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x4 @@ -119,11 +119,11 @@ define amdgpu_kernel void @s_uint_to_fp_v2i64_to_v2f64(<2 x double> addrspace(1) ; VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3] ; VI-NEXT: s_endpgm %cast = uitofp <2 x i64> %in to <2 x double> - store <2 x double> %cast, <2 x double> addrspace(1)* %out, align 16 + store <2 x double> %cast, ptr addrspace(1) %out, align 16 ret void } -define amdgpu_kernel void @s_uint_to_fp_v4i64_to_v4f64(<4 x double> addrspace(1)* %out, <4 x i64> %in) { +define amdgpu_kernel void @s_uint_to_fp_v4i64_to_v4f64(ptr addrspace(1) %out, <4 x i64> %in) { ; SI-LABEL: s_uint_to_fp_v4i64_to_v4f64: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x8 @@ -187,11 +187,11 @@ define amdgpu_kernel void @s_uint_to_fp_v4i64_to_v4f64(<4 x double> addrspace(1) ; VI-NEXT: flat_store_dwordx4 v[8:9], v[0:3] ; VI-NEXT: s_endpgm %cast = uitofp <4 x i64> %in to <4 x double> - store <4 x double> %cast, <4 x double> addrspace(1)* %out, align 16 + store <4 x double> %cast, ptr addrspace(1) %out, align 16 ret void } -define amdgpu_kernel void @s_uint_to_fp_i32_to_f64(double addrspace(1)* %out, i32 %in) { +define amdgpu_kernel void @s_uint_to_fp_i32_to_f64(ptr addrspace(1) %out, i32 %in) { ; SI-LABEL: s_uint_to_fp_i32_to_f64: ; SI: ; %bb.0: ; SI-NEXT: s_load_dword s2, s[4:5], 0x2 @@ -214,11 +214,11 @@ define amdgpu_kernel void @s_uint_to_fp_i32_to_f64(double addrspace(1)* %out, i3 ; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; VI-NEXT: s_endpgm %cast = uitofp i32 %in to double - store double %cast, double addrspace(1)* %out, align 8 + store double %cast, ptr addrspace(1) %out, align 8 ret void } -define amdgpu_kernel void @s_uint_to_fp_v2i32_to_v2f64(<2 x double> addrspace(1)* %out, <2 x i32> %in) { +define amdgpu_kernel void @s_uint_to_fp_v2i32_to_v2f64(ptr addrspace(1) %out, <2 x i32> %in) { ; GCN-LABEL: s_uint_to_fp_v2i32_to_v2f64: ; GCN: ; %bb.0: ; GCN-NEXT: 
s_load_dwordx4 s[0:3], s[4:5], 0x0 @@ -230,11 +230,11 @@ define amdgpu_kernel void @s_uint_to_fp_v2i32_to_v2f64(<2 x double> addrspace(1) ; GCN-NEXT: flat_store_dwordx4 v[4:5], v[0:3] ; GCN-NEXT: s_endpgm %cast = uitofp <2 x i32> %in to <2 x double> - store <2 x double> %cast, <2 x double> addrspace(1)* %out, align 16 + store <2 x double> %cast, ptr addrspace(1) %out, align 16 ret void } -define amdgpu_kernel void @s_uint_to_fp_v4i32_to_v4f64(<4 x double> addrspace(1)* %out, <4 x i32> %in) { +define amdgpu_kernel void @s_uint_to_fp_v4i32_to_v4f64(ptr addrspace(1) %out, <4 x i32> %in) { ; SI-LABEL: s_uint_to_fp_v4i32_to_v4f64: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x4 @@ -275,13 +275,13 @@ define amdgpu_kernel void @s_uint_to_fp_v4i32_to_v4f64(<4 x double> addrspace(1) ; VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3] ; VI-NEXT: s_endpgm %cast = uitofp <4 x i32> %in to <4 x double> - store <4 x double> %cast, <4 x double> addrspace(1)* %out, align 16 + store <4 x double> %cast, ptr addrspace(1) %out, align 16 ret void } ; We can't fold the SGPRs into v_cndmask_b32_e32, because it already ; uses an SGPR (implicit vcc). -define amdgpu_kernel void @uint_to_fp_i1_to_f64(double addrspace(1)* %out, i32 %in) { +define amdgpu_kernel void @uint_to_fp_i1_to_f64(ptr addrspace(1) %out, i32 %in) { ; SI-LABEL: uint_to_fp_i1_to_f64: ; SI: ; %bb.0: ; SI-NEXT: s_load_dword s2, s[4:5], 0x2 @@ -311,11 +311,11 @@ define amdgpu_kernel void @uint_to_fp_i1_to_f64(double addrspace(1)* %out, i32 % ; VI-NEXT: s_endpgm %cmp = icmp eq i32 %in, 0 %fp = uitofp i1 %cmp to double - store double %fp, double addrspace(1)* %out, align 4 + store double %fp, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @uint_to_fp_i1_to_f64_load(double addrspace(1)* %out, i1 %in) { +define amdgpu_kernel void @uint_to_fp_i1_to_f64_load(ptr addrspace(1) %out, i1 %in) { ; SI-LABEL: uint_to_fp_i1_to_f64_load: ; SI: ; %bb.0: ; SI-NEXT: s_load_dword s2, s[4:5], 0x2 @@ -344,11 +344,11 @@ define amdgpu_kernel void @uint_to_fp_i1_to_f64_load(double addrspace(1)* %out, ; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; VI-NEXT: s_endpgm %fp = uitofp i1 %in to double - store double %fp, double addrspace(1)* %out, align 8 + store double %fp, ptr addrspace(1) %out, align 8 ret void } -define amdgpu_kernel void @s_uint_to_fp_i8_to_f64(double addrspace(1)* %out, i8 %in) { +define amdgpu_kernel void @s_uint_to_fp_i8_to_f64(ptr addrspace(1) %out, i8 %in) { ; SI-LABEL: s_uint_to_fp_i8_to_f64: ; SI: ; %bb.0: ; SI-NEXT: s_load_dword s2, s[4:5], 0x2 @@ -373,7 +373,7 @@ define amdgpu_kernel void @s_uint_to_fp_i8_to_f64(double addrspace(1)* %out, i8 ; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; VI-NEXT: s_endpgm %fp = uitofp i8 %in to double - store double %fp, double addrspace(1)* %out + store double %fp, ptr addrspace(1) %out ret void } @@ -397,7 +397,7 @@ define double @v_uint_to_fp_i8_to_f64(i8 %in) { ret double %fp } -define amdgpu_kernel void @s_select_uint_to_fp_i1_vals_f64(double addrspace(1)* %out, i32 %in) { +define amdgpu_kernel void @s_select_uint_to_fp_i1_vals_f64(ptr addrspace(1) %out, i32 %in) { ; SI-LABEL: s_select_uint_to_fp_i1_vals_f64: ; SI: ; %bb.0: ; SI-NEXT: s_load_dword s2, s[4:5], 0x2 @@ -427,11 +427,11 @@ define amdgpu_kernel void @s_select_uint_to_fp_i1_vals_f64(double addrspace(1)* ; VI-NEXT: s_endpgm %cmp = icmp eq i32 %in, 0 %select = select i1 %cmp, double 1.0, double 0.0 - store double %select, double addrspace(1)* %out, align 8 + store double %select, ptr addrspace(1) %out, align 8 ret void } -define 
void @v_select_uint_to_fp_i1_vals_f64(double addrspace(1)* %out, i32 %in) { +define void @v_select_uint_to_fp_i1_vals_f64(ptr addrspace(1) %out, i32 %in) { ; GCN-LABEL: v_select_uint_to_fp_i1_vals_f64: ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -444,11 +444,11 @@ define void @v_select_uint_to_fp_i1_vals_f64(double addrspace(1)* %out, i32 %in) ; GCN-NEXT: s_setpc_b64 s[30:31] %cmp = icmp eq i32 %in, 0 %select = select i1 %cmp, double 1.0, double 0.0 - store double %select, double addrspace(1)* %out, align 8 + store double %select, ptr addrspace(1) %out, align 8 ret void } -define amdgpu_kernel void @s_select_uint_to_fp_i1_vals_i64(i64 addrspace(1)* %out, i32 %in) { +define amdgpu_kernel void @s_select_uint_to_fp_i1_vals_i64(ptr addrspace(1) %out, i32 %in) { ; SI-LABEL: s_select_uint_to_fp_i1_vals_i64: ; SI: ; %bb.0: ; SI-NEXT: s_load_dword s2, s[4:5], 0x2 @@ -478,11 +478,11 @@ define amdgpu_kernel void @s_select_uint_to_fp_i1_vals_i64(i64 addrspace(1)* %ou ; VI-NEXT: s_endpgm %cmp = icmp eq i32 %in, 0 %select = select i1 %cmp, i64 u0x3ff0000000000000, i64 0 - store i64 %select, i64 addrspace(1)* %out, align 8 + store i64 %select, ptr addrspace(1) %out, align 8 ret void } -define void @v_select_uint_to_fp_i1_vals_i64(i64 addrspace(1)* %out, i32 %in) { +define void @v_select_uint_to_fp_i1_vals_i64(ptr addrspace(1) %out, i32 %in) { ; GCN-LABEL: v_select_uint_to_fp_i1_vals_i64: ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -495,12 +495,12 @@ define void @v_select_uint_to_fp_i1_vals_i64(i64 addrspace(1)* %out, i32 %in) { ; GCN-NEXT: s_setpc_b64 s[30:31] %cmp = icmp eq i32 %in, 0 %select = select i1 %cmp, i64 u0x3ff0000000000000, i64 0 - store i64 %select, i64 addrspace(1)* %out, align 8 + store i64 %select, ptr addrspace(1) %out, align 8 ret void } ; TODO: This should swap the selected order / invert the compare and do it. 
-define amdgpu_kernel void @s_swap_select_uint_to_fp_i1_vals_f64(double addrspace(1)* %out, i32 %in) { +define amdgpu_kernel void @s_swap_select_uint_to_fp_i1_vals_f64(ptr addrspace(1) %out, i32 %in) { ; SI-LABEL: s_swap_select_uint_to_fp_i1_vals_f64: ; SI: ; %bb.0: ; SI-NEXT: s_load_dword s2, s[4:5], 0x2 @@ -530,11 +530,11 @@ define amdgpu_kernel void @s_swap_select_uint_to_fp_i1_vals_f64(double addrspace ; VI-NEXT: s_endpgm %cmp = icmp eq i32 %in, 0 %select = select i1 %cmp, double 0.0, double 1.0 - store double %select, double addrspace(1)* %out, align 8 + store double %select, ptr addrspace(1) %out, align 8 ret void } -define void @v_swap_select_uint_to_fp_i1_vals_f64(double addrspace(1)* %out, i32 %in) { +define void @v_swap_select_uint_to_fp_i1_vals_f64(ptr addrspace(1) %out, i32 %in) { ; GCN-LABEL: v_swap_select_uint_to_fp_i1_vals_f64: ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -547,6 +547,6 @@ define void @v_swap_select_uint_to_fp_i1_vals_f64(double addrspace(1)* %out, i32 ; GCN-NEXT: s_setpc_b64 s[30:31] %cmp = icmp eq i32 %in, 0 %select = select i1 %cmp, double 0.0, double 1.0 - store double %select, double addrspace(1)* %out, align 8 + store double %select, ptr addrspace(1) %out, align 8 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/uint_to_fp.i64.ll b/llvm/test/CodeGen/AMDGPU/uint_to_fp.i64.ll index 8f8acf4..226facf 100644 --- a/llvm/test/CodeGen/AMDGPU/uint_to_fp.i64.ll +++ b/llvm/test/CodeGen/AMDGPU/uint_to_fp.i64.ll @@ -4,7 +4,7 @@ ; FIXME: This should be merged with uint_to_fp.ll, but s_uint_to_fp_v2i64 crashes on r600 -define amdgpu_kernel void @s_uint_to_fp_i64_to_f16(half addrspace(1)* %out, i64 %in) #0 { +define amdgpu_kernel void @s_uint_to_fp_i64_to_f16(ptr addrspace(1) %out, i64 %in) #0 { ; GFX6-LABEL: s_uint_to_fp_i64_to_f16: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -43,11 +43,11 @@ define amdgpu_kernel void @s_uint_to_fp_i64_to_f16(half addrspace(1)* %out, i64 ; GFX8-NEXT: flat_store_short v[0:1], v2 ; GFX8-NEXT: s_endpgm %result = uitofp i64 %in to half - store half %result, half addrspace(1)* %out + store half %result, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @v_uint_to_fp_i64_to_f16(half addrspace(1)* %out, i64 addrspace(1)* %in) #0 { +define amdgpu_kernel void @v_uint_to_fp_i64_to_f16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { ; GFX6-LABEL: v_uint_to_fp_i64_to_f16: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -99,15 +99,15 @@ define amdgpu_kernel void @v_uint_to_fp_i64_to_f16(half addrspace(1)* %out, i64 ; GFX8-NEXT: flat_store_short v[0:1], v3 ; GFX8-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() - %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid - %out.gep = getelementptr half, half addrspace(1)* %out, i32 %tid - %val = load i64, i64 addrspace(1)* %in.gep + %in.gep = getelementptr i64, ptr addrspace(1) %in, i32 %tid + %out.gep = getelementptr half, ptr addrspace(1) %out, i32 %tid + %val = load i64, ptr addrspace(1) %in.gep %result = uitofp i64 %val to half - store half %result, half addrspace(1)* %out.gep + store half %result, ptr addrspace(1) %out.gep ret void } -define amdgpu_kernel void @s_uint_to_fp_i64_to_f32(float addrspace(1)* %out, i64 %in) #0 { +define amdgpu_kernel void @s_uint_to_fp_i64_to_f32(ptr addrspace(1) %out, i64 %in) #0 { ; GFX6-LABEL: s_uint_to_fp_i64_to_f32: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -144,11 +144,11 @@ define amdgpu_kernel void @s_uint_to_fp_i64_to_f32(float 
addrspace(1)* %out, i64 ; GFX8-NEXT: flat_store_dword v[0:1], v2 ; GFX8-NEXT: s_endpgm %result = uitofp i64 %in to float - store float %result, float addrspace(1)* %out + store float %result, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @v_uint_to_fp_i64_to_f32(float addrspace(1)* %out, i64 addrspace(1)* %in) #0 { +define amdgpu_kernel void @v_uint_to_fp_i64_to_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { ; GFX6-LABEL: v_uint_to_fp_i64_to_f32: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -198,15 +198,15 @@ define amdgpu_kernel void @v_uint_to_fp_i64_to_f32(float addrspace(1)* %out, i64 ; GFX8-NEXT: flat_store_dword v[0:1], v2 ; GFX8-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() - %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid - %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid - %val = load i64, i64 addrspace(1)* %in.gep + %in.gep = getelementptr i64, ptr addrspace(1) %in, i32 %tid + %out.gep = getelementptr float, ptr addrspace(1) %out, i32 %tid + %val = load i64, ptr addrspace(1) %in.gep %result = uitofp i64 %val to float - store float %result, float addrspace(1)* %out.gep + store float %result, ptr addrspace(1) %out.gep ret void } -define amdgpu_kernel void @s_uint_to_fp_v2i64_to_v2f32(<2 x float> addrspace(1)* %out, <2 x i64> %in) #0{ +define amdgpu_kernel void @s_uint_to_fp_v2i64_to_v2f32(ptr addrspace(1) %out, <2 x i64> %in) #0{ ; GFX6-LABEL: s_uint_to_fp_v2i64_to_v2f32: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0xd @@ -259,11 +259,11 @@ define amdgpu_kernel void @s_uint_to_fp_v2i64_to_v2f32(<2 x float> addrspace(1)* ; GFX8-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; GFX8-NEXT: s_endpgm %result = uitofp <2 x i64> %in to <2 x float> - store <2 x float> %result, <2 x float> addrspace(1)* %out + store <2 x float> %result, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @v_uint_to_fp_v4i64_to_v4f32(<4 x float> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) #0 { +define amdgpu_kernel void @v_uint_to_fp_v4i64_to_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { ; GFX6-LABEL: v_uint_to_fp_v4i64_to_v4f32: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -368,15 +368,15 @@ define amdgpu_kernel void @v_uint_to_fp_v4i64_to_v4f32(<4 x float> addrspace(1)* ; GFX8-NEXT: flat_store_dwordx4 v[9:10], v[0:3] ; GFX8-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() - %in.gep = getelementptr <4 x i64>, <4 x i64> addrspace(1)* %in, i32 %tid - %out.gep = getelementptr <4 x float>, <4 x float> addrspace(1)* %out, i32 %tid - %value = load <4 x i64>, <4 x i64> addrspace(1)* %in.gep + %in.gep = getelementptr <4 x i64>, ptr addrspace(1) %in, i32 %tid + %out.gep = getelementptr <4 x float>, ptr addrspace(1) %out, i32 %tid + %value = load <4 x i64>, ptr addrspace(1) %in.gep %result = uitofp <4 x i64> %value to <4 x float> - store <4 x float> %result, <4 x float> addrspace(1)* %out.gep + store <4 x float> %result, ptr addrspace(1) %out.gep ret void } -define amdgpu_kernel void @s_uint_to_fp_v2i64_to_v2f16(<2 x half> addrspace(1)* %out, <2 x i64> %in) #0{ +define amdgpu_kernel void @s_uint_to_fp_v2i64_to_v2f16(ptr addrspace(1) %out, <2 x i64> %in) #0{ ; GFX6-LABEL: s_uint_to_fp_v2i64_to_v2f16: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0xd @@ -436,11 +436,11 @@ define amdgpu_kernel void @s_uint_to_fp_v2i64_to_v2f16(<2 x half> addrspace(1)* ; GFX8-NEXT: flat_store_dword v[0:1], v2 ; GFX8-NEXT: s_endpgm %result = uitofp <2 x i64> %in to <2 x 
half> - store <2 x half> %result, <2 x half> addrspace(1)* %out + store <2 x half> %result, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @v_uint_to_fp_v4i64_to_v4f16(<4 x half> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) #0 { +define amdgpu_kernel void @v_uint_to_fp_v4i64_to_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { ; GFX6-LABEL: v_uint_to_fp_v4i64_to_v4f16: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -559,11 +559,11 @@ define amdgpu_kernel void @v_uint_to_fp_v4i64_to_v4f16(<4 x half> addrspace(1)* ; GFX8-NEXT: flat_store_dwordx2 v[0:1], v[2:3] ; GFX8-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() - %in.gep = getelementptr <4 x i64>, <4 x i64> addrspace(1)* %in, i32 %tid - %out.gep = getelementptr <4 x half>, <4 x half> addrspace(1)* %out, i32 %tid - %value = load <4 x i64>, <4 x i64> addrspace(1)* %in.gep + %in.gep = getelementptr <4 x i64>, ptr addrspace(1) %in, i32 %tid + %out.gep = getelementptr <4 x half>, ptr addrspace(1) %out, i32 %tid + %value = load <4 x i64>, ptr addrspace(1) %in.gep %result = uitofp <4 x i64> %value to <4 x half> - store <4 x half> %result, <4 x half> addrspace(1)* %out.gep + store <4 x half> %result, ptr addrspace(1) %out.gep ret void } diff --git a/llvm/test/CodeGen/AMDGPU/uint_to_fp.ll b/llvm/test/CodeGen/AMDGPU/uint_to_fp.ll index e0a1a75..34cf1b6 100644 --- a/llvm/test/CodeGen/AMDGPU/uint_to_fp.ll +++ b/llvm/test/CodeGen/AMDGPU/uint_to_fp.ll @@ -6,9 +6,9 @@ ; SI: v_cvt_f32_u32_e32 ; R600: UINT_TO_FLT * T{{[0-9]+\.[XYZW]}}, KC0[2].Z -define amdgpu_kernel void @s_uint_to_fp_i32_to_f32(float addrspace(1)* %out, i32 %in) #0 { +define amdgpu_kernel void @s_uint_to_fp_i32_to_f32(ptr addrspace(1) %out, i32 %in) #0 { %result = uitofp i32 %in to float - store float %result, float addrspace(1)* %out + store float %result, ptr addrspace(1) %out ret void } @@ -16,13 +16,13 @@ define amdgpu_kernel void @s_uint_to_fp_i32_to_f32(float addrspace(1)* %out, i32 ; SI: v_cvt_f32_u32_e32 {{v[0-9]+}}, {{v[0-9]+$}} ; R600: INT_TO_FLT -define amdgpu_kernel void @v_uint_to_fp_i32_to_f32(float addrspace(1)* %out, i32 addrspace(1)* %in) #0 { +define amdgpu_kernel void @v_uint_to_fp_i32_to_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() - %in.gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid - %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid - %val = load i32, i32 addrspace(1)* %in.gep + %in.gep = getelementptr i32, ptr addrspace(1) %in, i32 %tid + %out.gep = getelementptr float, ptr addrspace(1) %out, i32 %tid + %val = load i32, ptr addrspace(1) %in.gep %result = uitofp i32 %val to float - store float %result, float addrspace(1)* %out.gep + store float %result, ptr addrspace(1) %out.gep ret void } @@ -32,9 +32,9 @@ define amdgpu_kernel void @v_uint_to_fp_i32_to_f32(float addrspace(1)* %out, i32 ; R600-DAG: UINT_TO_FLT * T{{[0-9]+\.[XYZW]}}, KC0[2].W ; R600-DAG: UINT_TO_FLT * T{{[0-9]+\.[XYZW]}}, KC0[3].X -define amdgpu_kernel void @s_uint_to_fp_v2i32_to_v2f32(<2 x float> addrspace(1)* %out, <2 x i32> %in) #0 { +define amdgpu_kernel void @s_uint_to_fp_v2i32_to_v2f32(ptr addrspace(1) %out, <2 x i32> %in) #0 { %result = uitofp <2 x i32> %in to <2 x float> - store <2 x float> %result, <2 x float> addrspace(1)* %out + store <2 x float> %result, ptr addrspace(1) %out ret void } @@ -49,10 +49,10 @@ define amdgpu_kernel void @s_uint_to_fp_v2i32_to_v2f32(<2 x float> addrspace(1)* ; R600: UINT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ; R600: 
UINT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ; R600: UINT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -define amdgpu_kernel void @s_uint_to_fp_v4i32_to_v4f32(<4 x float> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) #0 { - %value = load <4 x i32>, <4 x i32> addrspace(1) * %in +define amdgpu_kernel void @s_uint_to_fp_v4i32_to_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { + %value = load <4 x i32>, ptr addrspace(1) %in %result = uitofp <4 x i32> %value to <4 x float> - store <4 x float> %result, <4 x float> addrspace(1)* %out + store <4 x float> %result, ptr addrspace(1) %out ret void } @@ -66,13 +66,13 @@ define amdgpu_kernel void @s_uint_to_fp_v4i32_to_v4f32(<4 x float> addrspace(1)* ; R600: UINT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ; R600: UINT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ; R600: UINT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -define amdgpu_kernel void @v_uint_to_fp_v4i32(<4 x float> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) #0 { +define amdgpu_kernel void @v_uint_to_fp_v4i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() - %in.gep = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 %tid - %out.gep = getelementptr <4 x float>, <4 x float> addrspace(1)* %out, i32 %tid - %value = load <4 x i32>, <4 x i32> addrspace(1)* %in.gep + %in.gep = getelementptr <4 x i32>, ptr addrspace(1) %in, i32 %tid + %out.gep = getelementptr <4 x float>, ptr addrspace(1) %out, i32 %tid + %value = load <4 x i32>, ptr addrspace(1) %in.gep %result = uitofp <4 x i32> %value to <4 x float> - store <4 x float> %result, <4 x float> addrspace(1)* %out.gep + store <4 x float> %result, ptr addrspace(1) %out.gep ret void } @@ -82,10 +82,10 @@ define amdgpu_kernel void @v_uint_to_fp_v4i32(<4 x float> addrspace(1)* %out, <4 ; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1.0, [[CMP]] ; SI: buffer_store_dword [[RESULT]], ; SI: s_endpgm -define amdgpu_kernel void @s_uint_to_fp_i1_to_f32(float addrspace(1)* %out, i32 %in) #0 { +define amdgpu_kernel void @s_uint_to_fp_i1_to_f32(ptr addrspace(1) %out, i32 %in) #0 { %cmp = icmp eq i32 %in, 0 %fp = uitofp i1 %cmp to float - store float %fp, float addrspace(1)* %out + store float %fp, ptr addrspace(1) %out ret void } @@ -93,9 +93,9 @@ define amdgpu_kernel void @s_uint_to_fp_i1_to_f32(float addrspace(1)* %out, i32 ; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1.0 ; SI: buffer_store_dword [[RESULT]], ; SI: s_endpgm -define amdgpu_kernel void @s_uint_to_fp_i1_to_f32_load(float addrspace(1)* %out, i1 %in) #0 { +define amdgpu_kernel void @s_uint_to_fp_i1_to_f32_load(ptr addrspace(1) %out, i1 %in) #0 { %fp = uitofp i1 %in to float - store float %fp, float addrspace(1)* %out + store float %fp, ptr addrspace(1) %out ret void } @@ -106,13 +106,13 @@ define amdgpu_kernel void @s_uint_to_fp_i1_to_f32_load(float addrspace(1)* %out, ; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1.0 ; SI: {{buffer|flat}}_store_dword {{.*}}[[RESULT]] ; SI: s_endpgm -define amdgpu_kernel void @v_uint_to_fp_i1_f32_load(float addrspace(1)* %out, i1 addrspace(1)* %in) #0 { +define amdgpu_kernel void @v_uint_to_fp_i1_f32_load(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() - %in.gep = getelementptr i1, i1 addrspace(1)* %in, i32 %tid - %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid - %val = load i1, i1 addrspace(1)* %in.gep + %in.gep = getelementptr i1, ptr addrspace(1) %in, i32 %tid + %out.gep = getelementptr float, ptr addrspace(1) %out, 
i32 %tid + %val = load i1, ptr addrspace(1) %in.gep %fp = uitofp i1 %val to float - store float %fp, float addrspace(1)* %out.gep + store float %fp, ptr addrspace(1) %out.gep ret void } @@ -122,10 +122,10 @@ define amdgpu_kernel void @v_uint_to_fp_i1_f32_load(float addrspace(1)* %out, i1 ; R600: CNDE_INT ; R600: UINT_TO_FLT -define amdgpu_kernel void @s_uint_to_fp_i64_to_f32(float addrspace(1)* %out, i64 %in) #0 { +define amdgpu_kernel void @s_uint_to_fp_i64_to_f32(ptr addrspace(1) %out, i64 %in) #0 { entry: %cvt = uitofp i64 %in to float - store float %cvt, float addrspace(1)* %out + store float %cvt, ptr addrspace(1) %out ret void } diff --git a/llvm/test/CodeGen/AMDGPU/uitofp.f16.ll b/llvm/test/CodeGen/AMDGPU/uitofp.f16.ll index 99a3141..fd99d80 100644 --- a/llvm/test/CodeGen/AMDGPU/uitofp.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/uitofp.f16.ll @@ -11,12 +11,12 @@ ; GCN: buffer_store_short v[[R_F16]] ; GCN: s_endpgm define amdgpu_kernel void @uitofp_i16_to_f16( - half addrspace(1)* %r, - i16 addrspace(1)* %a) { + ptr addrspace(1) %r, + ptr addrspace(1) %a) { entry: - %a.val = load i16, i16 addrspace(1)* %a + %a.val = load i16, ptr addrspace(1) %a %r.val = uitofp i16 %a.val to half - store half %r.val, half addrspace(1)* %r + store half %r.val, ptr addrspace(1) %r ret void } @@ -27,12 +27,12 @@ entry: ; GCN: buffer_store_short v[[R_F16]] ; GCN: s_endpgm define amdgpu_kernel void @uitofp_i32_to_f16( - half addrspace(1)* %r, - i32 addrspace(1)* %a) { + ptr addrspace(1) %r, + ptr addrspace(1) %a) { entry: - %a.val = load i32, i32 addrspace(1)* %a + %a.val = load i32, ptr addrspace(1) %a %r.val = uitofp i32 %a.val to half - store half %r.val, half addrspace(1)* %r + store half %r.val, ptr addrspace(1) %r ret void } @@ -56,12 +56,12 @@ entry: ; GCN: buffer_store_dword ; GCN: s_endpgm define amdgpu_kernel void @uitofp_v2i16_to_v2f16( - <2 x half> addrspace(1)* %r, - <2 x i16> addrspace(1)* %a) { + ptr addrspace(1) %r, + ptr addrspace(1) %a) { entry: - %a.val = load <2 x i16>, <2 x i16> addrspace(1)* %a + %a.val = load <2 x i16>, ptr addrspace(1) %a %r.val = uitofp <2 x i16> %a.val to <2 x half> - store <2 x half> %r.val, <2 x half> addrspace(1)* %r + store <2 x half> %r.val, ptr addrspace(1) %r ret void } @@ -84,12 +84,12 @@ entry: ; GCN: buffer_store_dword ; GCN: s_endpgm define amdgpu_kernel void @uitofp_v2i32_to_v2f16( - <2 x half> addrspace(1)* %r, - <2 x i32> addrspace(1)* %a) { + ptr addrspace(1) %r, + ptr addrspace(1) %a) { entry: - %a.val = load <2 x i32>, <2 x i32> addrspace(1)* %a + %a.val = load <2 x i32>, ptr addrspace(1) %a %r.val = uitofp <2 x i32> %a.val to <2 x half> - store <2 x half> %r.val, <2 x half> addrspace(1)* %r + store <2 x half> %r.val, ptr addrspace(1) %r ret void } @@ -101,14 +101,14 @@ entry: ; GCN-NEXT: v_cvt_f16_f32_e32 [[R_F16:v[0-9]+]], [[RESULT]] ; GCN: buffer_store_short ; GCN: s_endpgm -define amdgpu_kernel void @s_uint_to_fp_i1_to_f16(half addrspace(1)* %out, float addrspace(1)* %in0, float addrspace(1)* %in1) { - %a = load float, float addrspace(1) * %in0 - %b = load float, float addrspace(1) * %in1 +define amdgpu_kernel void @s_uint_to_fp_i1_to_f16(ptr addrspace(1) %out, ptr addrspace(1) %in0, ptr addrspace(1) %in1) { + %a = load float, ptr addrspace(1) %in0 + %b = load float, ptr addrspace(1) %in1 %acmp = fcmp oge float %a, 0.000000e+00 %bcmp = fcmp oge float %b, 1.000000e+00 %result = xor i1 %acmp, %bcmp %fp = uitofp i1 %result to half - store half %fp, half addrspace(1)* %out + store half %fp, ptr addrspace(1) %out ret void } -- 2.7.4
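
For reference only (not part of the patch above): a minimal, hypothetical kernel sketching the typed-pointer to opaque-pointer rewrite these tests received. The function name @example and its body are illustrative assumptions, not taken from any of the converted files; the pattern is that pointee types disappear from pointer operands while value types stay on getelementptr, load, and store.

; Typed-pointer form (before the conversion):
;   define amdgpu_kernel void @example(float addrspace(1)* %out, i32 addrspace(1)* %in) {
;     %gep = getelementptr i32, i32 addrspace(1)* %in, i32 1
;     %val = load i32, i32 addrspace(1)* %gep
;     %cvt = uitofp i32 %val to float
;     store float %cvt, float addrspace(1)* %out
;     ret void
;   }
; Opaque-pointer form (after the conversion):
define amdgpu_kernel void @example(ptr addrspace(1) %out, ptr addrspace(1) %in) {
  %gep = getelementptr i32, ptr addrspace(1) %in, i32 1 ; element type stays on the gep
  %val = load i32, ptr addrspace(1) %gep                ; loaded type stays on the load
  %cvt = uitofp i32 %val to float
  store float %cvt, ptr addrspace(1) %out               ; pointee type dropped from the pointer operand
  ret void
}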