-; RUN: opt -opaque-pointers=0 -S -mtriple=amdgcn-- -amdgpu-lower-module-lds --amdgpu-lower-module-lds-strategy=module < %s | FileCheck %s
-; RUN: opt -opaque-pointers=0 -S -mtriple=amdgcn-- -passes=amdgpu-lower-module-lds --amdgpu-lower-module-lds-strategy=module < %s | FileCheck %s
+; RUN: opt -S -mtriple=amdgcn-- -amdgpu-lower-module-lds --amdgpu-lower-module-lds-strategy=module < %s | FileCheck %s
+; RUN: opt -S -mtriple=amdgcn-- -passes=amdgpu-lower-module-lds --amdgpu-lower-module-lds-strategy=module < %s | FileCheck %s
; Internal addrspace(3) byte arrays; each name spells out the array size and
; the alignment it is declared with.
@lds.size.1.align.1 = internal unnamed_addr addrspace(3) global [1 x i8] undef, align 1
@lds.size.2.align.2 = internal unnamed_addr addrspace(3) global [2 x i8] undef, align 2
;.
; CHECK: @llvm.amdgcn.module.lds = internal addrspace(3) global %llvm.amdgcn.module.lds.t undef, align 8
-; CHECK: @llvm.compiler.used = appending global [1 x i8*] [i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds (%llvm.amdgcn.module.lds.t, %llvm.amdgcn.module.lds.t addrspace(3)* @llvm.amdgcn.module.lds, i32 0, i32 0, i32 0) to i8*)], section "llvm.metadata"
+; CHECK: @llvm.compiler.used = appending global [1 x ptr] [ptr addrspacecast (ptr addrspace(3) @llvm.amdgcn.module.lds to ptr)], section "llvm.metadata"
; CHECK: @llvm.amdgcn.kernel.k0.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k0.lds.t undef, align 16
; CHECK: @llvm.amdgcn.kernel.k1.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k1.lds.t undef, align 16
; CHECK: @llvm.amdgcn.kernel.k2.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k2.lds.t undef, align 2
;.
; Kernel that writes one byte to each of four addrspace(3) globals (the
; 1-, 2-, 4- and 16-byte ones). The expected output redirects every store
; into @llvm.amdgcn.kernel.k0.lds: fields 3, 2 and 1 through constant GEPs
; and the last store to the struct's own address, with alignments 2, 4, 16
; and 16 and alias.scope/noalias metadata on each store.
define amdgpu_kernel void @k0() #0 {
; CHECK-LABEL: @k0(
-; CHECK-NEXT: %lds.size.1.align.1.bc = bitcast [1 x i8] addrspace(3)* getelementptr inbounds (%llvm.amdgcn.kernel.k0.lds.t, %llvm.amdgcn.kernel.k0.lds.t addrspace(3)* @llvm.amdgcn.kernel.k0.lds, i32 0, i32 3) to i8 addrspace(3)*
-; CHECK-NEXT: store i8 1, i8 addrspace(3)* %lds.size.1.align.1.bc, align 2, !alias.scope !0, !noalias !3
-; CHECK-NEXT: %lds.size.2.align.2.bc = bitcast [2 x i8] addrspace(3)* getelementptr inbounds (%llvm.amdgcn.kernel.k0.lds.t, %llvm.amdgcn.kernel.k0.lds.t addrspace(3)* @llvm.amdgcn.kernel.k0.lds, i32 0, i32 2) to i8 addrspace(3)*
-; CHECK-NEXT: store i8 2, i8 addrspace(3)* %lds.size.2.align.2.bc, align 4
-; CHECK-NEXT: %lds.size.4.align.4.bc = bitcast [4 x i8] addrspace(3)* getelementptr inbounds (%llvm.amdgcn.kernel.k0.lds.t, %llvm.amdgcn.kernel.k0.lds.t addrspace(3)* @llvm.amdgcn.kernel.k0.lds, i32 0, i32 1) to i8 addrspace(3)*
-; CHECK-NEXT: store i8 4, i8 addrspace(3)* %lds.size.4.align.4.bc, align 16
-; CHECK-NEXT: %lds.size.16.align.16.bc = bitcast [16 x i8] addrspace(3)* getelementptr inbounds (%llvm.amdgcn.kernel.k0.lds.t, %llvm.amdgcn.kernel.k0.lds.t addrspace(3)* @llvm.amdgcn.kernel.k0.lds, i32 0, i32 0) to i8 addrspace(3)*
-; CHECK-NEXT: store i8 16, i8 addrspace(3)* %lds.size.16.align.16.bc, align 16
+; CHECK-NEXT: store i8 1, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k0.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k0.lds, i32 0, i32 3), align 2, !alias.scope !0, !noalias !3
+; CHECK-NEXT: store i8 2, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k0.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k0.lds, i32 0, i32 2), align 4, !alias.scope !7, !noalias !8
+; CHECK-NEXT: store i8 4, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k0.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k0.lds, i32 0, i32 1), align 16, !alias.scope !9, !noalias !10
+; CHECK-NEXT: store i8 16, ptr addrspace(3) @llvm.amdgcn.kernel.k0.lds, align 16, !alias.scope !11, !noalias !12
; CHECK-NEXT: ret void
-;
- %lds.size.1.align.1.bc = bitcast [1 x i8] addrspace(3)* @lds.size.1.align.1 to i8 addrspace(3)*
- store i8 1, i8 addrspace(3)* %lds.size.1.align.1.bc, align 1
+ store i8 1, ptr addrspace(3) @lds.size.1.align.1, align 1
- %lds.size.2.align.2.bc = bitcast [2 x i8] addrspace(3)* @lds.size.2.align.2 to i8 addrspace(3)*
- store i8 2, i8 addrspace(3)* %lds.size.2.align.2.bc, align 2
+ store i8 2, ptr addrspace(3) @lds.size.2.align.2, align 2
- %lds.size.4.align.4.bc = bitcast [4 x i8] addrspace(3)* @lds.size.4.align.4 to i8 addrspace(3)*
- store i8 4, i8 addrspace(3)* %lds.size.4.align.4.bc, align 4
+ store i8 4, ptr addrspace(3) @lds.size.4.align.4, align 4
- %lds.size.16.align.16.bc = bitcast [16 x i8] addrspace(3)* @lds.size.16.align.16 to i8 addrspace(3)*
- store i8 16, i8 addrspace(3)* %lds.size.16.align.16.bc, align 16
+ store i8 16, ptr addrspace(3) @lds.size.16.align.16, align 16
ret void
}
; Kernel that writes one byte to the 2-, 4- and 16-byte addrspace(3) globals.
; The expected output redirects the stores into @llvm.amdgcn.kernel.k1.lds:
; fields 2 and 1 through constant GEPs and the last store to the struct's own
; address, with alignments 4, 16 and 16 and alias.scope/noalias metadata.
define amdgpu_kernel void @k1() #0 {
; CHECK-LABEL: @k1(
-; CHECK-NEXT: %lds.size.2.align.2.bc = bitcast [2 x i8] addrspace(3)* getelementptr inbounds (%llvm.amdgcn.kernel.k1.lds.t, %llvm.amdgcn.kernel.k1.lds.t addrspace(3)* @llvm.amdgcn.kernel.k1.lds, i32 0, i32 2) to i8 addrspace(3)*
-; CHECK-NEXT: store i8 2, i8 addrspace(3)* %lds.size.2.align.2.bc, align 4
-; CHECK-NEXT: %lds.size.4.align.4.bc = bitcast [4 x i8] addrspace(3)* getelementptr inbounds (%llvm.amdgcn.kernel.k1.lds.t, %llvm.amdgcn.kernel.k1.lds.t addrspace(3)* @llvm.amdgcn.kernel.k1.lds, i32 0, i32 1) to i8 addrspace(3)*
-; CHECK-NEXT: store i8 4, i8 addrspace(3)* %lds.size.4.align.4.bc, align 16
-; CHECK-NEXT: %lds.size.16.align.16.bc = bitcast [16 x i8] addrspace(3)* getelementptr inbounds (%llvm.amdgcn.kernel.k1.lds.t, %llvm.amdgcn.kernel.k1.lds.t addrspace(3)* @llvm.amdgcn.kernel.k1.lds, i32 0, i32 0) to i8 addrspace(3)*
-; CHECK-NEXT: store i8 16, i8 addrspace(3)* %lds.size.16.align.16.bc, align 16
+; CHECK-NEXT: store i8 2, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k1.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k1.lds, i32 0, i32 2), align 4, !alias.scope !13, !noalias !16
+; CHECK-NEXT: store i8 4, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k1.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k1.lds, i32 0, i32 1), align 16, !alias.scope !19, !noalias !20
+; CHECK-NEXT: store i8 16, ptr addrspace(3) @llvm.amdgcn.kernel.k1.lds, align 16, !alias.scope !21, !noalias !22
; CHECK-NEXT: ret void
;
- %lds.size.2.align.2.bc = bitcast [2 x i8] addrspace(3)* @lds.size.2.align.2 to i8 addrspace(3)*
- store i8 2, i8 addrspace(3)* %lds.size.2.align.2.bc, align 2
+ store i8 2, ptr addrspace(3) @lds.size.2.align.2, align 2
- %lds.size.4.align.4.bc = bitcast [4 x i8] addrspace(3)* @lds.size.4.align.4 to i8 addrspace(3)*
- store i8 4, i8 addrspace(3)* %lds.size.4.align.4.bc, align 4
+ store i8 4, ptr addrspace(3) @lds.size.4.align.4, align 4
- %lds.size.16.align.16.bc = bitcast [16 x i8] addrspace(3)* @lds.size.16.align.16 to i8 addrspace(3)*
- store i8 16, i8 addrspace(3)* %lds.size.16.align.16.bc, align 16
+ store i8 16, ptr addrspace(3) @lds.size.16.align.16, align 16
ret void
}
; Kernel with a single byte store to @lds.size.2.align.2. The expected output
; stores directly to @llvm.amdgcn.kernel.k2.lds and keeps align 2.
define amdgpu_kernel void @k2() #0 {
; CHECK-LABEL: @k2(
-; CHECK-NEXT: %lds.size.2.align.2.bc = bitcast [2 x i8] addrspace(3)* getelementptr inbounds (%llvm.amdgcn.kernel.k2.lds.t, %llvm.amdgcn.kernel.k2.lds.t addrspace(3)* @llvm.amdgcn.kernel.k2.lds, i32 0, i32 0) to i8 addrspace(3)*
-; CHECK-NEXT: store i8 2, i8 addrspace(3)* %lds.size.2.align.2.bc, align 2
+; CHECK-NEXT: store i8 2, ptr addrspace(3) @llvm.amdgcn.kernel.k2.lds, align 2
; CHECK-NEXT: ret void
;
- %lds.size.2.align.2.bc = bitcast [2 x i8] addrspace(3)* @lds.size.2.align.2 to i8 addrspace(3)*
- store i8 2, i8 addrspace(3)* %lds.size.2.align.2.bc, align 2
+ store i8 2, ptr addrspace(3) @lds.size.2.align.2, align 2
ret void
}
; Kernel with a single byte store to @lds.size.4.align.4. The expected output
; stores directly to @llvm.amdgcn.kernel.k3.lds and keeps align 4.
define amdgpu_kernel void @k3() #0 {
; CHECK-LABEL: @k3(
-; CHECK-NEXT: %lds.size.4.align.4.bc = bitcast [4 x i8] addrspace(3)* getelementptr inbounds (%llvm.amdgcn.kernel.k3.lds.t, %llvm.amdgcn.kernel.k3.lds.t addrspace(3)* @llvm.amdgcn.kernel.k3.lds, i32 0, i32 0) to i8 addrspace(3)*
-; CHECK-NEXT: store i8 4, i8 addrspace(3)* %lds.size.4.align.4.bc, align 4
+; CHECK-NEXT: store i8 4, ptr addrspace(3) @llvm.amdgcn.kernel.k3.lds, align 4
; CHECK-NEXT: ret void
;
- %lds.size.4.align.4.bc = bitcast [4 x i8] addrspace(3)* @lds.size.4.align.4 to i8 addrspace(3)*
- store i8 4, i8 addrspace(3)* %lds.size.4.align.4.bc, align 4
+ store i8 4, ptr addrspace(3) @lds.size.4.align.4, align 4
ret void
}
}
; Plain (non-kernel) function that writes to @lds.size.1.align.1 and
; @lds.size.8.align.8. The expected output redirects both stores into
; @llvm.amdgcn.module.lds (field 1 and the struct's own address), both with
; align 8 and carrying only !noalias metadata. Note the second input store is
; written with align 4.
define void @f0() {
-; CHECK-LABEL: @f0(
-; CHECK: %lds.size.1.align.1.bc = bitcast [1 x i8] addrspace(3)* getelementptr inbounds (%llvm.amdgcn.module.lds.t, %llvm.amdgcn.module.lds.t addrspace(3)* @llvm.amdgcn.module.lds, i32 0, i32 1) to i8 addrspace(3)*
-; CHECK-NEXT: store i8 1, i8 addrspace(3)* %lds.size.1.align.1.bc, align 8
-; CHECK-NEXT: %lds.size.8.align.8.bc = bitcast [8 x i8] addrspace(3)* getelementptr inbounds (%llvm.amdgcn.module.lds.t, %llvm.amdgcn.module.lds.t addrspace(3)* @llvm.amdgcn.module.lds, i32 0, i32 0) to i8 addrspace(3)*
-; CHECK-NEXT: store i8 8, i8 addrspace(3)* %lds.size.8.align.8.bc, align 8
-; CHECK-NEXT: ret void
-;
- %lds.size.1.align.1.bc = bitcast [1 x i8] addrspace(3)* @lds.size.1.align.1 to i8 addrspace(3)*
- store i8 1, i8 addrspace(3)* %lds.size.1.align.1.bc, align 1
+; CHECK-LABEL: define void @f0(
+; CHECK-NEXT: store i8 1, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.module.lds.t, ptr addrspace(3) @llvm.amdgcn.module.lds, i32 0, i32 1), align 8, !noalias !23
+; CHECK-NEXT: store i8 8, ptr addrspace(3) @llvm.amdgcn.module.lds, align 8, !noalias !23
+; CHECK-NEXT: ret void
+ store i8 1, ptr addrspace(3) @lds.size.1.align.1, align 1
- %lds.size.8.align.8.bc = bitcast [8 x i8] addrspace(3)* @lds.size.8.align.8 to i8 addrspace(3)*
- store i8 8, i8 addrspace(3)* %lds.size.8.align.8.bc, align 4
+ store i8 8, ptr addrspace(3) @lds.size.8.align.8, align 4
ret void
}
-; RUN: opt -opaque-pointers=0 -S -mtriple=amdgcn-- -amdgpu-lower-module-lds --amdgpu-super-align-lds-globals=true --amdgpu-lower-module-lds-strategy=module < %s | FileCheck --check-prefixes=CHECK,SUPER-ALIGN_ON %s
-; RUN: opt -opaque-pointers=0 -S -mtriple=amdgcn-- -passes=amdgpu-lower-module-lds --amdgpu-super-align-lds-globals=true --amdgpu-lower-module-lds-strategy=module < %s | FileCheck --check-prefixes=CHECK,SUPER-ALIGN_ON %s
-; RUN: opt -opaque-pointers=0 -S -mtriple=amdgcn-- -amdgpu-lower-module-lds --amdgpu-super-align-lds-globals=false --amdgpu-lower-module-lds-strategy=module < %s | FileCheck --check-prefixes=CHECK,SUPER-ALIGN_OFF %s
-; RUN: opt -opaque-pointers=0 -S -mtriple=amdgcn-- -passes=amdgpu-lower-module-lds --amdgpu-super-align-lds-globals=false --amdgpu-lower-module-lds-strategy=module < %s | FileCheck --check-prefixes=CHECK,SUPER-ALIGN_OFF %s
+; RUN: opt -S -mtriple=amdgcn-- -amdgpu-lower-module-lds --amdgpu-super-align-lds-globals=true --amdgpu-lower-module-lds-strategy=module < %s | FileCheck --check-prefixes=CHECK,SUPER-ALIGN_ON %s
+; RUN: opt -S -mtriple=amdgcn-- -passes=amdgpu-lower-module-lds --amdgpu-super-align-lds-globals=true --amdgpu-lower-module-lds-strategy=module < %s | FileCheck --check-prefixes=CHECK,SUPER-ALIGN_ON %s
+; RUN: opt -S -mtriple=amdgcn-- -amdgpu-lower-module-lds --amdgpu-super-align-lds-globals=false --amdgpu-lower-module-lds-strategy=module < %s | FileCheck --check-prefixes=CHECK,SUPER-ALIGN_OFF %s
+; RUN: opt -S -mtriple=amdgcn-- -passes=amdgpu-lower-module-lds --amdgpu-super-align-lds-globals=false --amdgpu-lower-module-lds-strategy=module < %s | FileCheck --check-prefixes=CHECK,SUPER-ALIGN_OFF %s
; CHECK: %llvm.amdgcn.kernel.k1.lds.t = type { [32 x i8] }
; CHECK: %llvm.amdgcn.kernel.k2.lds.t = type { i16, [2 x i8], i16 }
; CHECK: %llvm.amdgcn.kernel.k3.lds.t = type { [32 x i64], [32 x i32] }
-; CHECK: %llvm.amdgcn.kernel.k4.lds.t = type { [2 x i32 addrspace(3)*] }
+; CHECK: %llvm.amdgcn.kernel.k4.lds.t = type { [2 x ptr addrspace(3)] }
; SUPER-ALIGN_ON: @lds.unused = addrspace(3) global i32 undef, align 4
; SUPER-ALIGN_OFF: @lds.unused = addrspace(3) global i32 undef, align 2
; @lds.unused is an i32 declared with align 2 and referenced from @llvm.used
; through an addrspacecast; the two super-align expectations just above pin
; the alignment it is printed with in each mode.
@lds.unused = addrspace(3) global i32 undef, align 2
-@llvm.used = appending global [1 x i8*] [i8* addrspacecast (i32 addrspace(3)* @lds.unused to i8*)], section "llvm.metadata"
+@llvm.used = appending global [1 x ptr] [ptr addrspacecast (ptr addrspace(3) @lds.unused to ptr)], section "llvm.metadata"
; CHECK-NOT: @lds.1
; @lds.1 is a 32-byte array with align 1; per the expectation above, no
; definition named @lds.1 should remain in the output.
@lds.1 = internal unnamed_addr addrspace(3) global [32 x i8] undef, align 1
; SUPER-ALIGN_OFF: @llvm.amdgcn.kernel.k4.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k4.lds.t undef, align 4
; CHECK-LABEL: @k1
-; CHECK: %1 = getelementptr inbounds [32 x i8], [32 x i8] addrspace(3)* getelementptr inbounds (%llvm.amdgcn.kernel.k1.lds.t, %llvm.amdgcn.kernel.k1.lds.t addrspace(3)* @llvm.amdgcn.kernel.k1.lds, i32 0, i32 0), i32 0, i32 0
-; CHECK: %2 = addrspacecast i8 addrspace(3)* %1 to i8*
-; CHECK: %ptr = getelementptr inbounds i8, i8* %2, i64 %x
-; CHECK: store i8 1, i8* %ptr, align 1
+; CHECK: %1 = addrspacecast ptr addrspace(3) @llvm.amdgcn.kernel.k1.lds to ptr
+; CHECK: %ptr = getelementptr inbounds i8, ptr %1, i64 %x
+; CHECK: store i8 1, ptr %ptr, align 1
; Indexes @lds.1 by the runtime value %x through a constant addrspacecast to
; the generic address space and stores one byte there. In the expected output
; the cast is an instruction (%1) applied to @llvm.amdgcn.kernel.k1.lds, and
; the GEP and store operate on that result.
define amdgpu_kernel void @k1(i64 %x) {
- %ptr = getelementptr inbounds i8, i8* addrspacecast ([32 x i8] addrspace(3)* @lds.1 to i8*), i64 %x
- store i8 1, i8 addrspace(0)* %ptr, align 1
+ %ptr = getelementptr inbounds i8, ptr addrspacecast (ptr addrspace(3) @lds.1 to ptr), i64 %x
+ store i8 1, ptr addrspace(0) %ptr, align 1
ret void
}
; Check that alignment is propagated to uses for scalar variables.
; CHECK-LABEL: @k2
-; CHECK: store i16 1, i16 addrspace(3)* getelementptr inbounds (%llvm.amdgcn.kernel.k2.lds.t, %llvm.amdgcn.kernel.k2.lds.t addrspace(3)* @llvm.amdgcn.kernel.k2.lds, i32 0, i32 0), align 4
-; CHECK: store i16 2, i16 addrspace(3)* getelementptr inbounds (%llvm.amdgcn.kernel.k2.lds.t, %llvm.amdgcn.kernel.k2.lds.t addrspace(3)* @llvm.amdgcn.kernel.k2.lds, i32 0, i32 2), align 4
+; CHECK: store i16 1, ptr addrspace(3) @llvm.amdgcn.kernel.k2.lds, align 4
+; CHECK: store i16 2, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k2.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k2.lds, i32 0, i32 2), align 4
; Stores an i16 to each of @lds.2 and @lds.3 with align 2. The expected output
; writes to @llvm.amdgcn.kernel.k2.lds (its own address and field 2), both
; with align 4.
define amdgpu_kernel void @k2() {
- store i16 1, i16 addrspace(3)* @lds.2, align 2
- store i16 2, i16 addrspace(3)* @lds.3, align 2
+ store i16 1, ptr addrspace(3) @lds.2, align 2
+ store i16 2, ptr addrspace(3) @lds.3, align 2
ret void
}
; Check that alignment is propagated to uses for arrays.
; CHECK-LABEL: @k3
-; CHECK: store i32 1, i32 addrspace(3)* %ptr1, align 8
-; CHECK: store i32 2, i32 addrspace(3)* %ptr2, align 4
-; SUPER-ALIGN_ON: store i32 3, i32 addrspace(3)* %ptr3, align 16
-; SUPER-ALIGN_OFF: store i32 3, i32 addrspace(3)* %ptr3, align 8
-; CHECK: store i32 4, i32 addrspace(3)* %ptr4, align 4
-; CHECK: store i32 5, i32 addrspace(3)* %ptr5, align 4
-; CHECK: %load1 = load i32, i32 addrspace(3)* %ptr1, align 8
-; CHECK: %load2 = load i32, i32 addrspace(3)* %ptr2, align 4
-; SUPER-ALIGN_ON: %load3 = load i32, i32 addrspace(3)* %ptr3, align 16
-; SUPER-ALIGN_OFF: %load3 = load i32, i32 addrspace(3)* %ptr3, align 8
-; CHECK: %load4 = load i32, i32 addrspace(3)* %ptr4, align 4
-; CHECK: %load5 = load i32, i32 addrspace(3)* %ptr5, align 4
-; CHECK: %val1 = atomicrmw volatile add i32 addrspace(3)* %ptr1, i32 1 monotonic, align 8
-; CHECK: %val2 = cmpxchg volatile i32 addrspace(3)* %ptr1, i32 1, i32 2 monotonic monotonic, align 8
-; CHECK: %ptr1.bc = bitcast i32 addrspace(3)* %ptr1 to i16 addrspace(3)*
-; CHECK: %ptr2.bc = bitcast i32 addrspace(3)* %ptr2 to i16 addrspace(3)*
-; CHECK: %ptr3.bc = bitcast i32 addrspace(3)* %ptr3 to i16 addrspace(3)*
-; CHECK: %ptr4.bc = bitcast i32 addrspace(3)* %ptr4 to i16 addrspace(3)*
-; CHECK: store i16 11, i16 addrspace(3)* %ptr1.bc, align 8
-; CHECK: store i16 12, i16 addrspace(3)* %ptr2.bc, align 4
-; SUPER-ALIGN_ON: store i16 13, i16 addrspace(3)* %ptr3.bc, align 16
-; SUPER-ALIGN_OFF: store i16 13, i16 addrspace(3)* %ptr3.bc, align 8
-; CHECK: store i16 14, i16 addrspace(3)* %ptr4.bc, align 4
-; CHECK: %ptr1.ac = addrspacecast i32 addrspace(3)* %ptr1 to i32*
-; CHECK: %ptr2.ac = addrspacecast i32 addrspace(3)* %ptr2 to i32*
-; CHECK: %ptr3.ac = addrspacecast i32 addrspace(3)* %ptr3 to i32*
-; CHECK: %ptr4.ac = addrspacecast i32 addrspace(3)* %ptr4 to i32*
-; CHECK: store i32 21, i32* %ptr1.ac, align 8
-; CHECK: store i32 22, i32* %ptr2.ac, align 4
-; SUPER-ALIGN_ON: store i32 23, i32* %ptr3.ac, align 16
-; SUPER-ALIGN_OFF: store i32 23, i32* %ptr3.ac, align 8
-; CHECK: store i32 24, i32* %ptr4.ac, align 4
+; CHECK: store i32 1, ptr addrspace(3) %ptr1, align 8
+; CHECK: store i32 2, ptr addrspace(3) %ptr2, align 4
+; SUPER-ALIGN_ON: store i32 3, ptr addrspace(3) %ptr3, align 16
+; SUPER-ALIGN_OFF: store i32 3, ptr addrspace(3) %ptr3, align 8
+; CHECK: store i32 4, ptr addrspace(3) %ptr4, align 4
+; CHECK: store i32 5, ptr addrspace(3) %ptr5, align 4
+; CHECK: %load1 = load i32, ptr addrspace(3) %ptr1, align 8
+; CHECK: %load2 = load i32, ptr addrspace(3) %ptr2, align 4
+; SUPER-ALIGN_ON: %load3 = load i32, ptr addrspace(3) %ptr3, align 16
+; SUPER-ALIGN_OFF: %load3 = load i32, ptr addrspace(3) %ptr3, align 8
+; CHECK: %load4 = load i32, ptr addrspace(3) %ptr4, align 4
+; CHECK: %load5 = load i32, ptr addrspace(3) %ptr5, align 4
+; CHECK: %val1 = atomicrmw volatile add ptr addrspace(3) %ptr1, i32 1 monotonic, align 8
+; CHECK: %val2 = cmpxchg volatile ptr addrspace(3) %ptr1, i32 1, i32 2 monotonic monotonic, align 8
+; CHECK: store i16 11, ptr addrspace(3) %ptr1, align 8
+; CHECK: store i16 12, ptr addrspace(3) %ptr2, align 4
+; SUPER-ALIGN_ON: store i16 13, ptr addrspace(3) %ptr3, align 16
+; SUPER-ALIGN_OFF: store i16 13, ptr addrspace(3) %ptr3, align 8
+; CHECK: store i16 14, ptr addrspace(3) %ptr4, align 4
+; CHECK: %ptr1.ac = addrspacecast ptr addrspace(3) %ptr1 to ptr
+; CHECK: %ptr2.ac = addrspacecast ptr addrspace(3) %ptr2 to ptr
+; CHECK: %ptr3.ac = addrspacecast ptr addrspace(3) %ptr3 to ptr
+; CHECK: %ptr4.ac = addrspacecast ptr addrspace(3) %ptr4 to ptr
+; CHECK: store i32 21, ptr %ptr1.ac, align 8
+; CHECK: store i32 22, ptr %ptr2.ac, align 4
+; SUPER-ALIGN_ON: store i32 23, ptr %ptr3.ac, align 16
+; SUPER-ALIGN_OFF: store i32 23, ptr %ptr3.ac, align 8
+; CHECK: store i32 24, ptr %ptr4.ac, align 4
; Exercises alignment on accesses into @lds.5: %ptr1..%ptr4 sit at constant
; i32 element offsets 2, 3, 4 and 5, and %ptr5 at the runtime offset %x.
; Every access below is written with align 4 (align 2 for the i16 stores);
; the expectations preceding this function list the alignment each access is
; expected to carry afterwards, per super-align mode where they differ.
define amdgpu_kernel void @k3(i64 %x) {
- %ptr0 = getelementptr inbounds i64, i64 addrspace(3)* bitcast ([32 x i64] addrspace(3)* @lds.4 to i64 addrspace(3)*), i64 0
- store i64 0, i64 addrspace(3)* %ptr0, align 8
-
- %ptr1 = getelementptr inbounds i32, i32 addrspace(3)* bitcast ([32 x i32] addrspace(3)* @lds.5 to i32 addrspace(3)*), i64 2
- %ptr2 = getelementptr inbounds i32, i32 addrspace(3)* bitcast ([32 x i32] addrspace(3)* @lds.5 to i32 addrspace(3)*), i64 3
- %ptr3 = getelementptr inbounds i32, i32 addrspace(3)* bitcast ([32 x i32] addrspace(3)* @lds.5 to i32 addrspace(3)*), i64 4
- %ptr4 = getelementptr inbounds i32, i32 addrspace(3)* bitcast ([32 x i32] addrspace(3)* @lds.5 to i32 addrspace(3)*), i64 5
- %ptr5 = getelementptr inbounds i32, i32 addrspace(3)* bitcast ([32 x i32] addrspace(3)* @lds.5 to i32 addrspace(3)*), i64 %x
-
- store i32 1, i32 addrspace(3)* %ptr1, align 4
- store i32 2, i32 addrspace(3)* %ptr2, align 4
- store i32 3, i32 addrspace(3)* %ptr3, align 4
- store i32 4, i32 addrspace(3)* %ptr4, align 4
- store i32 5, i32 addrspace(3)* %ptr5, align 4
-
- %load1 = load i32, i32 addrspace(3)* %ptr1, align 4
- %load2 = load i32, i32 addrspace(3)* %ptr2, align 4
- %load3 = load i32, i32 addrspace(3)* %ptr3, align 4
- %load4 = load i32, i32 addrspace(3)* %ptr4, align 4
- %load5 = load i32, i32 addrspace(3)* %ptr5, align 4
-
- %val1 = atomicrmw volatile add i32 addrspace(3)* %ptr1, i32 1 monotonic, align 4
- %val2 = cmpxchg volatile i32 addrspace(3)* %ptr1, i32 1, i32 2 monotonic monotonic, align 4
-
- %ptr1.bc = bitcast i32 addrspace(3)* %ptr1 to i16 addrspace(3)*
- %ptr2.bc = bitcast i32 addrspace(3)* %ptr2 to i16 addrspace(3)*
- %ptr3.bc = bitcast i32 addrspace(3)* %ptr3 to i16 addrspace(3)*
- %ptr4.bc = bitcast i32 addrspace(3)* %ptr4 to i16 addrspace(3)*
-
- store i16 11, i16 addrspace(3)* %ptr1.bc, align 2
- store i16 12, i16 addrspace(3)* %ptr2.bc, align 2
- store i16 13, i16 addrspace(3)* %ptr3.bc, align 2
- store i16 14, i16 addrspace(3)* %ptr4.bc, align 2
-
- %ptr1.ac = addrspacecast i32 addrspace(3)* %ptr1 to i32*
- %ptr2.ac = addrspacecast i32 addrspace(3)* %ptr2 to i32*
- %ptr3.ac = addrspacecast i32 addrspace(3)* %ptr3 to i32*
- %ptr4.ac = addrspacecast i32 addrspace(3)* %ptr4 to i32*
-
- store i32 21, i32* %ptr1.ac, align 4
- store i32 22, i32* %ptr2.ac, align 4
- store i32 23, i32* %ptr3.ac, align 4
- store i32 24, i32* %ptr4.ac, align 4
; With opaque pointers the zero-offset %ptr0 GEP is gone; the i64 store goes
; straight to @lds.4.
+ store i64 0, ptr addrspace(3) @lds.4, align 8
+
+ %ptr1 = getelementptr inbounds i32, ptr addrspace(3) @lds.5, i64 2
+ %ptr2 = getelementptr inbounds i32, ptr addrspace(3) @lds.5, i64 3
+ %ptr3 = getelementptr inbounds i32, ptr addrspace(3) @lds.5, i64 4
+ %ptr4 = getelementptr inbounds i32, ptr addrspace(3) @lds.5, i64 5
+ %ptr5 = getelementptr inbounds i32, ptr addrspace(3) @lds.5, i64 %x
+
+ store i32 1, ptr addrspace(3) %ptr1, align 4
+ store i32 2, ptr addrspace(3) %ptr2, align 4
+ store i32 3, ptr addrspace(3) %ptr3, align 4
+ store i32 4, ptr addrspace(3) %ptr4, align 4
+ store i32 5, ptr addrspace(3) %ptr5, align 4
+
+ %load1 = load i32, ptr addrspace(3) %ptr1, align 4
+ %load2 = load i32, ptr addrspace(3) %ptr2, align 4
+ %load3 = load i32, ptr addrspace(3) %ptr3, align 4
+ %load4 = load i32, ptr addrspace(3) %ptr4, align 4
+ %load5 = load i32, ptr addrspace(3) %ptr5, align 4
+
+ %val1 = atomicrmw volatile add ptr addrspace(3) %ptr1, i32 1 monotonic, align 4
+ %val2 = cmpxchg volatile ptr addrspace(3) %ptr1, i32 1, i32 2 monotonic monotonic, align 4
+
+
; The former %ptrN.bc bitcasts are dropped; the i16 stores reuse %ptr1..%ptr4.
+ store i16 11, ptr addrspace(3) %ptr1, align 2
+ store i16 12, ptr addrspace(3) %ptr2, align 2
+ store i16 13, ptr addrspace(3) %ptr3, align 2
+ store i16 14, ptr addrspace(3) %ptr4, align 2
+
+ %ptr1.ac = addrspacecast ptr addrspace(3) %ptr1 to ptr
+ %ptr2.ac = addrspacecast ptr addrspace(3) %ptr2 to ptr
+ %ptr3.ac = addrspacecast ptr addrspace(3) %ptr3 to ptr
+ %ptr4.ac = addrspacecast ptr addrspace(3) %ptr4 to ptr
+
+ store i32 21, ptr %ptr1.ac, align 4
+ store i32 22, ptr %ptr2.ac, align 4
+ store i32 23, ptr %ptr3.ac, align 4
+ store i32 24, ptr %ptr4.ac, align 4
ret void
}
; Two-element array of addrspace(3) pointers, itself in addrspace(3), align 4.
; @k4 takes the address of its element 1.
-@lds.6 = internal unnamed_addr addrspace(3) global [2 x i32 addrspace(3)*] undef, align 4
+@lds.6 = internal unnamed_addr addrspace(3) global [2 x ptr addrspace(3)] undef, align 4
; Check that alignment is not propagated if the use is not a pointer operand.
; CHECK-LABEL: @k4
-; SUPER-ALIGN_ON: store i32 undef, i32 addrspace(3)* %ptr, align 8
-; SUPER-ALIGN_OFF: store i32 undef, i32 addrspace(3)* %ptr, align 4
-; CHECK: store i32 addrspace(3)* %ptr, i32 addrspace(3)** undef, align 4
-; SUPER-ALIGN_ON: %val1 = cmpxchg volatile i32 addrspace(3)* %ptr, i32 1, i32 2 monotonic monotonic, align 8
-; SUPER-ALIGN_OFF: %val1 = cmpxchg volatile i32 addrspace(3)* %ptr, i32 1, i32 2 monotonic monotonic, align 4
-; CHECK: %val2 = cmpxchg volatile i32 addrspace(3)** undef, i32 addrspace(3)* %ptr, i32 addrspace(3)* undef monotonic monotonic, align 4
+; SUPER-ALIGN_ON: store i32 undef, ptr addrspace(3) %gep, align 8
+; SUPER-ALIGN_OFF: store i32 undef, ptr addrspace(3) %gep, align 4
+; CHECK: store ptr addrspace(3) %gep, ptr undef, align 4
+; SUPER-ALIGN_ON: %val1 = cmpxchg volatile ptr addrspace(3) %gep, i32 1, i32 2 monotonic monotonic, align 8
+; SUPER-ALIGN_OFF: %val1 = cmpxchg volatile ptr addrspace(3) %gep, i32 1, i32 2 monotonic monotonic, align 4
+; CHECK: %val2 = cmpxchg volatile ptr undef, ptr addrspace(3) %gep, ptr addrspace(3) undef monotonic monotonic, align 4
; %gep addresses element 1 of @lds.6 and is then used in two roles: as the
; pointer operand of a store and of the %val1 cmpxchg, and as the stored or
; compared value in the second store and the %val2 cmpxchg. All four are
; written with align 4. Per the expectations preceding this function, only
; the pointer-operand uses are expected to change alignment (to 8) with
; super-align on; the value-operand uses stay at align 4 in both modes.
define amdgpu_kernel void @k4() {
- %gep = getelementptr inbounds i32 addrspace(3)*, i32 addrspace(3)* addrspace(3)* bitcast ([2 x i32 addrspace(3)*] addrspace(3)* @lds.6 to i32 addrspace(3)* addrspace(3)*), i64 1
- %ptr = bitcast i32 addrspace(3)* addrspace(3)* %gep to i32 addrspace(3)*
- store i32 undef, i32 addrspace(3)* %ptr, align 4
- store i32 addrspace(3)* %ptr, i32 addrspace(3)** undef, align 4
- %val1 = cmpxchg volatile i32 addrspace(3)* %ptr, i32 1, i32 2 monotonic monotonic, align 4
- %val2 = cmpxchg volatile i32 addrspace(3)** undef, i32 addrspace(3)* %ptr, i32 addrspace(3)* undef monotonic monotonic, align 4
+ %gep = getelementptr inbounds ptr addrspace(3), ptr addrspace(3) @lds.6, i64 1
+ store i32 undef, ptr addrspace(3) %gep, align 4
+ store ptr addrspace(3) %gep, ptr undef, align 4
+ %val1 = cmpxchg volatile ptr addrspace(3) %gep, i32 1, i32 2 monotonic monotonic, align 4
+ %val2 = cmpxchg volatile ptr undef, ptr addrspace(3) %gep, ptr addrspace(3) undef monotonic monotonic, align 4
ret void
}
-; RUN: opt -opaque-pointers=0 -S -mtriple=amdgcn-- -amdgpu-lower-module-lds --amdgpu-lower-module-lds-strategy=module < %s | FileCheck %s
-; RUN: opt -opaque-pointers=0 -S -mtriple=amdgcn-- -passes=amdgpu-lower-module-lds --amdgpu-lower-module-lds-strategy=module < %s | FileCheck %s
+; RUN: opt -S -mtriple=amdgcn-- -amdgpu-lower-module-lds --amdgpu-lower-module-lds-strategy=module < %s | FileCheck %s
+; RUN: opt -S -mtriple=amdgcn-- -passes=amdgpu-lower-module-lds --amdgpu-lower-module-lds-strategy=module < %s | FileCheck %s
; Internal addrspace(3) byte arrays; each name spells out the array size and
; the alignment it is declared with.
@lds.size.1.align.1 = internal unnamed_addr addrspace(3) global [1 x i8] undef, align 1
@lds.size.2.align.2 = internal unnamed_addr addrspace(3) global [2 x i8] undef, align 2
;.
; Kernel that writes one byte to each of four addrspace(3) globals (the
; 1-, 2-, 4- and 16-byte ones). The updated expectations redirect every store
; into @llvm.amdgcn.kernel.k0.lds: fields 3, 2 and 1 through constant GEPs
; and the last store to the struct's own address, with alignments 2, 4, 16
; and 16 and alias.scope/noalias metadata on each store.
define amdgpu_kernel void @k0() {
; CHECK-LABEL: @k0(
-; CHECK-NEXT: %lds.size.1.align.1.bc = bitcast [1 x i8] addrspace(3)* getelementptr inbounds (%llvm.amdgcn.kernel.k0.lds.t, %llvm.amdgcn.kernel.k0.lds.t addrspace(3)* @llvm.amdgcn.kernel.k0.lds, i32 0, i32 3) to i8 addrspace(3)*
-; CHECK-NEXT: store i8 1, i8 addrspace(3)* %lds.size.1.align.1.bc, align 2
-; CHECK-NEXT: %lds.size.2.align.2.bc = bitcast [2 x i8] addrspace(3)* getelementptr inbounds (%llvm.amdgcn.kernel.k0.lds.t, %llvm.amdgcn.kernel.k0.lds.t addrspace(3)* @llvm.amdgcn.kernel.k0.lds, i32 0, i32 2) to i8 addrspace(3)*
-; CHECK-NEXT: store i8 2, i8 addrspace(3)* %lds.size.2.align.2.bc, align 4
-; CHECK-NEXT: %lds.size.4.align.4.bc = bitcast [4 x i8] addrspace(3)* getelementptr inbounds (%llvm.amdgcn.kernel.k0.lds.t, %llvm.amdgcn.kernel.k0.lds.t addrspace(3)* @llvm.amdgcn.kernel.k0.lds, i32 0, i32 1) to i8 addrspace(3)*
-; CHECK-NEXT: store i8 4, i8 addrspace(3)* %lds.size.4.align.4.bc, align 16
-; CHECK-NEXT: %lds.size.16.align.16.bc = bitcast [16 x i8] addrspace(3)* getelementptr inbounds (%llvm.amdgcn.kernel.k0.lds.t, %llvm.amdgcn.kernel.k0.lds.t addrspace(3)* @llvm.amdgcn.kernel.k0.lds, i32 0, i32 0) to i8 addrspace(3)*
-; CHECK-NEXT: store i8 16, i8 addrspace(3)* %lds.size.16.align.16.bc, align 16
-; CHECK-NEXT: ret void
-;
- %lds.size.1.align.1.bc = bitcast [1 x i8] addrspace(3)* @lds.size.1.align.1 to i8 addrspace(3)*
- store i8 1, i8 addrspace(3)* %lds.size.1.align.1.bc, align 1
+; CHECK-NEXT: store i8 1, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k0.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k0.lds, i32 0, i32 3), align 2, !alias.scope !0, !noalias !3
+; CHECK-NEXT: store i8 2, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k0.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k0.lds, i32 0, i32 2), align 4, !alias.scope !7, !noalias !8
+; CHECK-NEXT: store i8 4, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k0.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k0.lds, i32 0, i32 1), align 16, !alias.scope !9, !noalias !10
+; CHECK-NEXT: store i8 16, ptr addrspace(3) @llvm.amdgcn.kernel.k0.lds, align 16, !alias.scope !11, !noalias !12
+; CHECK-NEXT: ret void
+ store i8 1, ptr addrspace(3) @lds.size.1.align.1, align 1
- %lds.size.2.align.2.bc = bitcast [2 x i8] addrspace(3)* @lds.size.2.align.2 to i8 addrspace(3)*
- store i8 2, i8 addrspace(3)* %lds.size.2.align.2.bc, align 2
+ store i8 2, ptr addrspace(3) @lds.size.2.align.2, align 2
- %lds.size.4.align.4.bc = bitcast [4 x i8] addrspace(3)* @lds.size.4.align.4 to i8 addrspace(3)*
- store i8 4, i8 addrspace(3)* %lds.size.4.align.4.bc, align 4
+ store i8 4, ptr addrspace(3) @lds.size.4.align.4, align 4
- %lds.size.16.align.16.bc = bitcast [16 x i8] addrspace(3)* @lds.size.16.align.16 to i8 addrspace(3)*
- store i8 16, i8 addrspace(3)* %lds.size.16.align.16.bc, align 16
+ store i8 16, ptr addrspace(3) @lds.size.16.align.16, align 16
ret void
}
; Kernel that writes one byte to the 2-, 4- and 16-byte addrspace(3) globals.
; The updated expectations redirect the stores into
; @llvm.amdgcn.kernel.k1.lds: fields 2 and 1 through constant GEPs and the
; last store to the struct's own address, with alignments 4, 16 and 16 and
; alias.scope/noalias metadata.
define amdgpu_kernel void @k1() {
; CHECK-LABEL: @k1(
-; CHECK-NEXT: %lds.size.2.align.2.bc = bitcast [2 x i8] addrspace(3)* getelementptr inbounds (%llvm.amdgcn.kernel.k1.lds.t, %llvm.amdgcn.kernel.k1.lds.t addrspace(3)* @llvm.amdgcn.kernel.k1.lds, i32 0, i32 2) to i8 addrspace(3)*
-; CHECK-NEXT: store i8 2, i8 addrspace(3)* %lds.size.2.align.2.bc, align 4
-; CHECK-NEXT: %lds.size.4.align.4.bc = bitcast [4 x i8] addrspace(3)* getelementptr inbounds (%llvm.amdgcn.kernel.k1.lds.t, %llvm.amdgcn.kernel.k1.lds.t addrspace(3)* @llvm.amdgcn.kernel.k1.lds, i32 0, i32 1) to i8 addrspace(3)*
-; CHECK-NEXT: store i8 4, i8 addrspace(3)* %lds.size.4.align.4.bc, align 16
-; CHECK-NEXT: %lds.size.16.align.16.bc = bitcast [16 x i8] addrspace(3)* getelementptr inbounds (%llvm.amdgcn.kernel.k1.lds.t, %llvm.amdgcn.kernel.k1.lds.t addrspace(3)* @llvm.amdgcn.kernel.k1.lds, i32 0, i32 0) to i8 addrspace(3)*
-; CHECK-NEXT: store i8 16, i8 addrspace(3)* %lds.size.16.align.16.bc, align 16
-; CHECK-NEXT: ret void
+; CHECK-NEXT: store i8 2, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k1.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k1.lds, i32 0, i32 2), align 4, !alias.scope !13, !noalias !16
+; CHECK-NEXT: store i8 4, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k1.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k1.lds, i32 0, i32 1), align 16, !alias.scope !19, !noalias !20
+; CHECK-NEXT: store i8 16, ptr addrspace(3) @llvm.amdgcn.kernel.k1.lds, align 16, !alias.scope !21, !noalias !22
+; CHECK-NEXT: ret void
;
- %lds.size.2.align.2.bc = bitcast [2 x i8] addrspace(3)* @lds.size.2.align.2 to i8 addrspace(3)*
- store i8 2, i8 addrspace(3)* %lds.size.2.align.2.bc, align 2
+ store i8 2, ptr addrspace(3) @lds.size.2.align.2, align 2
- %lds.size.4.align.4.bc = bitcast [4 x i8] addrspace(3)* @lds.size.4.align.4 to i8 addrspace(3)*
- store i8 4, i8 addrspace(3)* %lds.size.4.align.4.bc, align 4
+ store i8 4, ptr addrspace(3) @lds.size.4.align.4, align 4
- %lds.size.16.align.16.bc = bitcast [16 x i8] addrspace(3)* @lds.size.16.align.16 to i8 addrspace(3)*
- store i8 16, i8 addrspace(3)* %lds.size.16.align.16.bc, align 16
+ store i8 16, ptr addrspace(3) @lds.size.16.align.16, align 16
ret void
}
; Uses the amdgpu_ps calling convention rather than amdgpu_kernel. The
; expected output is the same store as the input: still addressed to @lds.k2
; with align 1, i.e. not redirected into a generated struct.
define amdgpu_ps void @k2() {
; CHECK-LABEL: @k2(
-; CHECK-NEXT: %lds.k2.bc = bitcast [1 x i8] addrspace(3)* @lds.k2 to i8 addrspace(3)*
-; CHECK-NEXT: store i8 1, i8 addrspace(3)* %lds.k2.bc, align 1
+; CHECK-NEXT: store i8 1, ptr addrspace(3) @lds.k2, align 1
; CHECK-NEXT: ret void
;
- %lds.k2.bc = bitcast [1 x i8] addrspace(3)* @lds.k2 to i8 addrspace(3)*
- store i8 1, i8 addrspace(3)* %lds.k2.bc, align 1
+ store i8 1, ptr addrspace(3) @lds.k2, align 1
ret void
}
-; RUN: llc -opaque-pointers=0 -march=amdgcn -mcpu=gfx900 -O3 --amdgpu-lower-module-lds-strategy=module < %s | FileCheck -check-prefix=GCN %s
-; RUN: opt -opaque-pointers=0 -S -mtriple=amdgcn-- -amdgpu-lower-module-lds --amdgpu-lower-module-lds-strategy=module < %s | FileCheck %s
-; RUN: opt -opaque-pointers=0 -S -mtriple=amdgcn-- -passes=amdgpu-lower-module-lds --amdgpu-lower-module-lds-strategy=module < %s | FileCheck %s
+; RUN: llc -march=amdgcn -mcpu=gfx900 -O3 --amdgpu-lower-module-lds-strategy=module < %s | FileCheck -check-prefix=GCN %s
+; RUN: opt -S -mtriple=amdgcn-- -amdgpu-lower-module-lds --amdgpu-lower-module-lds-strategy=module < %s | FileCheck %s
+; RUN: opt -S -mtriple=amdgcn-- -passes=amdgpu-lower-module-lds --amdgpu-lower-module-lds-strategy=module < %s | FileCheck %s
; Nested single-member wrappers: %vec_type holds a %vec_base, which holds a
; %union.anon (its definition is not shown in this excerpt).
%vec_type = type { %vec_base }
%vec_base = type { %union.anon }
;.
; CHECK: @[[LLVM_AMDGCN_KERNEL_TEST_LDS:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [[LLVM_AMDGCN_KERNEL_TEST_LDS_T:%.*]] undef, align 4
;.
; @test writes a byte to @_f1, copies three bytes from @_f1 to @_f2 with
; llvm.memcpy, reads the first byte of @_f2 back and compares it with the value
; just written. It does this twice (values 3 and 2), ANDs the two comparison
; results and stores the zero-extended result through %ptr.coerce.
;
; Expected opt output: accesses to @_f1 use @llvm.amdgcn.kernel.test.lds
; directly and accesses to @_f2 use field index 2 of that struct. The plain
; store/load are expected with align 4 (the input uses align 1) and with
; !alias.scope / !noalias metadata attached; the memcpy keeps its align 1
; parameter attributes.
; Expected llc output: only a zero move into v0 followed by s_endpgm.
-define protected amdgpu_kernel void @test(i8 addrspace(1)* nocapture %ptr.coerce) local_unnamed_addr #0 {
+define protected amdgpu_kernel void @test(ptr addrspace(1) nocapture %ptr.coerce) local_unnamed_addr #0 {
; GCN-LABEL: test:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: v_mov_b32_e32 v0, 0
; GCN-NEXT: s_endpgm
; CHECK-LABEL: @test(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[VEC_TYPE:%.*]], [[VEC_TYPE]] addrspace(3)* getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_TEST_LDS_T:%.*]], [[LLVM_AMDGCN_KERNEL_TEST_LDS_T]] addrspace(3)* @llvm.amdgcn.kernel.test.lds, i32 0, i32 0), i32 0, i32 0, i32 0, i32 0, i32 0, i32 0
-; CHECK-NEXT: store i8 3, i8 addrspace(3)* [[TMP0]], align 4, !alias.scope !0, !noalias !3
-; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[VEC_TYPE]], [[VEC_TYPE]] addrspace(3)* getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_TEST_LDS_T]], [[LLVM_AMDGCN_KERNEL_TEST_LDS_T]] addrspace(3)* @llvm.amdgcn.kernel.test.lds, i32 0, i32 2), i32 0, i32 0, i32 0, i32 0, i32 0, i32 0
-; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[VEC_TYPE]], [[VEC_TYPE]] addrspace(3)* getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_TEST_LDS_T]], [[LLVM_AMDGCN_KERNEL_TEST_LDS_T]] addrspace(3)* @llvm.amdgcn.kernel.test.lds, i32 0, i32 0), i32 0, i32 0, i32 0, i32 0, i32 0, i32 0
-; CHECK-NEXT: tail call void @llvm.memcpy.p3i8.p3i8.i64(i8 addrspace(3)* noundef align 1 dereferenceable(3) [[TMP1]], i8 addrspace(3)* noundef align 1 dereferenceable(3) [[TMP2]], i64 3, i1 false), !alias.scope !5, !noalias !6
-; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[VEC_TYPE]], [[VEC_TYPE]] addrspace(3)* getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_TEST_LDS_T]], [[LLVM_AMDGCN_KERNEL_TEST_LDS_T]] addrspace(3)* @llvm.amdgcn.kernel.test.lds, i32 0, i32 2), i32 0, i32 0, i32 0, i32 0, i32 0, i32 0
-; CHECK-NEXT: [[TMP4:%.*]] = load i8, i8 addrspace(3)* [[TMP3]], align 4, !alias.scope !3, !noalias !0
+; CHECK-NEXT: store i8 3, ptr addrspace(3) @llvm.amdgcn.kernel.test.lds, align 4, !alias.scope !0, !noalias !3
+; CHECK-NEXT: tail call void @llvm.memcpy.p3.p3.i64(ptr addrspace(3) noundef align 1 dereferenceable(3) getelementptr inbounds (%llvm.amdgcn.kernel.test.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.test.lds, i32 0, i32 2), ptr addrspace(3) noundef align 1 dereferenceable(3) @llvm.amdgcn.kernel.test.lds, i64 3, i1 false), !alias.scope !5, !noalias !6
+; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.test.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.test.lds, i32 0, i32 2), align 4, !alias.scope !3, !noalias !0
; CHECK-NEXT: [[CMP_I_I:%.*]] = icmp eq i8 [[TMP4]], 3
-; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[VEC_TYPE]], [[VEC_TYPE]] addrspace(3)* getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_TEST_LDS_T]], [[LLVM_AMDGCN_KERNEL_TEST_LDS_T]] addrspace(3)* @llvm.amdgcn.kernel.test.lds, i32 0, i32 0), i32 0, i32 0, i32 0, i32 0, i32 0, i32 0
-; CHECK-NEXT: store i8 2, i8 addrspace(3)* [[TMP5]], align 4, !alias.scope !0, !noalias !3
-; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[VEC_TYPE]], [[VEC_TYPE]] addrspace(3)* getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_TEST_LDS_T]], [[LLVM_AMDGCN_KERNEL_TEST_LDS_T]] addrspace(3)* @llvm.amdgcn.kernel.test.lds, i32 0, i32 2), i32 0, i32 0, i32 0, i32 0, i32 0, i32 0
-; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[VEC_TYPE]], [[VEC_TYPE]] addrspace(3)* getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_TEST_LDS_T]], [[LLVM_AMDGCN_KERNEL_TEST_LDS_T]] addrspace(3)* @llvm.amdgcn.kernel.test.lds, i32 0, i32 0), i32 0, i32 0, i32 0, i32 0, i32 0, i32 0
-; CHECK-NEXT: tail call void @llvm.memcpy.p3i8.p3i8.i64(i8 addrspace(3)* noundef align 1 dereferenceable(3) [[TMP6]], i8 addrspace(3)* noundef align 1 dereferenceable(3) [[TMP7]], i64 3, i1 false), !alias.scope !5, !noalias !6
-; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[VEC_TYPE]], [[VEC_TYPE]] addrspace(3)* getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_TEST_LDS_T]], [[LLVM_AMDGCN_KERNEL_TEST_LDS_T]] addrspace(3)* @llvm.amdgcn.kernel.test.lds, i32 0, i32 2), i32 0, i32 0, i32 0, i32 0, i32 0, i32 0
-; CHECK-NEXT: [[TMP9:%.*]] = load i8, i8 addrspace(3)* [[TMP8]], align 4, !alias.scope !3, !noalias !0
+; CHECK-NEXT: store i8 2, ptr addrspace(3) @llvm.amdgcn.kernel.test.lds, align 4, !alias.scope !0, !noalias !3
+; CHECK-NEXT: tail call void @llvm.memcpy.p3.p3.i64(ptr addrspace(3) noundef align 1 dereferenceable(3) getelementptr inbounds (%llvm.amdgcn.kernel.test.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.test.lds, i32 0, i32 2), ptr addrspace(3) noundef align 1 dereferenceable(3) @llvm.amdgcn.kernel.test.lds, i64 3, i1 false), !alias.scope !5, !noalias !6
+; CHECK-NEXT: [[TMP9:%.*]] = load i8, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.test.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.test.lds, i32 0, i32 2), align 4, !alias.scope !3, !noalias !0
; CHECK-NEXT: [[CMP_I_I19:%.*]] = icmp eq i8 [[TMP9]], 2
; CHECK-NEXT: [[TMP10:%.*]] = and i1 [[CMP_I_I19]], [[CMP_I_I]]
; CHECK-NEXT: [[FROMBOOL8:%.*]] = zext i1 [[TMP10]] to i8
-; CHECK-NEXT: store i8 [[FROMBOOL8]], i8 addrspace(1)* [[PTR_COERCE:%.*]], align 1
+; CHECK-NEXT: store i8 [[FROMBOOL8]], ptr addrspace(1) [[PTR_COERCE:%.*]], align 1
; CHECK-NEXT: ret void
;
entry:
; First round: write 3 to @_f1, copy @_f1 to @_f2, read @_f2 back.
- store i8 3, i8 addrspace(3)* getelementptr inbounds (%vec_type, %vec_type addrspace(3)* @_f1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0), align 1
- tail call void @llvm.memcpy.p3i8.p3i8.i64(i8 addrspace(3)* noundef align 1 dereferenceable(3) getelementptr inbounds (%vec_type, %vec_type addrspace(3)* @_f2, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0), i8 addrspace(3)* noundef align 1 dereferenceable(3) getelementptr inbounds (%vec_type, %vec_type addrspace(3)* @_f1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0), i64 3, i1 false)
- %0 = load i8, i8 addrspace(3)* getelementptr inbounds (%vec_type, %vec_type addrspace(3)* @_f2, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0), align 1
+ store i8 3, ptr addrspace(3) @_f1, align 1
+ tail call void @llvm.memcpy.p3.p3.i64(ptr addrspace(3) noundef align 1 dereferenceable(3) @_f2, ptr addrspace(3) noundef align 1 dereferenceable(3) @_f1, i64 3, i1 false)
+ %0 = load i8, ptr addrspace(3) @_f2, align 1
%cmp.i.i = icmp eq i8 %0, 3
; Second round: the same sequence with the value 2.
- store i8 2, i8 addrspace(3)* getelementptr inbounds (%vec_type, %vec_type addrspace(3)* @_f1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0), align 1
- tail call void @llvm.memcpy.p3i8.p3i8.i64(i8 addrspace(3)* noundef align 1 dereferenceable(3) getelementptr inbounds (%vec_type, %vec_type addrspace(3)* @_f2, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0), i8 addrspace(3)* noundef align 1 dereferenceable(3) getelementptr inbounds (%vec_type, %vec_type addrspace(3)* @_f1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0), i64 3, i1 false)
- %1 = load i8, i8 addrspace(3)* getelementptr inbounds (%vec_type, %vec_type addrspace(3)* @_f2, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0), align 1
+ store i8 2, ptr addrspace(3) @_f1, align 1
+ tail call void @llvm.memcpy.p3.p3.i64(ptr addrspace(3) noundef align 1 dereferenceable(3) @_f2, ptr addrspace(3) noundef align 1 dereferenceable(3) @_f1, i64 3, i1 false)
+ %1 = load i8, ptr addrspace(3) @_f2, align 1
%cmp.i.i19 = icmp eq i8 %1, 2
; Both round trips must have matched for the stored flag to be 1.
%2 = and i1 %cmp.i.i19, %cmp.i.i
%frombool8 = zext i1 %2 to i8
- store i8 %frombool8, i8 addrspace(1)* %ptr.coerce, align 1
+ store i8 %frombool8, ptr addrspace(1) %ptr.coerce, align 1
ret void
}
-declare void @llvm.memcpy.p3i8.p3i8.i64(i8 addrspace(3)* noalias nocapture writeonly, i8 addrspace(3)* noalias nocapture readonly, i64, i1 immarg) #1
+declare void @llvm.memcpy.p3.p3.i64(ptr addrspace(3) noalias nocapture writeonly, ptr addrspace(3) noalias nocapture readonly, i64, i1 immarg) #1
;.
; CHECK: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
-; RUN: opt -opaque-pointers=0 -S -mtriple=amdgcn-- -amdgpu-lower-module-lds --amdgpu-lower-module-lds-strategy=module < %s | FileCheck %s
-; RUN: opt -opaque-pointers=0 -S -mtriple=amdgcn-- -passes=amdgpu-lower-module-lds --amdgpu-lower-module-lds-strategy=module < %s | FileCheck %s
+; RUN: opt -S -mtriple=amdgcn-- -amdgpu-lower-module-lds --amdgpu-lower-module-lds-strategy=module < %s | FileCheck %s
+; RUN: opt -S -mtriple=amdgcn-- -passes=amdgpu-lower-module-lds --amdgpu-lower-module-lds-strategy=module < %s | FileCheck %s
@a = internal unnamed_addr addrspace(3) global [64 x i32] undef, align 4
@b = internal unnamed_addr addrspace(3) global [64 x i32] undef, align 4
; Kernel whose input accesses already carry !alias.scope, !noalias and !tbaa
; metadata. It stores to element 0 of @a and of @b, loads element %i of each,
; and writes the sum of the two loads to %arg.
;
; The expectations below have the store to @a addressed at the kernel struct
; itself and the store to @b at its field index 1, both with align 16 (the
; input uses align 4). Every listed access is expected to carry !tbaa followed
; by !noalias.
; CHECK-LABEL: @no_clobber_ds_load_stores_x2_preexisting_aa
-; CHECK: store i32 1, i32 addrspace(3)* %0, align 16, !tbaa !0, !noalias !5
-; CHECK: %val.a = load i32, i32 addrspace(3)* %gep.a, align 4, !tbaa !0, !noalias !5
-; CHECK: store i32 2, i32 addrspace(3)* %1, align 16, !tbaa !0, !noalias !5
-; CHECK: %val.b = load i32, i32 addrspace(3)* %gep.b, align 4, !tbaa !0, !noalias !5
+; CHECK: store i32 1, ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x2_preexisting_aa.lds, align 16, !tbaa !0, !noalias !5
+; CHECK: %val.a = load i32, ptr addrspace(3) %gep.a, align 4, !tbaa !0, !noalias !5
+; CHECK: store i32 2, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.no_clobber_ds_load_stores_x2_preexisting_aa.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x2_preexisting_aa.lds, i32 0, i32 1), align 16, !tbaa !0, !noalias !5
+; CHECK: %val.b = load i32, ptr addrspace(3) %gep.b, align 4, !tbaa !0, !noalias !5
-define amdgpu_kernel void @no_clobber_ds_load_stores_x2_preexisting_aa(i32 addrspace(1)* %arg, i32 %i) {
+define amdgpu_kernel void @no_clobber_ds_load_stores_x2_preexisting_aa(ptr addrspace(1) %arg, i32 %i) {
bb:
; Accesses to @a and @b use mirrored scope lists: !0/!3 on @a, !3/!0 on @b.
- store i32 1, i32 addrspace(3)* getelementptr inbounds ([64 x i32], [64 x i32] addrspace(3)* @a, i32 0, i32 0), align 4, !alias.scope !0, !noalias !3, !tbaa !5
- %gep.a = getelementptr inbounds [64 x i32], [64 x i32] addrspace(3)* @a, i32 0, i32 %i
- %val.a = load i32, i32 addrspace(3)* %gep.a, align 4, !alias.scope !0, !noalias !3, !tbaa !5
- store i32 2, i32 addrspace(3)* getelementptr inbounds ([64 x i32], [64 x i32] addrspace(3)* @b, i32 0, i32 0), align 4, !alias.scope !3, !noalias !0, !tbaa !5
- %gep.b = getelementptr inbounds [64 x i32], [64 x i32] addrspace(3)* @b, i32 0, i32 %i
- %val.b = load i32, i32 addrspace(3)* %gep.b, align 4, !alias.scope !3, !noalias !0, !tbaa !5
+ store i32 1, ptr addrspace(3) @a, align 4, !alias.scope !0, !noalias !3, !tbaa !5
+ %gep.a = getelementptr inbounds [64 x i32], ptr addrspace(3) @a, i32 0, i32 %i
+ %val.a = load i32, ptr addrspace(3) %gep.a, align 4, !alias.scope !0, !noalias !3, !tbaa !5
+ store i32 2, ptr addrspace(3) @b, align 4, !alias.scope !3, !noalias !0, !tbaa !5
+ %gep.b = getelementptr inbounds [64 x i32], ptr addrspace(3) @b, i32 0, i32 %i
+ %val.b = load i32, ptr addrspace(3) %gep.b, align 4, !alias.scope !3, !noalias !0, !tbaa !5
%val = add i32 %val.a, %val.b
- store i32 %val, i32 addrspace(1)* %arg, align 4
+ store i32 %val, ptr addrspace(1) %arg, align 4
ret void
}
-; RUN: llc -opaque-pointers=0 -march=amdgcn -mcpu=gfx900 -O3 < %s | FileCheck -check-prefix=GCN %s
-; RUN: opt -opaque-pointers=0 -S -mtriple=amdgcn-- -amdgpu-lower-module-lds < %s | FileCheck %s
-; RUN: opt -opaque-pointers=0 -S -mtriple=amdgcn-- -passes=amdgpu-lower-module-lds < %s | FileCheck %s
+; RUN: llc -march=amdgcn -mcpu=gfx900 -O3 < %s | FileCheck -check-prefix=GCN %s
+; RUN: opt -S -mtriple=amdgcn-- -amdgpu-lower-module-lds < %s | FileCheck %s
+; RUN: opt -S -mtriple=amdgcn-- -passes=amdgpu-lower-module-lds < %s | FileCheck %s
@a = internal unnamed_addr addrspace(3) global [64 x i32] undef, align 4
@b = internal unnamed_addr addrspace(3) global [64 x i32] undef, align 4
; Kernel touching two LDS arrays: it stores to element 0 of @a and of @b, loads
; element %i of each, and writes the sum of the two loads to %arg. The input
; accesses carry no aliasing metadata.
;
; Expected opt output: the store to @a is addressed at the kernel struct itself
; and the store to @b at its field index 1, both with align 16 (the input uses
; align 4). Accesses to @a are expected with scope lists !0/!3 and accesses to
; @b with the mirrored pair !3/!0.
; Expected llc output: contains a ds_read_b32.
; GCN: ds_read_b32
; CHECK-LABEL: @no_clobber_ds_load_stores_x2
-; CHECK: store i32 1, i32 addrspace(3)* %0, align 16, !alias.scope !0, !noalias !3
-; CHECK: %val.a = load i32, i32 addrspace(3)* %gep.a, align 4, !alias.scope !0, !noalias !3
-; CHECK: store i32 2, i32 addrspace(3)* %1, align 16, !alias.scope !3, !noalias !0
-; CHECK: %val.b = load i32, i32 addrspace(3)* %gep.b, align 4, !alias.scope !3, !noalias !0
+; CHECK: store i32 1, ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x2.lds, align 16, !alias.scope !0, !noalias !3
+; CHECK: %val.a = load i32, ptr addrspace(3) %gep.a, align 4, !alias.scope !0, !noalias !3
+; CHECK: store i32 2, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.no_clobber_ds_load_stores_x2.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x2.lds, i32 0, i32 1), align 16, !alias.scope !3, !noalias !0
+; CHECK: %val.b = load i32, ptr addrspace(3) %gep.b, align 4, !alias.scope !3, !noalias !0
-define amdgpu_kernel void @no_clobber_ds_load_stores_x2(i32 addrspace(1)* %arg, i32 %i) {
+define amdgpu_kernel void @no_clobber_ds_load_stores_x2(ptr addrspace(1) %arg, i32 %i) {
bb:
- store i32 1, i32 addrspace(3)* getelementptr inbounds ([64 x i32], [64 x i32] addrspace(3)* @a, i32 0, i32 0), align 4
- %gep.a = getelementptr inbounds [64 x i32], [64 x i32] addrspace(3)* @a, i32 0, i32 %i
- %val.a = load i32, i32 addrspace(3)* %gep.a, align 4
- store i32 2, i32 addrspace(3)* getelementptr inbounds ([64 x i32], [64 x i32] addrspace(3)* @b, i32 0, i32 0), align 4
- %gep.b = getelementptr inbounds [64 x i32], [64 x i32] addrspace(3)* @b, i32 0, i32 %i
- %val.b = load i32, i32 addrspace(3)* %gep.b, align 4
+ store i32 1, ptr addrspace(3) @a, align 4
+ %gep.a = getelementptr inbounds [64 x i32], ptr addrspace(3) @a, i32 0, i32 %i
+ %val.a = load i32, ptr addrspace(3) %gep.a, align 4
+ store i32 2, ptr addrspace(3) @b, align 4
+ %gep.b = getelementptr inbounds [64 x i32], ptr addrspace(3) @b, i32 0, i32 %i
+ %val.b = load i32, ptr addrspace(3) %gep.b, align 4
%val = add i32 %val.a, %val.b
- store i32 %val, i32 addrspace(1)* %arg, align 4
+ store i32 %val, ptr addrspace(1) %arg, align 4
ret void
}
; Three-array variant: the kernel stores to element 0 of @a, @b and @c, loads
; element %i of each, and writes the sum of the three loads to %arg. The input
; accesses carry no aliasing metadata.
;
; Expected opt output: @a is addressed at the kernel struct itself (including
; the base of %gep.a), @b at field index 1 and @c at field index 2, with the
; stores at align 16 (the input uses align 4). Each array's store/load pair is
; expected with its own scope lists: !5/!8, !11/!12 and !13/!14.
; Expected llc output: contains a ds_read_b32.
; GCN-DAG: ds_read_b32
; CHECK-LABEL: @no_clobber_ds_load_stores_x3
-; CHECK: store i32 1, i32 addrspace(3)* %0, align 16, !alias.scope !5, !noalias !8
-; CHECK: %val.a = load i32, i32 addrspace(3)* %gep.a, align 4, !alias.scope !5, !noalias !8
-; CHECK: store i32 2, i32 addrspace(3)* %1, align 16, !alias.scope !11, !noalias !12
-; CHECK: %val.b = load i32, i32 addrspace(3)* %gep.b, align 4, !alias.scope !11, !noalias !12
-; CHECK: store i32 3, i32 addrspace(3)* %2, align 16, !alias.scope !13, !noalias !14
-; CHECK: %val.c = load i32, i32 addrspace(3)* %gep.c, align 4, !alias.scope !13, !noalias !14
+; CHECK: store i32 1, ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x3.lds, align 16, !alias.scope !5, !noalias !8
+; CHECK: %gep.a = getelementptr inbounds [64 x i32], ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x3.lds, i32 0, i32 %i
+; CHECK: %val.a = load i32, ptr addrspace(3) %gep.a, align 4, !alias.scope !5, !noalias !8
+; CHECK: store i32 2, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.no_clobber_ds_load_stores_x3.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x3.lds, i32 0, i32 1), align 16, !alias.scope !11, !noalias !12
+; CHECK: %val.b = load i32, ptr addrspace(3) %gep.b, align 4, !alias.scope !11, !noalias !12
+; CHECK: store i32 3, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.no_clobber_ds_load_stores_x3.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x3.lds, i32 0, i32 2), align 16, !alias.scope !13, !noalias !14
+; CHECK: %val.c = load i32, ptr addrspace(3) %gep.c, align 4, !alias.scope !13, !noalias !14
-define amdgpu_kernel void @no_clobber_ds_load_stores_x3(i32 addrspace(1)* %arg, i32 %i) {
+define amdgpu_kernel void @no_clobber_ds_load_stores_x3(ptr addrspace(1) %arg, i32 %i) {
bb:
- store i32 1, i32 addrspace(3)* getelementptr inbounds ([64 x i32], [64 x i32] addrspace(3)* @a, i32 0, i32 0), align 4
- %gep.a = getelementptr inbounds [64 x i32], [64 x i32] addrspace(3)* @a, i32 0, i32 %i
- %val.a = load i32, i32 addrspace(3)* %gep.a, align 4
- store i32 2, i32 addrspace(3)* getelementptr inbounds ([64 x i32], [64 x i32] addrspace(3)* @b, i32 0, i32 0), align 4
- %gep.b = getelementptr inbounds [64 x i32], [64 x i32] addrspace(3)* @b, i32 0, i32 %i
- %val.b = load i32, i32 addrspace(3)* %gep.b, align 4
- store i32 3, i32 addrspace(3)* getelementptr inbounds ([64 x i32], [64 x i32] addrspace(3)* @c, i32 0, i32 0), align 4
- %gep.c = getelementptr inbounds [64 x i32], [64 x i32] addrspace(3)* @c, i32 0, i32 %i
- %val.c = load i32, i32 addrspace(3)* %gep.c, align 4
+ store i32 1, ptr addrspace(3) @a, align 4
+ %gep.a = getelementptr inbounds [64 x i32], ptr addrspace(3) @a, i32 0, i32 %i
+ %val.a = load i32, ptr addrspace(3) %gep.a, align 4
+ store i32 2, ptr addrspace(3) @b, align 4
+ %gep.b = getelementptr inbounds [64 x i32], ptr addrspace(3) @b, i32 0, i32 %i
+ %val.b = load i32, ptr addrspace(3) %gep.b, align 4
+ store i32 3, ptr addrspace(3) @c, align 4
+ %gep.c = getelementptr inbounds [64 x i32], ptr addrspace(3) @c, i32 0, i32 %i
+ %val.c = load i32, ptr addrspace(3) %gep.c, align 4
%val.1 = add i32 %val.a, %val.b
%val = add i32 %val.1, %val.c
- store i32 %val, i32 addrspace(1)* %arg, align 4
+ store i32 %val, ptr addrspace(1) %arg, align 4
ret void
}
-; RUN: opt -opaque-pointers=0 -S -mtriple=amdgcn-- -amdgpu-lower-module-lds --amdgpu-lower-module-lds-strategy=module < %s | FileCheck %s
-; RUN: opt -opaque-pointers=0 -S -mtriple=amdgcn-- -passes=amdgpu-lower-module-lds --amdgpu-lower-module-lds-strategy=module < %s | FileCheck %s
+; RUN: opt -S -mtriple=amdgcn-- -amdgpu-lower-module-lds --amdgpu-lower-module-lds-strategy=module < %s | FileCheck %s
+; RUN: opt -S -mtriple=amdgcn-- -passes=amdgpu-lower-module-lds --amdgpu-lower-module-lds-strategy=module < %s | FileCheck %s
; CHECK: %llvm.amdgcn.module.lds.t = type { float, float }
; CHECK: %llvm.amdgcn.kernel.timestwo.lds.t = type { float, float }
; CHECK: @llvm.amdgcn.kernel.timestwo.lds = internal addrspace(3) global %llvm.amdgcn.kernel.timestwo.lds.t undef, align 4
; Non-kernel function whose load address is one nested constant expression:
; inttoptr(ptrtoint(flat cast of @a_func) + ptrtoint(flat cast of @a_func)).
; The expectations below show that expression broken out into separate
; addrspacecast / ptrtoint / add / inttoptr instructions, with both uses of
; @a_func replaced by @llvm.amdgcn.module.lds, and the loaded value returned.
; CHECK-LABEL: @get_func()
-; CHECK: %0 = bitcast float addrspace(3)* getelementptr inbounds (%llvm.amdgcn.module.lds.t, %llvm.amdgcn.module.lds.t addrspace(3)* @llvm.amdgcn.module.lds, i32 0, i32 0) to i32 addrspace(3)*
-; CHECK: %1 = addrspacecast i32 addrspace(3)* %0 to i32*
-; CHECK: %2 = ptrtoint i32* %1 to i64
-; CHECK: %3 = bitcast float addrspace(3)* getelementptr inbounds (%llvm.amdgcn.module.lds.t, %llvm.amdgcn.module.lds.t addrspace(3)* @llvm.amdgcn.module.lds, i32 0, i32 0) to i32 addrspace(3)*
-; CHECK: %4 = addrspacecast i32 addrspace(3)* %3 to i32*
-; CHECK: %5 = ptrtoint i32* %4 to i64
-; CHECK: %6 = add i64 %2, %5
-; CHECK: %7 = inttoptr i64 %6 to i32*
-; CHECK: %8 = load i32, i32* %7, align 4
-; CHECK: ret i32 %8
+; CHECK: %0 = addrspacecast ptr addrspace(3) @llvm.amdgcn.module.lds to ptr
+; CHECK: %1 = ptrtoint ptr %0 to i64
+; CHECK: %2 = addrspacecast ptr addrspace(3) @llvm.amdgcn.module.lds to ptr
+; CHECK: %3 = ptrtoint ptr %2 to i64
+; CHECK: %4 = add i64 %1, %3
+; CHECK: %5 = inttoptr i64 %4 to ptr
+; CHECK: %6 = load i32, ptr %5, align 4
+; CHECK: ret i32 %6
define i32 @get_func() local_unnamed_addr #0 {
entry:
- %0 = load i32, i32* inttoptr (i64 add (i64 ptrtoint (i32* addrspacecast (i32 addrspace(3)* bitcast (float addrspace(3)* @a_func to i32 addrspace(3)*) to i32*) to i64), i64 ptrtoint (i32* addrspacecast (i32 addrspace(3)* bitcast (float addrspace(3)* @a_func to i32 addrspace(3)*) to i32*) to i64)) to i32*), align 4
+ %0 = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr addrspacecast (ptr addrspace(3) @a_func to ptr) to i64), i64 ptrtoint (ptr addrspacecast (ptr addrspace(3) @a_func to ptr) to i64)) to ptr), align 4
ret i32 %0
}
; Non-kernel function that stores %x through an address built from one nested
; constant expression:
; inttoptr(ptrtoint(flat cast of @b_both) + ptrtoint(flat cast of @b_both)).
; The expectations below show that expression broken out into separate
; instructions, with both uses of @b_both replaced by field index 1 of
; @llvm.amdgcn.module.lds.
; CHECK-LABEL: @set_func(i32 %x)
-; CHECK: %0 = bitcast float addrspace(3)* getelementptr inbounds (%llvm.amdgcn.module.lds.t, %llvm.amdgcn.module.lds.t addrspace(3)* @llvm.amdgcn.module.lds, i32 0, i32 1) to i32 addrspace(3)*
-; CHECK: %1 = addrspacecast i32 addrspace(3)* %0 to i32*
-; CHECK: %2 = ptrtoint i32* %1 to i64
-; CHECK: %3 = bitcast float addrspace(3)* getelementptr inbounds (%llvm.amdgcn.module.lds.t, %llvm.amdgcn.module.lds.t addrspace(3)* @llvm.amdgcn.module.lds, i32 0, i32 1) to i32 addrspace(3)*
-; CHECK: %4 = addrspacecast i32 addrspace(3)* %3 to i32*
-; CHECK: %5 = ptrtoint i32* %4 to i64
-; CHECK: %6 = add i64 %2, %5
-; CHECK: %7 = inttoptr i64 %6 to i32*
-; CHECK: store i32 %x, i32* %7, align 4
+; CHECK: %0 = addrspacecast ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.module.lds.t, ptr addrspace(3) @llvm.amdgcn.module.lds, i32 0, i32 1) to ptr
+; CHECK: %1 = ptrtoint ptr %0 to i64
+; CHECK: %2 = addrspacecast ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.module.lds.t, ptr addrspace(3) @llvm.amdgcn.module.lds, i32 0, i32 1) to ptr
+; CHECK: %3 = ptrtoint ptr %2 to i64
+; CHECK: %4 = add i64 %1, %3
+; CHECK: %5 = inttoptr i64 %4 to ptr
+; CHECK: store i32 %x, ptr %5, align 4
; CHECK: ret void
define void @set_func(i32 %x) local_unnamed_addr #1 {
entry:
- store i32 %x, i32* inttoptr (i64 add (i64 ptrtoint (i32* addrspacecast (i32 addrspace(3)* bitcast (float addrspace(3)* @b_both to i32 addrspace(3)*) to i32*) to i64), i64 ptrtoint (i32* addrspacecast (i32 addrspace(3)* bitcast (float addrspace(3)* @b_both to i32 addrspace(3)*) to i32*) to i64)) to i32*), align 4
+ store i32 %x, ptr inttoptr (i64 add (i64 ptrtoint (ptr addrspacecast (ptr addrspace(3) @b_both to ptr) to i64), i64 ptrtoint (ptr addrspacecast (ptr addrspace(3) @b_both to ptr) to i64)) to ptr), align 4
ret void
}
; Kernel that loads an i32 from the address ptrtoint(@b_both) + ptrtoint(@kern),
; doubles it, and stores the result to ptrtoint(@kern) + ptrtoint(@b_both)
; (same operands, opposite order), each address written as one nested constant
; expression.
;
; The expectations below show both expressions broken out into separate
; instructions, with @b_both replaced by @llvm.amdgcn.kernel.timestwo.lds
; itself and @kern by field index 1 of that struct. They also require that no
; call to @llvm.donothing appears before the first expected instruction, and
; that the output definition carries attribute group #0 although the input
; definition has none.
; CHECK-LABEL: @timestwo() #0
; CHECK-NOT: call void @llvm.donothing()
-; CHECK: %1 = bitcast float addrspace(3)* getelementptr inbounds (%llvm.amdgcn.kernel.timestwo.lds.t, %llvm.amdgcn.kernel.timestwo.lds.t addrspace(3)* @llvm.amdgcn.kernel.timestwo.lds, i32 0, i32 0) to i32 addrspace(3)*
-; CHECK: %2 = addrspacecast i32 addrspace(3)* %1 to i32*
-; CHECK: %3 = ptrtoint i32* %2 to i64
-; CHECK: %4 = bitcast float addrspace(3)* getelementptr inbounds (%llvm.amdgcn.kernel.timestwo.lds.t, %llvm.amdgcn.kernel.timestwo.lds.t addrspace(3)* @llvm.amdgcn.kernel.timestwo.lds, i32 0, i32 1) to i32 addrspace(3)*
-; CHECK: %5 = addrspacecast i32 addrspace(3)* %4 to i32*
-; CHECK: %6 = ptrtoint i32* %5 to i64
-; CHECK: %7 = add i64 %3, %6
-; CHECK: %8 = inttoptr i64 %7 to i32*
-; CHECK: %ld = load i32, i32* %8, align 4
+; CHECK: %1 = addrspacecast ptr addrspace(3) @llvm.amdgcn.kernel.timestwo.lds to ptr
+; CHECK: %2 = ptrtoint ptr %1 to i64
+; CHECK: %3 = addrspacecast ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.timestwo.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.timestwo.lds, i32 0, i32 1) to ptr
+; CHECK: %4 = ptrtoint ptr %3 to i64
+; CHECK: %5 = add i64 %2, %4
+; CHECK: %6 = inttoptr i64 %5 to ptr
+; CHECK: %ld = load i32, ptr %6, align 4
; CHECK: %mul = mul i32 %ld, 2
-; CHECK: %9 = bitcast float addrspace(3)* getelementptr inbounds (%llvm.amdgcn.kernel.timestwo.lds.t, %llvm.amdgcn.kernel.timestwo.lds.t addrspace(3)* @llvm.amdgcn.kernel.timestwo.lds, i32 0, i32 1) to i32 addrspace(3)*
-; CHECK: %10 = addrspacecast i32 addrspace(3)* %9 to i32*
-; CHECK: %11 = ptrtoint i32* %10 to i64
-; CHECK: %12 = bitcast float addrspace(3)* getelementptr inbounds (%llvm.amdgcn.kernel.timestwo.lds.t, %llvm.amdgcn.kernel.timestwo.lds.t addrspace(3)* @llvm.amdgcn.kernel.timestwo.lds, i32 0, i32 0) to i32 addrspace(3)*
-; CHECK: %13 = addrspacecast i32 addrspace(3)* %12 to i32*
-; CHECK: %14 = ptrtoint i32* %13 to i64
-; CHECK: %15 = add i64 %11, %14
-; CHECK: %16 = inttoptr i64 %15 to i32*
-; CHECK: store i32 %mul, i32* %16, align 4
+; CHECK: %7 = addrspacecast ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.timestwo.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.timestwo.lds, i32 0, i32 1) to ptr
+; CHECK: %8 = ptrtoint ptr %7 to i64
+; CHECK: %9 = addrspacecast ptr addrspace(3) @llvm.amdgcn.kernel.timestwo.lds to ptr
+; CHECK: %10 = ptrtoint ptr %9 to i64
+; CHECK: %11 = add i64 %8, %10
+; CHECK: %12 = inttoptr i64 %11 to ptr
+; CHECK: store i32 %mul, ptr %12, align 4
; CHECK: ret void
define amdgpu_kernel void @timestwo() {
- %ld = load i32, i32* inttoptr (i64 add (i64 ptrtoint (i32* addrspacecast (i32 addrspace(3)* bitcast (float addrspace(3)* @b_both to i32 addrspace(3)*) to i32*) to i64), i64 ptrtoint (i32* addrspacecast (i32 addrspace(3)* bitcast (float addrspace(3)* @kern to i32 addrspace(3)*) to i32*) to i64)) to i32*), align 4
+ %ld = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr addrspacecast (ptr addrspace(3) @b_both to ptr) to i64), i64 ptrtoint (ptr addrspacecast (ptr addrspace(3) @kern to ptr) to i64)) to ptr), align 4
%mul = mul i32 %ld, 2
- store i32 %mul, i32* inttoptr (i64 add (i64 ptrtoint (i32* addrspacecast (i32 addrspace(3)* bitcast (float addrspace(3)* @kern to i32 addrspace(3)*) to i32*) to i64), i64 ptrtoint (i32* addrspacecast (i32 addrspace(3)* bitcast (float addrspace(3)* @b_both to i32 addrspace(3)*) to i32*) to i64)) to i32*), align 4
+ store i32 %mul, ptr inttoptr (i64 add (i64 ptrtoint (ptr addrspacecast (ptr addrspace(3) @kern to ptr) to i64), i64 ptrtoint (ptr addrspacecast (ptr addrspace(3) @b_both to ptr) to i64)) to ptr), align 4
ret void
}
-; RUN: opt -opaque-pointers=0 -S -mtriple=amdgcn--amdhsa -passes=amdgpu-lower-module-lds < %s --amdgpu-lower-module-lds-strategy=table | FileCheck -check-prefix=OPT %s
-; RUN: llc -opaque-pointers=0 -mtriple=amdgcn--amdhsa -verify-machineinstrs < %s --amdgpu-lower-module-lds-strategy=table | FileCheck -check-prefix=GCN %s
+; RUN: opt -S -mtriple=amdgcn--amdhsa -passes=amdgpu-lower-module-lds < %s --amdgpu-lower-module-lds-strategy=table | FileCheck -check-prefix=OPT %s
+; RUN: llc -mtriple=amdgcn--amdhsa -verify-machineinstrs < %s --amdgpu-lower-module-lds-strategy=table | FileCheck -check-prefix=GCN %s
-; Opt checks from utils/update_test_checks.py, llc -opaque-pointers=0 checks from utils/update_llc_test_checks.py, both modified.
+; Opt checks from utils/update_test_checks.py, llc checks from utils/update_llc_test_checks.py, both modified.
; Define four variables and four non-kernel functions which access exactly one variable each
@v0 = addrspace(3) global float undef
; The kernel naming pattern and the structs being named after the functions help verify placement of undef
; The remainder are constant expressions into the variable instances checked above
-; OPT{LITERAL}: @llvm.amdgcn.lds.offset.table = internal addrspace(4) constant [3 x [4 x i32]] [[4 x i32] [i32 ptrtoint (float addrspace(3)* getelementptr inbounds (%llvm.amdgcn.kernel.k01.lds.t, %llvm.amdgcn.kernel.k01.lds.t addrspace(3)* @llvm.amdgcn.kernel.k01.lds, i32 0, i32 2) to i32), i32 ptrtoint (%llvm.amdgcn.kernel.k01.lds.t addrspace(3)* @llvm.amdgcn.kernel.k01.lds to i32), i32 poison, i32 poison], [4 x i32] [i32 poison, i32 ptrtoint (%llvm.amdgcn.kernel.k123.lds.t addrspace(3)* @llvm.amdgcn.kernel.k123.lds to i32), i32 ptrtoint (i64 addrspace(3)* getelementptr inbounds (%llvm.amdgcn.kernel.k123.lds.t, %llvm.amdgcn.kernel.k123.lds.t addrspace(3)* @llvm.amdgcn.kernel.k123.lds, i32 0, i32 3) to i32), i32 ptrtoint (i8 addrspace(3)* getelementptr inbounds (%llvm.amdgcn.kernel.k123.lds.t, %llvm.amdgcn.kernel.k123.lds.t addrspace(3)* @llvm.amdgcn.kernel.k123.lds, i32 0, i32 1) to i32)], [4 x i32] [i32 poison, i32 poison, i32 ptrtoint (%llvm.amdgcn.kernel.k23.lds.t addrspace(3)* @llvm.amdgcn.kernel.k23.lds to i32), i32 ptrtoint (i8 addrspace(3)* getelementptr inbounds (%llvm.amdgcn.kernel.k23.lds.t, %llvm.amdgcn.kernel.k23.lds.t addrspace(3)* @llvm.amdgcn.kernel.k23.lds, i32 0, i32 1) to i32)]]
+; OPT{LITERAL}: @llvm.amdgcn.lds.offset.table = internal addrspace(4) constant [3 x [4 x i32]] [[4 x i32] [i32 ptrtoint (ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k01.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k01.lds, i32 0, i32 2) to i32), i32 ptrtoint (ptr addrspace(3) @llvm.amdgcn.kernel.k01.lds to i32), i32 poison, i32 poison], [4 x i32] [i32 poison, i32 ptrtoint (ptr addrspace(3) @llvm.amdgcn.kernel.k123.lds to i32), i32 ptrtoint (ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k123.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k123.lds, i32 0, i32 3) to i32), i32 ptrtoint (ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k123.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k123.lds, i32 0, i32 1) to i32)], [4 x i32] [i32 poison, i32 poison, i32 ptrtoint (ptr addrspace(3) @llvm.amdgcn.kernel.k23.lds to i32), i32 ptrtoint (ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k23.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k23.lds, i32 0, i32 1) to i32)]]
+
; @f0 is a non-kernel function that doubles the float held in @v0.
; The OPT lines expect each direct @v0 access to become a lookup of column 0
; of @llvm.amdgcn.lds.offset.table, with the row selected by the value
; returned from @llvm.amdgcn.lds.kernel.id().
define void @f0() {
; OPT-LABEL: @f0(
; OPT-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id()
-; OPT-NEXT: [[V02:%.*]] = getelementptr inbounds [3 x [4 x i32]], [3 x [4 x i32]] addrspace(4)* @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 0
-; OPT-NEXT: [[TMP2:%.*]] = load i32, i32 addrspace(4)* [[V02]], align 4
-; OPT-NEXT: [[V03:%.*]] = inttoptr i32 [[TMP2]] to float addrspace(3)*
-; OPT-NEXT: [[LD:%.*]] = load float, float addrspace(3)* [[V03]], align 4
+; OPT-NEXT: [[V02:%.*]] = getelementptr inbounds [3 x [4 x i32]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 0
+; OPT-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[V02]], align 4
+; OPT-NEXT: [[V03:%.*]] = inttoptr i32 [[TMP2]] to ptr addrspace(3)
+; OPT-NEXT: [[LD:%.*]] = load float, ptr addrspace(3) [[V03]], align 4
; OPT-NEXT: [[MUL:%.*]] = fmul float [[LD]], 2.000000e+00
-; OPT-NEXT: [[V0:%.*]] = getelementptr inbounds [3 x [4 x i32]], [3 x [4 x i32]] addrspace(4)* @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 0
-; OPT-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[V0]], align 4
-; OPT-NEXT: [[V01:%.*]] = inttoptr i32 [[TMP3]] to float addrspace(3)*
-; OPT-NEXT: store float [[MUL]], float addrspace(3)* [[V01]], align 4
+; OPT-NEXT: [[V0:%.*]] = getelementptr inbounds [3 x [4 x i32]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 0
+; OPT-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[V0]], align 4
+; OPT-NEXT: [[V01:%.*]] = inttoptr i32 [[TMP3]] to ptr addrspace(3)
+; OPT-NEXT: store float [[MUL]], ptr addrspace(3) [[V01]], align 4
; OPT-NEXT: ret void
;
; GCN-LABEL: f0:
; GCN-NEXT: ds_write_b32 v0, v1
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_setpc_b64 s[30:31]
- %ld = load float, float addrspace(3)* @v0
+ %ld = load float, ptr addrspace(3) @v0
%mul = fmul float %ld, 2.
- store float %mul, float addrspace(3)* @v0
+ store float %mul, ptr addrspace(3) @v0
ret void
}
; @f1 is a non-kernel function that multiplies the i16 held in @v1 by 3.
; The OPT lines expect each direct @v1 access to become a lookup of column 1
; of @llvm.amdgcn.lds.offset.table, with the row selected by the value
; returned from @llvm.amdgcn.lds.kernel.id().
define void @f1() {
; OPT-LABEL: @f1(
; OPT-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id()
-; OPT-NEXT: [[V12:%.*]] = getelementptr inbounds [3 x [4 x i32]], [3 x [4 x i32]] addrspace(4)* @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 1
-; OPT-NEXT: [[TMP2:%.*]] = load i32, i32 addrspace(4)* [[V12]], align 4
-; OPT-NEXT: [[V13:%.*]] = inttoptr i32 [[TMP2]] to i16 addrspace(3)*
-; OPT-NEXT: [[LD:%.*]] = load i16, i16 addrspace(3)* [[V13]], align 2
+; OPT-NEXT: [[V12:%.*]] = getelementptr inbounds [3 x [4 x i32]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 1
+; OPT-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[V12]], align 4
+; OPT-NEXT: [[V13:%.*]] = inttoptr i32 [[TMP2]] to ptr addrspace(3)
+; OPT-NEXT: [[LD:%.*]] = load i16, ptr addrspace(3) [[V13]], align 2
; OPT-NEXT: [[MUL:%.*]] = mul i16 [[LD]], 3
-; OPT-NEXT: [[V1:%.*]] = getelementptr inbounds [3 x [4 x i32]], [3 x [4 x i32]] addrspace(4)* @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 1
-; OPT-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[V1]], align 4
-; OPT-NEXT: [[V11:%.*]] = inttoptr i32 [[TMP3]] to i16 addrspace(3)*
-; OPT-NEXT: store i16 [[MUL]], i16 addrspace(3)* [[V11]], align 2
+; OPT-NEXT: [[V1:%.*]] = getelementptr inbounds [3 x [4 x i32]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 1
+; OPT-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[V1]], align 4
+; OPT-NEXT: [[V11:%.*]] = inttoptr i32 [[TMP3]] to ptr addrspace(3)
+; OPT-NEXT: store i16 [[MUL]], ptr addrspace(3) [[V11]], align 2
; OPT-NEXT: ret void
;
; GCN-LABEL: f1:
; GCN-NEXT: ds_write_b16 v0, v1
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_setpc_b64 s[30:31]
- %ld = load i16, i16 addrspace(3)* @v1
+ %ld = load i16, ptr addrspace(3) @v1
%mul = mul i16 %ld, 3
- store i16 %mul, i16 addrspace(3)* @v1
+ store i16 %mul, ptr addrspace(3) @v1
ret void
}
; @f2 is a non-kernel function that multiplies the i64 held in @v2 by 4.
; The OPT lines expect each direct @v2 access to become a lookup of column 2
; of @llvm.amdgcn.lds.offset.table, with the row selected by the value
; returned from @llvm.amdgcn.lds.kernel.id().
define void @f2() {
; OPT-LABEL: @f2(
; OPT-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id()
-; OPT-NEXT: [[V22:%.*]] = getelementptr inbounds [3 x [4 x i32]], [3 x [4 x i32]] addrspace(4)* @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 2
-; OPT-NEXT: [[TMP2:%.*]] = load i32, i32 addrspace(4)* [[V22]], align 4
-; OPT-NEXT: [[V23:%.*]] = inttoptr i32 [[TMP2]] to i64 addrspace(3)*
-; OPT-NEXT: [[LD:%.*]] = load i64, i64 addrspace(3)* [[V23]], align 4
+; OPT-NEXT: [[V22:%.*]] = getelementptr inbounds [3 x [4 x i32]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 2
+; OPT-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[V22]], align 4
+; OPT-NEXT: [[V23:%.*]] = inttoptr i32 [[TMP2]] to ptr addrspace(3)
+; OPT-NEXT: [[LD:%.*]] = load i64, ptr addrspace(3) [[V23]], align 4
; OPT-NEXT: [[MUL:%.*]] = mul i64 [[LD]], 4
-; OPT-NEXT: [[V2:%.*]] = getelementptr inbounds [3 x [4 x i32]], [3 x [4 x i32]] addrspace(4)* @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 2
-; OPT-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[V2]], align 4
-; OPT-NEXT: [[V21:%.*]] = inttoptr i32 [[TMP3]] to i64 addrspace(3)*
-; OPT-NEXT: store i64 [[MUL]], i64 addrspace(3)* [[V21]], align 4
+; OPT-NEXT: [[V2:%.*]] = getelementptr inbounds [3 x [4 x i32]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 2
+; OPT-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[V2]], align 4
+; OPT-NEXT: [[V21:%.*]] = inttoptr i32 [[TMP3]] to ptr addrspace(3)
+; OPT-NEXT: store i64 [[MUL]], ptr addrspace(3) [[V21]], align 4
; OPT-NEXT: ret void
;
; GCN-LABEL: f2:
; GCN-NEXT: ds_write_b64 v2, v[0:1]
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_setpc_b64 s[30:31]
- %ld = load i64, i64 addrspace(3)* @v2
+ %ld = load i64, ptr addrspace(3) @v2
%mul = mul i64 %ld, 4
- store i64 %mul, i64 addrspace(3)* @v2
+ store i64 %mul, ptr addrspace(3) @v2
ret void
}
; @f3 is a non-kernel function that multiplies the i8 held in @v3 by 5.
; The OPT lines expect each direct @v3 access to become a lookup of column 3
; of @llvm.amdgcn.lds.offset.table, with the row selected by the value
; returned from @llvm.amdgcn.lds.kernel.id().
define void @f3() {
; OPT-LABEL: @f3(
; OPT-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id()
-; OPT-NEXT: [[V32:%.*]] = getelementptr inbounds [3 x [4 x i32]], [3 x [4 x i32]] addrspace(4)* @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 3
-; OPT-NEXT: [[TMP2:%.*]] = load i32, i32 addrspace(4)* [[V32]], align 4
-; OPT-NEXT: [[V33:%.*]] = inttoptr i32 [[TMP2]] to i8 addrspace(3)*
-; OPT-NEXT: [[LD:%.*]] = load i8, i8 addrspace(3)* [[V33]], align 1
+; OPT-NEXT: [[V32:%.*]] = getelementptr inbounds [3 x [4 x i32]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 3
+; OPT-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[V32]], align 4
+; OPT-NEXT: [[V33:%.*]] = inttoptr i32 [[TMP2]] to ptr addrspace(3)
+; OPT-NEXT: [[LD:%.*]] = load i8, ptr addrspace(3) [[V33]], align 1
; OPT-NEXT: [[MUL:%.*]] = mul i8 [[LD]], 5
-; OPT-NEXT: [[V3:%.*]] = getelementptr inbounds [3 x [4 x i32]], [3 x [4 x i32]] addrspace(4)* @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 3
-; OPT-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[V3]], align 4
-; OPT-NEXT: [[V31:%.*]] = inttoptr i32 [[TMP3]] to i8 addrspace(3)*
-; OPT-NEXT: store i8 [[MUL]], i8 addrspace(3)* [[V31]], align 1
+; OPT-NEXT: [[V3:%.*]] = getelementptr inbounds [3 x [4 x i32]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 3
+; OPT-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[V3]], align 4
+; OPT-NEXT: [[V31:%.*]] = inttoptr i32 [[TMP3]] to ptr addrspace(3)
+; OPT-NEXT: store i8 [[MUL]], ptr addrspace(3) [[V31]], align 1
; OPT-NEXT: ret void
;
; GCN-LABEL: f3:
; GCN-NEXT: ds_write_b8 v0, v1
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_setpc_b64 s[30:31]
- %ld = load i8, i8 addrspace(3)* @v3
+ %ld = load i8, ptr addrspace(3) @v3
%mul = mul i8 %ld, 5
- store i8 %mul, i8 addrspace(3)* @v3
+ store i8 %mul, ptr addrspace(3) @v3
ret void
}
; Doesn't access any variable via a function, so it won't be in the lookup table
; @kernel_no_table multiplies the i64 held in @v2 by 8 directly in the kernel
; body. The OPT lines expect both the load and the store to address
; @llvm.amdgcn.kernel.kernel_no_table.lds itself, with no table lookup.
define amdgpu_kernel void @kernel_no_table() {
; OPT-LABEL: @kernel_no_table() {
-; OPT-NEXT: [[LD:%.*]] = load i64, i64 addrspace(3)* getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_KERNEL_NO_TABLE_LDS_T:%.*]], [[LLVM_AMDGCN_KERNEL_KERNEL_NO_TABLE_LDS_T]] addrspace(3)* @llvm.amdgcn.kernel.kernel_no_table.lds, i32 0, i32 0), align 8
+; OPT-NEXT: [[LD:%.*]] = load i64, ptr addrspace(3) @llvm.amdgcn.kernel.kernel_no_table.lds, align 8
; OPT-NEXT: [[MUL:%.*]] = mul i64 [[LD]], 8
-; OPT-NEXT: store i64 [[MUL]], i64 addrspace(3)* getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_KERNEL_NO_TABLE_LDS_T]], [[LLVM_AMDGCN_KERNEL_KERNEL_NO_TABLE_LDS_T]] addrspace(3)* @llvm.amdgcn.kernel.kernel_no_table.lds, i32 0, i32 0), align 8
+; OPT-NEXT: store i64 [[MUL]], ptr addrspace(3) @llvm.amdgcn.kernel.kernel_no_table.lds, align 8
; OPT-NEXT: ret void
;
; GCN-LABEL: kernel_no_table:
; GCN-NEXT: v_lshl_b64 v[0:1], v[0:1], 3
; GCN-NEXT: ds_write_b64 v2, v[0:1]
; GCN-NEXT: s_endpgm
- %ld = load i64, i64 addrspace(3)* @v2
+ %ld = load i64, ptr addrspace(3) @v2
%mul = mul i64 %ld, 8
- store i64 %mul, i64 addrspace(3)* @v2
+ store i64 %mul, ptr addrspace(3) @v2
ret void
}
; Access two variables, will allocate those two
; @k01: the OPT lines expect the kernel to carry !llvm.amdgcn.lds.kernel.id !0
; and to start with an llvm.donothing call whose "ExplicitUse" bundle names
; @llvm.amdgcn.kernel.k01.lds, followed by the calls to @f0 and @f1.
define amdgpu_kernel void @k01() {
; OPT-LABEL: @k01() !llvm.amdgcn.lds.kernel.id !0 {
-; OPT-NEXT: call void @llvm.donothing() [ "ExplicitUse"([[LLVM_AMDGCN_KERNEL_K01_LDS_T:%.*]] addrspace(3)* @llvm.amdgcn.kernel.k01.lds) ]
+; OPT-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.kernel.k01.lds) ]
; OPT-NEXT: call void @f0()
; OPT-NEXT: call void @f1()
; OPT-NEXT: ret void
; @k23: the OPT lines expect the kernel to carry !llvm.amdgcn.lds.kernel.id !1
; and to start with an llvm.donothing call whose "ExplicitUse" bundle names
; @llvm.amdgcn.kernel.k23.lds, followed by the calls to @f2 and @f3.
define amdgpu_kernel void @k23() {
; OPT-LABEL: @k23() !llvm.amdgcn.lds.kernel.id !1 {
-; OPT-NEXT: call void @llvm.donothing() [ "ExplicitUse"([[LLVM_AMDGCN_KERNEL_K23_LDS_T:%.*]] addrspace(3)* @llvm.amdgcn.kernel.k23.lds) ]
+; OPT-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.kernel.k23.lds) ]
; OPT-NEXT: call void @f2()
; OPT-NEXT: call void @f3()
; OPT-NEXT: ret void
; Access and allocate three variables
; @k123 calls @f1, multiplies the i8 held in @v3 by 8 in the kernel body, then
; calls @f2. The OPT lines expect kernel id metadata !2, an "ExplicitUse" of
; @llvm.amdgcn.kernel.k123.lds, and the direct @v3 accesses rewritten to
; field 1 of that kernel struct with alias.scope/noalias metadata attached.
define amdgpu_kernel void @k123() {
; OPT-LABEL: @k123() !llvm.amdgcn.lds.kernel.id !2 {
-; OPT-NEXT: call void @llvm.donothing() [ "ExplicitUse"([[LLVM_AMDGCN_KERNEL_K123_LDS_T:%.*]] addrspace(3)* @llvm.amdgcn.kernel.k123.lds) ]
+; OPT-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.kernel.k123.lds) ]
; OPT-NEXT: call void @f1()
-; OPT-NEXT: [[LD:%.*]] = load i8, i8 addrspace(3)* getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_K123_LDS_T]], [[LLVM_AMDGCN_KERNEL_K123_LDS_T]] addrspace(3)* @llvm.amdgcn.kernel.k123.lds, i32 0, i32 1), align 2, !alias.scope !3, !noalias !6
+; OPT-NEXT: [[LD:%.*]] = load i8, ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_K123_LDS_T:%.*]], ptr addrspace(3) @llvm.amdgcn.kernel.k123.lds, i32 0, i32 1), align 2, !alias.scope !3, !noalias !6
; OPT-NEXT: [[MUL:%.*]] = mul i8 [[LD]], 8
-; OPT-NEXT: store i8 [[MUL]], i8 addrspace(3)* getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_K123_LDS_T]], [[LLVM_AMDGCN_KERNEL_K123_LDS_T]] addrspace(3)* @llvm.amdgcn.kernel.k123.lds, i32 0, i32 1), align 2, !alias.scope !3, !noalias !6
+; OPT-NEXT: store i8 [[MUL]], ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_K123_LDS_T]], ptr addrspace(3) @llvm.amdgcn.kernel.k123.lds, i32 0, i32 1), align 2, !alias.scope !3, !noalias !6
; OPT-NEXT: call void @f2()
; OPT-NEXT: ret void
;
; GCN-NEXT: s_endpgm
; GCN: .amdhsa_group_segment_fixed_size 16
call void @f1()
- %ld = load i8, i8 addrspace(3)* @v3
+ %ld = load i8, ptr addrspace(3) @v3
%mul = mul i8 %ld, 8
- store i8 %mul, i8 addrspace(3)* @v3
+ store i8 %mul, ptr addrspace(3) @v3
call void @f2()
ret void
}
-; RUN: opt -opaque-pointers=0 -S -mtriple=amdgcn-- -amdgpu-lower-module-lds --amdgpu-lower-module-lds-strategy=module < %s | FileCheck %s
-; RUN: opt -opaque-pointers=0 -S -mtriple=amdgcn-- -passes=amdgpu-lower-module-lds --amdgpu-lower-module-lds-strategy=module < %s | FileCheck %s
+; RUN: opt -S -mtriple=amdgcn-- -amdgpu-lower-module-lds --amdgpu-lower-module-lds-strategy=module < %s | FileCheck %s
+; RUN: opt -S -mtriple=amdgcn-- -passes=amdgpu-lower-module-lds --amdgpu-lower-module-lds-strategy=module < %s | FileCheck %s
; Padding to meet alignment, so references to @var1 replaced with gep ptr, 0, 2
; No i64 as addrspace(3) types with initializers are ignored. Likewise no addrspace(4).
; The invalid use by the global is left unchanged
; CHECK: @var1 = addrspace(3) global i32 undef, align 8
-; CHECK: @ptr = addrspace(1) global i32 addrspace(3)* @var1, align 4
-@ptr = addrspace(1) global i32 addrspace(3)* @var1, align 4
+; CHECK: @ptr = addrspace(1) global ptr addrspace(3) @var1, align 4
+@ptr = addrspace(1) global ptr addrspace(3) @var1, align 4
; A variable that is unchanged by pass
; CHECK: @with_init = addrspace(3) global i64 0
; Use in func rewritten to access struct at address zero
; CHECK-LABEL: @func()
-; CHECK: %dec = atomicrmw fsub float addrspace(3)* getelementptr inbounds (%llvm.amdgcn.module.lds.t, %llvm.amdgcn.module.lds.t addrspace(3)* @llvm.amdgcn.module.lds, i32 0, i32 0), float 1.0
-; CHECK: %val0 = load i32, i32 addrspace(3)* getelementptr inbounds (%llvm.amdgcn.module.lds.t, %llvm.amdgcn.module.lds.t addrspace(3)* @llvm.amdgcn.module.lds, i32 0, i32 2), align 8
+; CHECK: %dec = atomicrmw fsub ptr addrspace(3) @llvm.amdgcn.module.lds, float 1.0
+; CHECK: %val0 = load i32, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.module.lds.t, ptr addrspace(3) @llvm.amdgcn.module.lds, i32 0, i32 2), align 8
; CHECK: %val1 = add i32 %val0, 4
-; CHECK: store i32 %val1, i32 addrspace(3)* getelementptr inbounds (%llvm.amdgcn.module.lds.t, %llvm.amdgcn.module.lds.t addrspace(3)* @llvm.amdgcn.module.lds, i32 0, i32 2), align 8
-; CHECK: %unused0 = atomicrmw add i64 addrspace(3)* @with_init, i64 1 monotonic
+; CHECK: store i32 %val1, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.module.lds.t, ptr addrspace(3) @llvm.amdgcn.module.lds, i32 0, i32 2), align 8
+; CHECK: %unused0 = atomicrmw add ptr addrspace(3) @with_init, i64 1 monotonic
; @func performs a monotonic atomic fsub of 1.0 on @var0, adds 4 to the i32
; held in @var1, and atomically adds 1 to @with_init.
define void @func() {
- %dec = atomicrmw fsub float addrspace(3)* @var0, float 1.0 monotonic
- %val0 = load i32, i32 addrspace(3)* @var1, align 4
+ %dec = atomicrmw fsub ptr addrspace(3) @var0, float 1.0 monotonic
+ %val0 = load i32, ptr addrspace(3) @var1, align 4
%val1 = add i32 %val0, 4
- store i32 %val1, i32 addrspace(3)* @var1, align 4
- %unused0 = atomicrmw add i64 addrspace(3)* @with_init, i64 1 monotonic
+ store i32 %val1, ptr addrspace(3) @var1, align 4
+ %unused0 = atomicrmw add ptr addrspace(3) @with_init, i64 1 monotonic
ret void
}
; This kernel calls a function that uses LDS so needs the block
; CHECK-LABEL: @kern_call()
-; CHECK: call void @llvm.donothing() [ "ExplicitUse"(%llvm.amdgcn.module.lds.t addrspace(3)* @llvm.amdgcn.module.lds) ]
+; CHECK: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.module.lds) ]
; CHECK: call void @func()
-; CHECK: %dec = atomicrmw fsub float addrspace(3)* getelementptr inbounds (%llvm.amdgcn.module.lds.t, %llvm.amdgcn.module.lds.t addrspace(3)* @llvm.amdgcn.module.lds, i32 0, i32 0), float 2.000000e+00 monotonic, align 8
+; CHECK: %dec = atomicrmw fsub ptr addrspace(3) @llvm.amdgcn.module.lds, float 2.000000e+00 monotonic, align 8
; @kern_call calls @func and then performs its own monotonic atomic fsub of
; 2.0 on @var0.
define amdgpu_kernel void @kern_call() {
call void @func()
- %dec = atomicrmw fsub float addrspace(3)* @var0, float 2.0 monotonic
+ %dec = atomicrmw fsub ptr addrspace(3) @var0, float 2.0 monotonic
ret void
}
-; RUN: opt -opaque-pointers=0 -S -mtriple=amdgcn-- -amdgpu-replace-lds-use-with-pointer -amdgpu-enable-lds-replace-with-pointer=true < %s | FileCheck %s
+; RUN: opt -S -mtriple=amdgcn-- -amdgpu-replace-lds-use-with-pointer -amdgpu-enable-lds-replace-with-pointer=true < %s | FileCheck %s
; DESCRIPTION:
;
; Pointer replacement code should be added: the checks below expect the
; address of @lds_used_within_func to be rebuilt from the i16 offset loaded
; from @lds_used_within_func.ptr rather than naming the LDS global directly.
; The input GEP uses the opaque `ptr` spelling, matching the RUN line, which
; no longer passes -opaque-pointers=0.
define internal void @func_uses_lds() {
; CHECK-LABEL: entry:
-; CHECK: %0 = load i16, i16 addrspace(3)* @lds_used_within_func.ptr, align 2
-; CHECK: %1 = getelementptr i8, i8 addrspace(3)* null, i16 %0
-; CHECK: %2 = bitcast i8 addrspace(3)* %1 to [4 x i32] addrspace(3)*
-; CHECK: %gep = getelementptr inbounds [4 x i32], [4 x i32] addrspace(3)* %2, i32 0, i32 0
+; CHECK: %0 = load i16, ptr addrspace(3) @lds_used_within_func.ptr, align 2
+; CHECK: %1 = getelementptr i8, ptr addrspace(3) null, i16 %0
+; CHECK: %gep = getelementptr inbounds [4 x i32], ptr addrspace(3) %1, i32 0, i32 0
; CHECK: ret void
entry:
- %gep = getelementptr inbounds [4 x i32], [4 x i32] addrspace(3)* @lds_used_within_func, i32 0, i32 0
+ %gep = getelementptr inbounds [4 x i32], ptr addrspace(3) @lds_used_within_func, i32 0, i32 0
; CHECK: br i1 %1, label %2, label %3
;
; CHECK-LABEL: 2:
-; CHECK: store i16 ptrtoint ([4 x i32] addrspace(3)* @lds_used_within_func to i16), i16 addrspace(3)* @lds_used_within_func.ptr, align 2
+; CHECK: store i16 ptrtoint (ptr addrspace(3) @lds_used_within_func to i16), ptr addrspace(3) @lds_used_within_func.ptr, align 2
; CHECK: br label %3
;
; CHECK-LABEL: 3:
-; RUN: opt -opaque-pointers=0 -S -mtriple=amdgcn-- -amdgpu-replace-lds-use-with-pointer -amdgpu-enable-lds-replace-with-pointer=true < %s | FileCheck %s
+; RUN: opt -S -mtriple=amdgcn-- -amdgpu-replace-lds-use-with-pointer -amdgpu-enable-lds-replace-with-pointer=true < %s | FileCheck %s
; DESCRIPTION:
;
; Pointer replacement code should be added.
; @function_3 only computes the address of element 0 of
; @lds_used_within_function_3. The checks expect that address to be rebuilt
; from the i16 offset loaded from @lds_used_within_function_3.ptr.
define internal void @function_3() {
; CHECK-LABEL: entry:
-; CHECK: %0 = load i16, i16 addrspace(3)* @lds_used_within_function_3.ptr, align 2
-; CHECK: %1 = getelementptr i8, i8 addrspace(3)* null, i16 %0
-; CHECK: %2 = bitcast i8 addrspace(3)* %1 to [3 x i32] addrspace(3)*
-; CHECK: %gep = getelementptr inbounds [3 x i32], [3 x i32] addrspace(3)* %2, i32 0, i32 0
+; CHECK: %0 = load i16, ptr addrspace(3) @lds_used_within_function_3.ptr, align 2
+; CHECK: %1 = getelementptr i8, ptr addrspace(3) null, i16 %0
+; CHECK: %gep = getelementptr inbounds [3 x i32], ptr addrspace(3) %1, i32 0, i32 0
; CHECK: ret void
entry:
- %gep = getelementptr inbounds [3 x i32], [3 x i32] addrspace(3)* @lds_used_within_function_3, i32 0, i32 0
+ %gep = getelementptr inbounds [3 x i32], ptr addrspace(3) @lds_used_within_function_3, i32 0, i32 0
ret void
}
; Pointer replacement code should be added.
; @function_2 only computes the address of element 0 of
; @lds_used_within_function_2. The checks expect that address to be rebuilt
; from the i16 offset loaded from @lds_used_within_function_2.ptr.
define internal void @function_2() {
; CHECK-LABEL: entry:
-; CHECK: %0 = load i16, i16 addrspace(3)* @lds_used_within_function_2.ptr, align 2
-; CHECK: %1 = getelementptr i8, i8 addrspace(3)* null, i16 %0
-; CHECK: %2 = bitcast i8 addrspace(3)* %1 to [2 x i32] addrspace(3)*
-; CHECK: %gep = getelementptr inbounds [2 x i32], [2 x i32] addrspace(3)* %2, i32 0, i32 0
+; CHECK: %0 = load i16, ptr addrspace(3) @lds_used_within_function_2.ptr, align 2
+; CHECK: %1 = getelementptr i8, ptr addrspace(3) null, i16 %0
+; CHECK: %gep = getelementptr inbounds [2 x i32], ptr addrspace(3) %1, i32 0, i32 0
; CHECK: ret void
entry:
- %gep = getelementptr inbounds [2 x i32], [2 x i32] addrspace(3)* @lds_used_within_function_2, i32 0, i32 0
+ %gep = getelementptr inbounds [2 x i32], ptr addrspace(3) @lds_used_within_function_2, i32 0, i32 0
ret void
}
; Pointer replacement code should be added.
; @function_1 only computes the address of element 0 of
; @lds_used_within_function_1. The checks expect that address to be rebuilt
; from the i16 offset loaded from @lds_used_within_function_1.ptr.
define internal void @function_1() {
; CHECK-LABEL: entry:
-; CHECK: %0 = load i16, i16 addrspace(3)* @lds_used_within_function_1.ptr, align 2
-; CHECK: %1 = getelementptr i8, i8 addrspace(3)* null, i16 %0
-; CHECK: %2 = bitcast i8 addrspace(3)* %1 to [1 x i32] addrspace(3)*
-; CHECK: %gep = getelementptr inbounds [1 x i32], [1 x i32] addrspace(3)* %2, i32 0, i32 0
+; CHECK: %0 = load i16, ptr addrspace(3) @lds_used_within_function_1.ptr, align 2
+; CHECK: %1 = getelementptr i8, ptr addrspace(3) null, i16 %0
+; CHECK: %gep = getelementptr inbounds [1 x i32], ptr addrspace(3) %1, i32 0, i32 0
; CHECK: ret void
entry:
- %gep = getelementptr inbounds [1 x i32], [1 x i32] addrspace(3)* @lds_used_within_function_1, i32 0, i32 0
+ %gep = getelementptr inbounds [1 x i32], ptr addrspace(3) @lds_used_within_function_1, i32 0, i32 0
ret void
}
; CHECK: br i1 %1, label %2, label %3
;
; CHECK-LABEL: 2:
-; CHECK: store i16 ptrtoint ([3 x i32] addrspace(3)* @lds_used_within_function_3 to i16), i16 addrspace(3)* @lds_used_within_function_3.ptr, align 2
-; CHECK: store i16 ptrtoint ([1 x i32] addrspace(3)* @lds_used_within_function_1 to i16), i16 addrspace(3)* @lds_used_within_function_1.ptr, align 2
+; CHECK: store i16 ptrtoint (ptr addrspace(3) @lds_used_within_function_3 to i16), ptr addrspace(3) @lds_used_within_function_3.ptr, align 2
+; CHECK: store i16 ptrtoint (ptr addrspace(3) @lds_used_within_function_1 to i16), ptr addrspace(3) @lds_used_within_function_1.ptr, align 2
; CHECK: br label %3
;
; CHECK-LABEL: 3:
; CHECK: br i1 %1, label %2, label %3
;
; CHECK-LABEL: 2:
-; CHECK: store i16 ptrtoint ([3 x i32] addrspace(3)* @lds_used_within_function_3 to i16), i16 addrspace(3)* @lds_used_within_function_3.ptr, align 2
-; CHECK: store i16 ptrtoint ([2 x i32] addrspace(3)* @lds_used_within_function_2 to i16), i16 addrspace(3)* @lds_used_within_function_2.ptr, align 2
+; CHECK: store i16 ptrtoint (ptr addrspace(3) @lds_used_within_function_3 to i16), ptr addrspace(3) @lds_used_within_function_3.ptr, align 2
+; CHECK: store i16 ptrtoint (ptr addrspace(3) @lds_used_within_function_2 to i16), ptr addrspace(3) @lds_used_within_function_2.ptr, align 2
; CHECK: br label %3
;
; CHECK-LABEL: 3:
; CHECK: br i1 %1, label %2, label %3
;
; CHECK-LABEL: 2:
-; CHECK: store i16 ptrtoint ([2 x i32] addrspace(3)* @lds_used_within_function_2 to i16), i16 addrspace(3)* @lds_used_within_function_2.ptr, align 2
-; CHECK: store i16 ptrtoint ([1 x i32] addrspace(3)* @lds_used_within_function_1 to i16), i16 addrspace(3)* @lds_used_within_function_1.ptr, align 2
+; CHECK: store i16 ptrtoint (ptr addrspace(3) @lds_used_within_function_2 to i16), ptr addrspace(3) @lds_used_within_function_2.ptr, align 2
+; CHECK: store i16 ptrtoint (ptr addrspace(3) @lds_used_within_function_1 to i16), ptr addrspace(3) @lds_used_within_function_1.ptr, align 2
; CHECK: br label %3
;
; CHECK-LABEL: 3:
-; RUN: opt -opaque-pointers=0 -S -mtriple=amdgcn-- -amdgpu-replace-lds-use-with-pointer -amdgpu-enable-lds-replace-with-pointer=true < %s | FileCheck %s
+; RUN: opt -S -mtriple=amdgcn-- -amdgpu-replace-lds-use-with-pointer -amdgpu-enable-lds-replace-with-pointer=true < %s | FileCheck %s
; DESCRIPTION:
;
@lds_used_within_func = internal addrspace(3) global [4 x i32] undef, align 4
; Function pointer should exist as it is.
-; CHECK: @ptr_to_func = internal local_unnamed_addr externally_initialized global void ()* @func_uses_lds, align 8
-@ptr_to_func = internal local_unnamed_addr externally_initialized global void ()* @func_uses_lds, align 8
+; CHECK: @ptr_to_func = internal local_unnamed_addr externally_initialized global ptr @func_uses_lds, align 8
+@ptr_to_func = internal local_unnamed_addr externally_initialized global ptr @func_uses_lds, align 8
; Pointer should be created.
; CHECK: @lds_used_within_func.ptr = internal unnamed_addr addrspace(3) global i16 undef, align 2
; Pointer replacement code should be added.
; @func_uses_lds only computes the address of element 0 of
; @lds_used_within_func. The checks expect that address to be rebuilt from
; the i16 offset loaded from @lds_used_within_func.ptr.
define internal void @func_uses_lds() {
; CHECK-LABEL: entry:
-; CHECK: %0 = load i16, i16 addrspace(3)* @lds_used_within_func.ptr, align 2
-; CHECK: %1 = getelementptr i8, i8 addrspace(3)* null, i16 %0
-; CHECK: %2 = bitcast i8 addrspace(3)* %1 to [4 x i32] addrspace(3)*
-; CHECK: %gep = getelementptr inbounds [4 x i32], [4 x i32] addrspace(3)* %2, i32 0, i32 0
+; CHECK: %0 = load i16, ptr addrspace(3) @lds_used_within_func.ptr, align 2
+; CHECK: %1 = getelementptr i8, ptr addrspace(3) null, i16 %0
+; CHECK: %gep = getelementptr inbounds [4 x i32], ptr addrspace(3) %1, i32 0, i32 0
; CHECK: ret void
entry:
- %gep = getelementptr inbounds [4 x i32], [4 x i32] addrspace(3)* @lds_used_within_func, i32 0, i32 0
+ %gep = getelementptr inbounds [4 x i32], ptr addrspace(3) @lds_used_within_func, i32 0, i32 0
ret void
}
; No change
; @func_does_not_use_lds_3 loads the function pointer stored in @ptr_to_func
; and calls through it. The checks expect the body to come out unchanged.
define internal void @func_does_not_use_lds_3() {
; CHECK-LABEL: entry:
-; CHECK: %fptr = load void ()*, void ()** @ptr_to_func, align 8
+; CHECK: %fptr = load ptr, ptr @ptr_to_func, align 8
; CHECK: call void %fptr()
; CHECK: ret void
entry:
- %fptr = load void ()*, void ()** @ptr_to_func, align 8
+ %fptr = load ptr, ptr @ptr_to_func, align 8
call void %fptr()
ret void
}
; No change
; @func_does_not_use_lds_2 loads the function pointer stored in @ptr_to_func
; and calls through it. The checks expect the body to come out unchanged.
define internal void @func_does_not_use_lds_2() {
; CHECK-LABEL: entry:
-; CHECK: %fptr = load void ()*, void ()** @ptr_to_func, align 8
+; CHECK: %fptr = load ptr, ptr @ptr_to_func, align 8
; CHECK: call void %fptr()
; CHECK: ret void
entry:
- %fptr = load void ()*, void ()** @ptr_to_func, align 8
+ %fptr = load ptr, ptr @ptr_to_func, align 8
call void %fptr()
ret void
}
; CHECK: br i1 %1, label %2, label %3
;
; CHECK-LABEL: 2:
-; CHECK: store i16 ptrtoint ([4 x i32] addrspace(3)* @lds_used_within_func to i16), i16 addrspace(3)* @lds_used_within_func.ptr, align 2
+; CHECK: store i16 ptrtoint (ptr addrspace(3) @lds_used_within_func to i16), ptr addrspace(3) @lds_used_within_func.ptr, align 2
; CHECK: br label %3
;
; CHECK-LABEL: 3:
-; RUN: opt -opaque-pointers=0 -S -mtriple=amdgcn-- -amdgpu-replace-lds-use-with-pointer -amdgpu-enable-lds-replace-with-pointer=true < %s | FileCheck %s
+; RUN: opt -S -mtriple=amdgcn-- -amdgpu-replace-lds-use-with-pointer -amdgpu-enable-lds-replace-with-pointer=true < %s | FileCheck %s
; DESCRIPTION:
;
@lds_used_within_function_3 = internal addrspace(3) global [4 x i32] undef, align 4
; Function pointers should exist. Each global stores the address of one of
; @function_1, @function_2 and @function_3; with opaque pointers the differing
; function signatures are all spelled as plain `ptr`, in both the expected
; output and the input definitions.
-; CHECK: @ptr_to_func1 = internal local_unnamed_addr externally_initialized global void (float)* @function_1, align 8
-; CHECK: @ptr_to_func2 = internal local_unnamed_addr externally_initialized global void (i16)* @function_2, align 8
-; CHECK: @ptr_to_func3 = internal local_unnamed_addr externally_initialized global void (i8)* @function_3, align 8
+; CHECK: @ptr_to_func1 = internal local_unnamed_addr externally_initialized global ptr @function_1, align 8
+; CHECK: @ptr_to_func2 = internal local_unnamed_addr externally_initialized global ptr @function_2, align 8
+; CHECK: @ptr_to_func3 = internal local_unnamed_addr externally_initialized global ptr @function_3, align 8
-@ptr_to_func1 = internal local_unnamed_addr externally_initialized global void (float)* @function_1, align 8
-@ptr_to_func2 = internal local_unnamed_addr externally_initialized global void (i16)* @function_2, align 8
-@ptr_to_func3 = internal local_unnamed_addr externally_initialized global void (i8)* @function_3, align 8
+@ptr_to_func1 = internal local_unnamed_addr externally_initialized global ptr @function_1, align 8
+@ptr_to_func2 = internal local_unnamed_addr externally_initialized global ptr @function_2, align 8
+@ptr_to_func3 = internal local_unnamed_addr externally_initialized global ptr @function_3, align 8
; Pointer replacement code should be added.
define internal void @function_3(i8 %c) {
; CHECK-LABEL: entry:
-; CHECK: %0 = load i16, i16 addrspace(3)* @lds_used_within_function_3.ptr, align 2
-; CHECK: %1 = getelementptr i8, i8 addrspace(3)* null, i16 %0
-; CHECK: %2 = bitcast i8 addrspace(3)* %1 to [4 x i32] addrspace(3)*
-; CHECK: %gep = getelementptr inbounds [4 x i32], [4 x i32] addrspace(3)* %2, i32 0, i32 0
+; CHECK: %0 = load i16, ptr addrspace(3) @lds_used_within_function_3.ptr, align 2
+; CHECK: %1 = getelementptr i8, ptr addrspace(3) null, i16 %0
+; CHECK: %gep = getelementptr inbounds [4 x i32], ptr addrspace(3) %1, i32 0, i32 0
; CHECK: ret void
entry:
- %gep = getelementptr inbounds [4 x i32], [4 x i32] addrspace(3)* @lds_used_within_function_3, i32 0, i32 0
+ %gep = getelementptr inbounds [4 x i32], ptr addrspace(3) @lds_used_within_function_3, i32 0, i32 0
; Pointer replacement code should be added.
define internal void @function_2(i16 %i) {
; CHECK-LABEL: entry:
-; CHECK: %0 = load i16, i16 addrspace(3)* @lds_used_within_function_2.ptr, align 2
-; CHECK: %1 = getelementptr i8, i8 addrspace(3)* null, i16 %0
-; CHECK: %2 = bitcast i8 addrspace(3)* %1 to [4 x i32] addrspace(3)*
-; CHECK: %gep = getelementptr inbounds [4 x i32], [4 x i32] addrspace(3)* %2, i32 0, i32 0
+; CHECK: %0 = load i16, ptr addrspace(3) @lds_used_within_function_2.ptr, align 2
+; CHECK: %1 = getelementptr i8, ptr addrspace(3) null, i16 %0
+; CHECK: %gep = getelementptr inbounds [4 x i32], ptr addrspace(3) %1, i32 0, i32 0
; CHECK: ret void
entry:
- %gep = getelementptr inbounds [4 x i32], [4 x i32] addrspace(3)* @lds_used_within_function_2, i32 0, i32 0
+ %gep = getelementptr inbounds [4 x i32], ptr addrspace(3) @lds_used_within_function_2, i32 0, i32 0
; Pointer replacement code should be added.
define internal void @function_1(float %f) {
; CHECK-LABEL: entry:
-; CHECK: %0 = load i16, i16 addrspace(3)* @lds_used_within_function_1.ptr, align 2
-; CHECK: %1 = getelementptr i8, i8 addrspace(3)* null, i16 %0
-; CHECK: %2 = bitcast i8 addrspace(3)* %1 to [4 x i32] addrspace(3)*
-; CHECK: %gep = getelementptr inbounds [4 x i32], [4 x i32] addrspace(3)* %2, i32 0, i32 0
+; CHECK: %0 = load i16, ptr addrspace(3) @lds_used_within_function_1.ptr, align 2
+; CHECK: %1 = getelementptr i8, ptr addrspace(3) null, i16 %0
+; CHECK: %gep = getelementptr inbounds [4 x i32], ptr addrspace(3) %1, i32 0, i32 0
; CHECK: ret void
entry:
- %gep = getelementptr inbounds [4 x i32], [4 x i32] addrspace(3)* @lds_used_within_function_1, i32 0, i32 0
+ %gep = getelementptr inbounds [4 x i32], ptr addrspace(3) @lds_used_within_function_1, i32 0, i32 0
; CHECK: br i1 %1, label %2, label %3
;
; CHECK-LABEL: 2:
-; CHECK: store i16 ptrtoint ([4 x i32] addrspace(3)* @lds_used_within_function_3 to i16), i16 addrspace(3)* @lds_used_within_function_3.ptr, align 2
-; CHECK: store i16 ptrtoint ([4 x i32] addrspace(3)* @lds_used_within_function_1 to i16), i16 addrspace(3)* @lds_used_within_function_1.ptr, align 2
+; CHECK: store i16 ptrtoint (ptr addrspace(3) @lds_used_within_function_3 to i16), ptr addrspace(3) @lds_used_within_function_3.ptr, align 2
+; CHECK: store i16 ptrtoint (ptr addrspace(3) @lds_used_within_function_1 to i16), ptr addrspace(3) @lds_used_within_function_1.ptr, align 2
; CHECK: br label %3
;
; CHECK-LABEL: 3:
; CHECK: call void @llvm.amdgcn.wave.barrier()
-; CHECK: %fptr3 = load void (i8)*, void (i8)** @ptr_to_func3, align 8
-; CHECK: %fptr1 = load void (float)*, void (float)** @ptr_to_func1, align 8
+; CHECK: %fptr3 = load ptr, ptr @ptr_to_func3, align 8
+; CHECK: %fptr1 = load ptr, ptr @ptr_to_func1, align 8
; CHECK: call void %fptr3(i8 1)
; CHECK: call void %fptr1(float 2.000000e+00)
; CHECK: ret void
entry:
- %fptr3 = load void (i8)*, void (i8)** @ptr_to_func3, align 8
- %fptr1 = load void (float)*, void (float)** @ptr_to_func1, align 8
+ %fptr3 = load ptr, ptr @ptr_to_func3, align 8
+ %fptr1 = load ptr, ptr @ptr_to_func1, align 8
call void %fptr3(i8 1)
call void %fptr1(float 2.0)
ret void
; CHECK: br i1 %1, label %2, label %3
;
; CHECK-LABEL: 2:
-; CHECK: store i16 ptrtoint ([4 x i32] addrspace(3)* @lds_used_within_function_3 to i16), i16 addrspace(3)* @lds_used_within_function_3.ptr, align 2
-; CHECK: store i16 ptrtoint ([4 x i32] addrspace(3)* @lds_used_within_function_2 to i16), i16 addrspace(3)* @lds_used_within_function_2.ptr, align 2
+; CHECK: store i16 ptrtoint (ptr addrspace(3) @lds_used_within_function_3 to i16), ptr addrspace(3) @lds_used_within_function_3.ptr, align 2
+; CHECK: store i16 ptrtoint (ptr addrspace(3) @lds_used_within_function_2 to i16), ptr addrspace(3) @lds_used_within_function_2.ptr, align 2
; CHECK: br label %3
;
; CHECK-LABEL: 3:
; CHECK: call void @llvm.amdgcn.wave.barrier()
-; CHECK: %fptr2 = load void (i16)*, void (i16)** @ptr_to_func2, align 8
-; CHECK: %fptr3 = load void (i8)*, void (i8)** @ptr_to_func3, align 8
+; CHECK: %fptr2 = load ptr, ptr @ptr_to_func2, align 8
+; CHECK: %fptr3 = load ptr, ptr @ptr_to_func3, align 8
; CHECK: call void %fptr2(i16 3)
; CHECK: call void %fptr3(i8 4)
; CHECK: ret void
; CHECK: br i1 %1, label %2, label %3
;
; CHECK-LABEL: 2:
-; CHECK: store i16 ptrtoint ([4 x i32] addrspace(3)* @lds_used_within_function_2 to i16), i16 addrspace(3)* @lds_used_within_function_2.ptr, align 2
-; CHECK: store i16 ptrtoint ([4 x i32] addrspace(3)* @lds_used_within_function_1 to i16), i16 addrspace(3)* @lds_used_within_function_1.ptr, align 2
+; CHECK: store i16 ptrtoint (ptr addrspace(3) @lds_used_within_function_2 to i16), ptr addrspace(3) @lds_used_within_function_2.ptr, align 2
+; CHECK: store i16 ptrtoint (ptr addrspace(3) @lds_used_within_function_1 to i16), ptr addrspace(3) @lds_used_within_function_1.ptr, align 2
; CHECK: br label %3
;
; CHECK-LABEL: 3:
; CHECK: call void @llvm.amdgcn.wave.barrier()
-; CHECK: %fptr1 = load void (float)*, void (float)** @ptr_to_func1, align 8
-; CHECK: %fptr2 = load void (i16)*, void (i16)** @ptr_to_func2, align 8
+; CHECK: %fptr1 = load ptr, ptr @ptr_to_func1, align 8
+; CHECK: %fptr2 = load ptr, ptr @ptr_to_func2, align 8
; CHECK: call void %fptr1(float 5.000000e+00)
; CHECK: call void %fptr2(i16 6)
; CHECK: ret void
entry:
- %fptr1 = load void (float)*, void (float)** @ptr_to_func1, align 8
- %fptr2 = load void (i16)*, void (i16)** @ptr_to_func2, align 8
+ %fptr1 = load ptr, ptr @ptr_to_func1, align 8
+ %fptr2 = load ptr, ptr @ptr_to_func2, align 8
call void %fptr1(float 5.0)
call void %fptr2(i16 6)
ret void
-; RUN: opt -opaque-pointers=0 -S -mtriple=amdgcn-- -amdgpu-replace-lds-use-with-pointer -amdgpu-enable-lds-replace-with-pointer=true < %s | FileCheck %s
+; RUN: opt -S -mtriple=amdgcn-- -amdgpu-replace-lds-use-with-pointer -amdgpu-enable-lds-replace-with-pointer=true < %s | FileCheck %s
; DESCRIPTION:
;
@lds_used_within_function_3 = internal addrspace(3) global [4 x i32] undef, align 4
; Function pointers should exist.
-; CHECK: @ptr_to_func1 = internal local_unnamed_addr externally_initialized global void (i16)* @function_1, align 8
-; CHECK: @ptr_to_func2 = internal local_unnamed_addr externally_initialized global void (i16)* @function_2, align 8
-; CHECK: @ptr_to_func3 = internal local_unnamed_addr externally_initialized global void (i16)* @function_3, align 8
+; CHECK: @ptr_to_func1 = internal local_unnamed_addr externally_initialized global ptr @function_1, align 8
+; CHECK: @ptr_to_func2 = internal local_unnamed_addr externally_initialized global ptr @function_2, align 8
+; CHECK: @ptr_to_func3 = internal local_unnamed_addr externally_initialized global ptr @function_3, align 8
-@ptr_to_func1 = internal local_unnamed_addr externally_initialized global void (i16)* @function_1, align 8
-@ptr_to_func2 = internal local_unnamed_addr externally_initialized global void (i16)* @function_2, align 8
-@ptr_to_func3 = internal local_unnamed_addr externally_initialized global void (i16)* @function_3, align 8
+@ptr_to_func1 = internal local_unnamed_addr externally_initialized global ptr @function_1, align 8
+@ptr_to_func2 = internal local_unnamed_addr externally_initialized global ptr @function_2, align 8
+@ptr_to_func3 = internal local_unnamed_addr externally_initialized global ptr @function_3, align 8
; Pointer replacement code should be added.
define internal void @function_3(i16 %i) {
; CHECK-LABEL: entry:
-; CHECK: %0 = load i16, i16 addrspace(3)* @lds_used_within_function_3.ptr, align 2
-; CHECK: %1 = getelementptr i8, i8 addrspace(3)* null, i16 %0
-; CHECK: %2 = bitcast i8 addrspace(3)* %1 to [4 x i32] addrspace(3)*
-; CHECK: %gep = getelementptr inbounds [4 x i32], [4 x i32] addrspace(3)* %2, i32 0, i32 0
+; CHECK: %0 = load i16, ptr addrspace(3) @lds_used_within_function_3.ptr, align 2
+; CHECK: %1 = getelementptr i8, ptr addrspace(3) null, i16 %0
+; CHECK: %gep = getelementptr inbounds [4 x i32], ptr addrspace(3) %1, i32 0, i32 0
; CHECK: ret void
entry:
- %gep = getelementptr inbounds [4 x i32], [4 x i32] addrspace(3)* @lds_used_within_function_3, i32 0, i32 0
+ %gep = getelementptr inbounds [4 x i32], ptr addrspace(3) @lds_used_within_function_3, i32 0, i32 0
; Pointer replacement code should be added.
define internal void @function_2(i16 %i) {
; CHECK-LABEL: entry:
-; CHECK: %0 = load i16, i16 addrspace(3)* @lds_used_within_function_2.ptr, align 2
-; CHECK: %1 = getelementptr i8, i8 addrspace(3)* null, i16 %0
-; CHECK: %2 = bitcast i8 addrspace(3)* %1 to [4 x i32] addrspace(3)*
-; CHECK: %gep = getelementptr inbounds [4 x i32], [4 x i32] addrspace(3)* %2, i32 0, i32 0
+; CHECK: %0 = load i16, ptr addrspace(3) @lds_used_within_function_2.ptr, align 2
+; CHECK: %1 = getelementptr i8, ptr addrspace(3) null, i16 %0
+; CHECK: %gep = getelementptr inbounds [4 x i32], ptr addrspace(3) %1, i32 0, i32 0
; CHECK: ret void
entry:
- %gep = getelementptr inbounds [4 x i32], [4 x i32] addrspace(3)* @lds_used_within_function_2, i32 0, i32 0
+ %gep = getelementptr inbounds [4 x i32], ptr addrspace(3) @lds_used_within_function_2, i32 0, i32 0
; Pointer replacement code should be added.
define internal void @function_1(i16 %i) {
; CHECK-LABEL: entry:
-; CHECK: %0 = load i16, i16 addrspace(3)* @lds_used_within_function_1.ptr, align 2
-; CHECK: %1 = getelementptr i8, i8 addrspace(3)* null, i16 %0
-; CHECK: %2 = bitcast i8 addrspace(3)* %1 to [4 x i32] addrspace(3)*
-; CHECK: %gep = getelementptr inbounds [4 x i32], [4 x i32] addrspace(3)* %2, i32 0, i32 0
+; CHECK: %0 = load i16, ptr addrspace(3) @lds_used_within_function_1.ptr, align 2
+; CHECK: %1 = getelementptr i8, ptr addrspace(3) null, i16 %0
+; CHECK: %gep = getelementptr inbounds [4 x i32], ptr addrspace(3) %1, i32 0, i32 0
; CHECK: ret void
entry:
- %gep = getelementptr inbounds [4 x i32], [4 x i32] addrspace(3)* @lds_used_within_function_1, i32 0, i32 0
+ %gep = getelementptr inbounds [4 x i32], ptr addrspace(3) @lds_used_within_function_1, i32 0, i32 0
; CHECK: br i1 %1, label %2, label %3
;
; CHECK-LABEL: 2:
-; CHECK: store i16 ptrtoint ([4 x i32] addrspace(3)* @lds_used_within_function_3 to i16), i16 addrspace(3)* @lds_used_within_function_3.ptr, align 2
-; CHECK: store i16 ptrtoint ([4 x i32] addrspace(3)* @lds_used_within_function_2 to i16), i16 addrspace(3)* @lds_used_within_function_2.ptr, align 2
-; CHECK: store i16 ptrtoint ([4 x i32] addrspace(3)* @lds_used_within_function_1 to i16), i16 addrspace(3)* @lds_used_within_function_1.ptr, align 2
+; CHECK: store i16 ptrtoint (ptr addrspace(3) @lds_used_within_function_3 to i16), ptr addrspace(3) @lds_used_within_function_3.ptr, align 2
+; CHECK: store i16 ptrtoint (ptr addrspace(3) @lds_used_within_function_2 to i16), ptr addrspace(3) @lds_used_within_function_2.ptr, align 2
+; CHECK: store i16 ptrtoint (ptr addrspace(3) @lds_used_within_function_1 to i16), ptr addrspace(3) @lds_used_within_function_1.ptr, align 2
; CHECK: br label %3
;
; CHECK-LABEL: 3:
; CHECK: call void @llvm.amdgcn.wave.barrier()
-; CHECK: %fptr1 = load void (i16)*, void (i16)** @ptr_to_func1, align 8
+; CHECK: %fptr1 = load ptr, ptr @ptr_to_func1, align 8
; CHECK: call void %fptr1(i16 6)
; CHECK: ret void
entry:
-; RUN: opt -opaque-pointers=0 -S -mtriple=amdgcn-- -amdgpu-replace-lds-use-with-pointer -amdgpu-enable-lds-replace-with-pointer=true < %s | FileCheck %s
+; RUN: opt -S -mtriple=amdgcn-- -amdgpu-replace-lds-use-with-pointer -amdgpu-enable-lds-replace-with-pointer=true < %s | FileCheck %s
; DESCRIPTION:
;
; Pointer replacement code should be added.
define internal void @function() {
; CHECK-LABEL: entry:
-; CHECK: %0 = load i16, i16 addrspace(3)* @lds3.ptr, align 2
-; CHECK: %1 = getelementptr i8, i8 addrspace(3)* null, i16 %0
-; CHECK: %2 = bitcast i8 addrspace(3)* %1 to [3 x i32] addrspace(3)*
-; CHECK: %3 = load i16, i16 addrspace(3)* @lds2.ptr, align 2
-; CHECK: %4 = getelementptr i8, i8 addrspace(3)* null, i16 %3
-; CHECK: %5 = bitcast i8 addrspace(3)* %4 to [2 x i32] addrspace(3)*
-; CHECK: %6 = load i16, i16 addrspace(3)* @lds1.ptr, align 2
-; CHECK: %7 = getelementptr i8, i8 addrspace(3)* null, i16 %6
-; CHECK: %8 = bitcast i8 addrspace(3)* %7 to [1 x i32] addrspace(3)*
-; CHECK: %gep1 = getelementptr inbounds [1 x i32], [1 x i32] addrspace(3)* %8, i32 0, i32 0
-; CHECK: %gep2 = getelementptr inbounds [2 x i32], [2 x i32] addrspace(3)* %5, i32 0, i32 0
-; CHECK: %gep3 = getelementptr inbounds [3 x i32], [3 x i32] addrspace(3)* %2, i32 0, i32 0
+; CHECK: %0 = load i16, ptr addrspace(3) @lds3.ptr, align 2
+; CHECK: %1 = getelementptr i8, ptr addrspace(3) null, i16 %0
+; CHECK: %2 = load i16, ptr addrspace(3) @lds2.ptr, align 2
+; CHECK: %3 = getelementptr i8, ptr addrspace(3) null, i16 %2
+; CHECK: %4 = load i16, ptr addrspace(3) @lds1.ptr, align 2
+; CHECK: %5 = getelementptr i8, ptr addrspace(3) null, i16 %4
+; CHECK: %gep1 = getelementptr inbounds [1 x i32], ptr addrspace(3) %5, i32 0, i32 0
+; CHECK: %gep2 = getelementptr inbounds [2 x i32], ptr addrspace(3) %3, i32 0, i32 0
+; CHECK: %gep3 = getelementptr inbounds [3 x i32], ptr addrspace(3) %1, i32 0, i32 0
; CHECK: ret void
entry:
- %gep1 = getelementptr inbounds [1 x i32], [1 x i32] addrspace(3)* @lds1, i32 0, i32 0
- %gep2 = getelementptr inbounds [2 x i32], [2 x i32] addrspace(3)* @lds2, i32 0, i32 0
- %gep3 = getelementptr inbounds [3 x i32], [3 x i32] addrspace(3)* @lds3, i32 0, i32 0
+ %gep1 = getelementptr inbounds [1 x i32], ptr addrspace(3) @lds1, i32 0, i32 0
+ %gep2 = getelementptr inbounds [2 x i32], ptr addrspace(3) @lds2, i32 0, i32 0
+ %gep3 = getelementptr inbounds [3 x i32], ptr addrspace(3) @lds3, i32 0, i32 0
ret void
}
; CHECK: br i1 %1, label %2, label %3
;
; CHECK-LABEL: 2:
-; CHECK: store i16 ptrtoint ([3 x i32] addrspace(3)* @lds3 to i16), i16 addrspace(3)* @lds3.ptr, align 2
-; CHECK: store i16 ptrtoint ([2 x i32] addrspace(3)* @lds2 to i16), i16 addrspace(3)* @lds2.ptr, align 2
-; CHECK: store i16 ptrtoint ([1 x i32] addrspace(3)* @lds1 to i16), i16 addrspace(3)* @lds1.ptr, align 2
+; CHECK: store i16 ptrtoint (ptr addrspace(3) @lds3 to i16), ptr addrspace(3) @lds3.ptr, align 2
+; CHECK: store i16 ptrtoint (ptr addrspace(3) @lds2 to i16), ptr addrspace(3) @lds2.ptr, align 2
+; CHECK: store i16 ptrtoint (ptr addrspace(3) @lds1 to i16), ptr addrspace(3) @lds1.ptr, align 2
; CHECK: br label %3
;
; CHECK-LABEL: 3:
-; RUN: opt -opaque-pointers=0 -S -mtriple=amdgcn-- -amdgpu-replace-lds-use-with-pointer -amdgpu-enable-lds-replace-with-pointer=true < %s | FileCheck %s
+; RUN: opt -S -mtriple=amdgcn-- -amdgpu-replace-lds-use-with-pointer -amdgpu-enable-lds-replace-with-pointer=true < %s | FileCheck %s
; DESCRIPTION:
;
; Pointer replacement code should be added.
define internal void @function() {
; CHECK-LABEL: entry:
-; CHECK: %0 = load i16, i16 addrspace(3)* @lds1.ptr, align 2
-; CHECK: %1 = getelementptr i8, i8 addrspace(3)* null, i16 %0
-; CHECK: %2 = bitcast i8 addrspace(3)* %1 to [1 x i32] addrspace(3)*
-; CHECK: %gep1 = getelementptr inbounds [1 x i32], [1 x i32] addrspace(3)* %2, i32 0, i32 0
-; CHECK: %gep2 = getelementptr inbounds [1 x i32], [1 x i32] addrspace(3)* %2, i32 0, i32 0
-; CHECK: %gep3 = getelementptr inbounds [1 x i32], [1 x i32] addrspace(3)* %2, i32 0, i32 0
+; CHECK: %0 = load i16, ptr addrspace(3) @lds1.ptr, align 2
+; CHECK: %1 = getelementptr i8, ptr addrspace(3) null, i16 %0
+; CHECK: %gep1 = getelementptr inbounds [1 x i32], ptr addrspace(3) %1, i32 0, i32 0
+; CHECK: %gep2 = getelementptr inbounds [1 x i32], ptr addrspace(3) %1, i32 0, i32 0
+; CHECK: %gep3 = getelementptr inbounds [1 x i32], ptr addrspace(3) %1, i32 0, i32 0
; CHECK: ret void
entry:
- %gep1 = getelementptr inbounds [1 x i32], [1 x i32] addrspace(3)* @lds1, i32 0, i32 0
- %gep2 = getelementptr inbounds [1 x i32], [1 x i32] addrspace(3)* @lds1, i32 0, i32 0
- %gep3 = getelementptr inbounds [1 x i32], [1 x i32] addrspace(3)* @lds1, i32 0, i32 0
+ %gep1 = getelementptr inbounds [1 x i32], ptr addrspace(3) @lds1, i32 0, i32 0
+ %gep2 = getelementptr inbounds [1 x i32], ptr addrspace(3) @lds1, i32 0, i32 0
+ %gep3 = getelementptr inbounds [1 x i32], ptr addrspace(3) @lds1, i32 0, i32 0
ret void
}
; CHECK: br i1 %1, label %2, label %3
;
; CHECK-LABEL: 2:
-; CHECK: store i16 ptrtoint ([1 x i32] addrspace(3)* @lds1 to i16), i16 addrspace(3)* @lds1.ptr, align 2
+; CHECK: store i16 ptrtoint (ptr addrspace(3) @lds1 to i16), ptr addrspace(3) @lds1.ptr, align 2
; CHECK: br label %3
;
; CHECK-LABEL: 3:
-; RUN: opt -opaque-pointers=0 -S -mtriple=amdgcn-- -amdgpu-replace-lds-use-with-pointer -amdgpu-enable-lds-replace-with-pointer=true < %s | FileCheck %s
+; RUN: opt -S -mtriple=amdgcn-- -amdgpu-replace-lds-use-with-pointer -amdgpu-enable-lds-replace-with-pointer=true < %s | FileCheck %s
; DESCRIPTION:
;
define void @f0(i32 %arg) {
; CHECK-LABEL: bb:
-; CHECK: %0 = load i16, i16 addrspace(3)* @lds.2.ptr, align 2
-; CHECK: %1 = getelementptr i8, i8 addrspace(3)* null, i16 %0
-; CHECK: %2 = bitcast i8 addrspace(3)* %1 to i32 addrspace(3)*
-; CHECK: %3 = load i16, i16 addrspace(3)* @lds.1.ptr, align 2
-; CHECK: %4 = getelementptr i8, i8 addrspace(3)* null, i16 %3
-; CHECK: %5 = bitcast i8 addrspace(3)* %4 to i32 addrspace(3)*
+; CHECK: %0 = load i16, ptr addrspace(3) @lds.2.ptr, align 2
+; CHECK: %1 = getelementptr i8, ptr addrspace(3) null, i16 %0
+; CHECK: %2 = load i16, ptr addrspace(3) @lds.1.ptr, align 2
+; CHECK: %3 = getelementptr i8, ptr addrspace(3) null, i16 %2
; CHECK: %id = call i32 @llvm.amdgcn.workitem.id.x()
; CHECK: %my.tmp = sub i32 %id, %arg
; CHECK: br label %bb1
; CHECK-LABEL: bb1:
; CHECK: %lsr.iv = phi i32 [ undef, %bb ], [ %my.tmp2, %Flow ]
-; CHECK: %6 = icmp ne i32 addrspace(3)* inttoptr (i32 4 to i32 addrspace(3)*), %5
+; CHECK: %4 = icmp ne ptr addrspace(3) inttoptr (i32 4 to ptr addrspace(3)), %3
; CHECK: %lsr.iv.next = add i32 %lsr.iv, 1
; CHECK: %cmp0 = icmp slt i32 %lsr.iv.next, 0
; CHECK: br i1 %cmp0, label %bb4, label %Flow
br i1 %cmp0, label %bb4, label %Flow
; CHECK-LABEL: bb4:
-; CHECK: %load = load volatile i32, i32 addrspace(1)* undef, align 4
+; CHECK: %load = load volatile i32, ptr addrspace(1) undef, align 4
; CHECK: %cmp1 = icmp sge i32 %my.tmp, %load
; CHECK: br label %Flow
bb4:
- %load = load volatile i32, i32 addrspace(1)* undef, align 4
+ %load = load volatile i32, ptr addrspace(1) undef, align 4
%cmp1 = icmp sge i32 %my.tmp, %load
br label %Flow
; CHECK-LABEL: Flow:
; CHECK: %my.tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
-; CHECK: %my.tmp3 = phi i32 addrspace(3)* [ %2, %bb4 ], [ %5, %bb1 ]
-; CHECK: %my.tmp4 = phi i1 [ %cmp1, %bb4 ], [ %6, %bb1 ]
+; CHECK: %my.tmp3 = phi ptr addrspace(3) [ %1, %bb4 ], [ %3, %bb1 ]
+; CHECK: %my.tmp4 = phi i1 [ %cmp1, %bb4 ], [ %4, %bb1 ]
; CHECK: br i1 %my.tmp4, label %bb9, label %bb1
Flow:
%my.tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
- %my.tmp3 = phi i32 addrspace(3)* [@lds.2, %bb4 ], [ @lds.1, %bb1 ]
- %my.tmp4 = phi i1 [ %cmp1, %bb4 ], [ icmp ne (i32 addrspace(3)* inttoptr (i32 4 to i32 addrspace(3)*), i32 addrspace(3)* @lds.1), %bb1 ]
+ %my.tmp3 = phi ptr addrspace(3) [@lds.2, %bb4 ], [ @lds.1, %bb1 ]
+ %my.tmp4 = phi i1 [ %cmp1, %bb4 ], [ icmp ne (ptr addrspace(3) inttoptr (i32 4 to ptr addrspace(3)), ptr addrspace(3) @lds.1), %bb1 ]
br i1 %my.tmp4, label %bb9, label %bb1
; CHECK-LABEL: bb9:
-; CHECK: store volatile i32 7, i32 addrspace(3)* undef, align 4
+; CHECK: store volatile i32 7, ptr addrspace(3) undef, align 4
; CHECK: ret void
bb9:
- store volatile i32 7, i32 addrspace(3)* undef
+ store volatile i32 7, ptr addrspace(3) undef
ret void
}
; CHECK: br i1 %2, label %3, label %4
;
; CHECK-LABEL: 3:
-; CHECK: store i16 ptrtoint (i32 addrspace(3)* @lds.2 to i16), i16 addrspace(3)* @lds.2.ptr, align 2
-; CHECK: store i16 ptrtoint (i32 addrspace(3)* @lds.1 to i16), i16 addrspace(3)* @lds.1.ptr, align 2
+; CHECK: store i16 ptrtoint (ptr addrspace(3) @lds.2 to i16), ptr addrspace(3) @lds.2.ptr, align 2
+; CHECK: store i16 ptrtoint (ptr addrspace(3) @lds.1 to i16), ptr addrspace(3) @lds.1.ptr, align 2
; CHECK: br label %4
;
; CHECK-LABEL: 4: