From: Jon Chesterfield Date: Sat, 26 Jun 2021 00:36:41 +0000 (+0100) Subject: Disable ReplaceLDS pass, patch up tests to match X-Git-Tag: llvmorg-14-init~2953 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=50ad3478bdd3f0643ee94f6663293af5b3c27afe;p=platform%2Fupstream%2Fllvm.git Disable ReplaceLDS pass, patch up tests to match Most tests passed with an extra argument to explicitly enable the pass. One does not, so it is deleted as part of this change. I can't see why the codegen would be different between the pass being on by default and being off by default but explicitly switched on. The deleted test can be retrieved from the project history. This would be a revert, but git revert was not clean. Disabling the pass and leaving it in tree is less likely to cause breakage elsewhere than patching up the git revert conflicts on unfamiliar code. It'll be landed without review, as @hsmhsm is believed unavailable at present. Differential Revision: https://reviews.llvm.org/D104962 --- diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index 2c1e509..82d0f83 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -200,7 +200,7 @@ static cl::opt<bool> EnableStructurizerWorkarounds( static cl::opt<bool> EnableLDSReplaceWithPointer( "amdgpu-enable-lds-replace-with-pointer", - cl::desc("Enable LDS replace with pointer pass"), cl::init(true), + cl::desc("Enable LDS replace with pointer pass"), cl::init(false), cl::Hidden); static cl::opt<bool> EnableLowerModuleLDS( diff --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll index a687d15..49ffd8d 100644 --- a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll +++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll @@ -42,7 +42,6 @@ ; GCN-O0-NEXT: Inliner for always_inline functions ; GCN-O0-NEXT: A No-Op Barrier Pass ; GCN-O0-NEXT: Lower OpenCL enqueued blocks -; GCN-O0-NEXT: Replace within non-kernel function use of LDS with pointer ; GCN-O0-NEXT: Lower
uses of LDS variables from non-kernel functions ; GCN-O0-NEXT: FunctionPass Manager ; GCN-O0-NEXT: Dominator Tree Construction @@ -193,7 +192,6 @@ ; GCN-O1-NEXT: Inliner for always_inline functions ; GCN-O1-NEXT: A No-Op Barrier Pass ; GCN-O1-NEXT: Lower OpenCL enqueued blocks -; GCN-O1-NEXT: Replace within non-kernel function use of LDS with pointer ; GCN-O1-NEXT: Lower uses of LDS variables from non-kernel functions ; GCN-O1-NEXT: FunctionPass Manager ; GCN-O1-NEXT: Infer address spaces @@ -442,7 +440,6 @@ ; GCN-O1-OPTS-NEXT: Inliner for always_inline functions ; GCN-O1-OPTS-NEXT: A No-Op Barrier Pass ; GCN-O1-OPTS-NEXT: Lower OpenCL enqueued blocks -; GCN-O1-OPTS-NEXT: Replace within non-kernel function use of LDS with pointer ; GCN-O1-OPTS-NEXT: Lower uses of LDS variables from non-kernel functions ; GCN-O1-OPTS-NEXT: FunctionPass Manager ; GCN-O1-OPTS-NEXT: Infer address spaces @@ -723,7 +720,6 @@ ; GCN-O2-NEXT: Inliner for always_inline functions ; GCN-O2-NEXT: A No-Op Barrier Pass ; GCN-O2-NEXT: Lower OpenCL enqueued blocks -; GCN-O2-NEXT: Replace within non-kernel function use of LDS with pointer ; GCN-O2-NEXT: Lower uses of LDS variables from non-kernel functions ; GCN-O2-NEXT: FunctionPass Manager ; GCN-O2-NEXT: Infer address spaces @@ -1005,7 +1001,6 @@ ; GCN-O3-NEXT: Inliner for always_inline functions ; GCN-O3-NEXT: A No-Op Barrier Pass ; GCN-O3-NEXT: Lower OpenCL enqueued blocks -; GCN-O3-NEXT: Replace within non-kernel function use of LDS with pointer ; GCN-O3-NEXT: Lower uses of LDS variables from non-kernel functions ; GCN-O3-NEXT: FunctionPass Manager ; GCN-O3-NEXT: Infer address spaces diff --git a/llvm/test/CodeGen/AMDGPU/replace-lds-by-ptr-call-diamond-shape.ll b/llvm/test/CodeGen/AMDGPU/replace-lds-by-ptr-call-diamond-shape.ll index 5498809..28650ba 100644 --- a/llvm/test/CodeGen/AMDGPU/replace-lds-by-ptr-call-diamond-shape.ll +++ b/llvm/test/CodeGen/AMDGPU/replace-lds-by-ptr-call-diamond-shape.ll @@ -1,4 +1,4 @@ -; RUN: opt -S 
-mtriple=amdgcn-- -amdgpu-replace-lds-use-with-pointer < %s | FileCheck %s +; RUN: opt -S -mtriple=amdgcn-- -amdgpu-replace-lds-use-with-pointer -amdgpu-enable-lds-replace-with-pointer=true < %s | FileCheck %s ; DESCRIPTION: ; diff --git a/llvm/test/CodeGen/AMDGPU/replace-lds-by-ptr-call-selected_functions.ll b/llvm/test/CodeGen/AMDGPU/replace-lds-by-ptr-call-selected_functions.ll index abc17de..6a35d86 100644 --- a/llvm/test/CodeGen/AMDGPU/replace-lds-by-ptr-call-selected_functions.ll +++ b/llvm/test/CodeGen/AMDGPU/replace-lds-by-ptr-call-selected_functions.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -mtriple=amdgcn-- -amdgpu-replace-lds-use-with-pointer < %s | FileCheck %s +; RUN: opt -S -mtriple=amdgcn-- -amdgpu-replace-lds-use-with-pointer -amdgpu-enable-lds-replace-with-pointer=true < %s | FileCheck %s ; DESCRIPTION: ; diff --git a/llvm/test/CodeGen/AMDGPU/replace-lds-by-ptr-ignore-global-scope-use.ll b/llvm/test/CodeGen/AMDGPU/replace-lds-by-ptr-ignore-global-scope-use.ll index 34a97da8..7c1de3b 100644 --- a/llvm/test/CodeGen/AMDGPU/replace-lds-by-ptr-ignore-global-scope-use.ll +++ b/llvm/test/CodeGen/AMDGPU/replace-lds-by-ptr-ignore-global-scope-use.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -mtriple=amdgcn-- -amdgpu-replace-lds-use-with-pointer < %s | FileCheck %s +; RUN: opt -S -mtriple=amdgcn-- -amdgpu-replace-lds-use-with-pointer -amdgpu-enable-lds-replace-with-pointer=true < %s | FileCheck %s ; DESCRIPTION: ; diff --git a/llvm/test/CodeGen/AMDGPU/replace-lds-by-ptr-ignore-inline-asm-call.ll b/llvm/test/CodeGen/AMDGPU/replace-lds-by-ptr-ignore-inline-asm-call.ll index 580b375..1d04b9d 100644 --- a/llvm/test/CodeGen/AMDGPU/replace-lds-by-ptr-ignore-inline-asm-call.ll +++ b/llvm/test/CodeGen/AMDGPU/replace-lds-by-ptr-ignore-inline-asm-call.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -mtriple=amdgcn-- -amdgpu-replace-lds-use-with-pointer < %s | FileCheck %s +; RUN: opt -S -mtriple=amdgcn-- -amdgpu-replace-lds-use-with-pointer -amdgpu-enable-lds-replace-with-pointer=true < %s | 
FileCheck %s ; DESCRIPTION: ; diff --git a/llvm/test/CodeGen/AMDGPU/replace-lds-by-ptr-ignore-kernel-only-used-lds.ll b/llvm/test/CodeGen/AMDGPU/replace-lds-by-ptr-ignore-kernel-only-used-lds.ll index 34a624b..f890164 100644 --- a/llvm/test/CodeGen/AMDGPU/replace-lds-by-ptr-ignore-kernel-only-used-lds.ll +++ b/llvm/test/CodeGen/AMDGPU/replace-lds-by-ptr-ignore-kernel-only-used-lds.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -mtriple=amdgcn-- -amdgpu-replace-lds-use-with-pointer < %s | FileCheck %s +; RUN: opt -S -mtriple=amdgcn-- -amdgpu-replace-lds-use-with-pointer -amdgpu-enable-lds-replace-with-pointer=true < %s | FileCheck %s ; DESCRIPTION ; ; diff --git a/llvm/test/CodeGen/AMDGPU/replace-lds-by-ptr-ignore-not-reachable-lds.ll b/llvm/test/CodeGen/AMDGPU/replace-lds-by-ptr-ignore-not-reachable-lds.ll index 385d78c..df82e38 100644 --- a/llvm/test/CodeGen/AMDGPU/replace-lds-by-ptr-ignore-not-reachable-lds.ll +++ b/llvm/test/CodeGen/AMDGPU/replace-lds-by-ptr-ignore-not-reachable-lds.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -mtriple=amdgcn-- -amdgpu-replace-lds-use-with-pointer < %s | FileCheck %s +; RUN: opt -S -mtriple=amdgcn-- -amdgpu-replace-lds-use-with-pointer -amdgpu-enable-lds-replace-with-pointer=true < %s | FileCheck %s ; DESCRIPTION ; ; diff --git a/llvm/test/CodeGen/AMDGPU/replace-lds-by-ptr-ignore-small-lds.ll b/llvm/test/CodeGen/AMDGPU/replace-lds-by-ptr-ignore-small-lds.ll index 7f73177..fe454c7 100644 --- a/llvm/test/CodeGen/AMDGPU/replace-lds-by-ptr-ignore-small-lds.ll +++ b/llvm/test/CodeGen/AMDGPU/replace-lds-by-ptr-ignore-small-lds.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -mtriple=amdgcn-- -amdgpu-replace-lds-use-with-pointer < %s | FileCheck %s +; RUN: opt -S -mtriple=amdgcn-- -amdgpu-replace-lds-use-with-pointer -amdgpu-enable-lds-replace-with-pointer=true < %s | FileCheck %s ; DESCRIPTION ; ; diff --git a/llvm/test/CodeGen/AMDGPU/replace-lds-by-ptr-indirect-call-diamond-shape.ll b/llvm/test/CodeGen/AMDGPU/replace-lds-by-ptr-indirect-call-diamond-shape.ll index 
df3cfe7..5d36a1c 100644 --- a/llvm/test/CodeGen/AMDGPU/replace-lds-by-ptr-indirect-call-diamond-shape.ll +++ b/llvm/test/CodeGen/AMDGPU/replace-lds-by-ptr-indirect-call-diamond-shape.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -mtriple=amdgcn-- -amdgpu-replace-lds-use-with-pointer < %s | FileCheck %s +; RUN: opt -S -mtriple=amdgcn-- -amdgpu-replace-lds-use-with-pointer -amdgpu-enable-lds-replace-with-pointer=true < %s | FileCheck %s ; DESCRIPTION: ; diff --git a/llvm/test/CodeGen/AMDGPU/replace-lds-by-ptr-indirect-call-selected_functions.ll b/llvm/test/CodeGen/AMDGPU/replace-lds-by-ptr-indirect-call-selected_functions.ll index 41ab55c..61bb91a 100644 --- a/llvm/test/CodeGen/AMDGPU/replace-lds-by-ptr-indirect-call-selected_functions.ll +++ b/llvm/test/CodeGen/AMDGPU/replace-lds-by-ptr-indirect-call-selected_functions.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -mtriple=amdgcn-- -amdgpu-replace-lds-use-with-pointer < %s | FileCheck %s +; RUN: opt -S -mtriple=amdgcn-- -amdgpu-replace-lds-use-with-pointer -amdgpu-enable-lds-replace-with-pointer=true < %s | FileCheck %s ; DESCRIPTION: ; diff --git a/llvm/test/CodeGen/AMDGPU/replace-lds-by-ptr-indirect-call-signature-match.ll b/llvm/test/CodeGen/AMDGPU/replace-lds-by-ptr-indirect-call-signature-match.ll index 671f989..55fd210 100644 --- a/llvm/test/CodeGen/AMDGPU/replace-lds-by-ptr-indirect-call-signature-match.ll +++ b/llvm/test/CodeGen/AMDGPU/replace-lds-by-ptr-indirect-call-signature-match.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -mtriple=amdgcn-- -amdgpu-replace-lds-use-with-pointer < %s | FileCheck %s +; RUN: opt -S -mtriple=amdgcn-- -amdgpu-replace-lds-use-with-pointer -amdgpu-enable-lds-replace-with-pointer=true < %s | FileCheck %s ; DESCRIPTION: ; diff --git a/llvm/test/CodeGen/AMDGPU/replace-lds-by-ptr-lds-offsets.ll b/llvm/test/CodeGen/AMDGPU/replace-lds-by-ptr-lds-offsets.ll deleted file mode 100644 index e571fd8..0000000 --- a/llvm/test/CodeGen/AMDGPU/replace-lds-by-ptr-lds-offsets.ll +++ /dev/null @@ -1,212 +0,0 @@ -; RUN: opt -S 
-mtriple=amdgcn-- -amdgpu-replace-lds-use-with-pointer < %s | FileCheck --check-prefix=POINTER-REPLACE %s -; RUN: opt -S -mtriple=amdgcn-- -amdgpu-replace-lds-use-with-pointer -amdgpu-lower-module-lds < %s | FileCheck --check-prefix=LOWER_LDS %s -; RUN: llc -mtriple=amdgcn-- -mcpu=gfx900 < %s | FileCheck --check-prefix=GCN %s - -; -; DESCRIPTION: -; -; 1. There are three lds defined - @lds.1, @lds.2 and @lds.3, which are of types i32, i64, and [2 x i64]. -; @lds.3 is aliased to to @alias.to.lds.3 -; 2. @lds.1 is used in function @f1, and @lds.2 is used in function @f2, @alias.to.lds.3 is used in kernel @k1. - -; 3. Pointer-replacement pass replaces @lds.1 and @lds.2 by pointers @lds.1.ptr and @lds.2.ptr respectively. -; However it does not touch @lds.3 since it is used in global scope (aliased). -; -; 4. LDS-lowering pass sees use of @lds.1.ptr in function @f1, use of @lds.2.ptr in function @f2, and use of -; @lds.3 (via alias @alias.to.lds.3) in kernel @k1. Hence it module lowers these lds into struct instance -; @llvm.amdgcn.module.lds. -; -; The struct member order is - [lds.3, lds.1.ptr, lds.2.ptr]. Since @llvm.amdgcn.module.lds itself is allocated -; on address 0, lds.3 is allocated on address 0, lds.1.ptr is allocated on address 16, and lds.2.ptr is allocated -; on address 18. -; -; Again LDS-lowering pass sees use of @lds.1 and @lds.2 in kernel. Hence it kernel lowers these lds into struct -; instance @llvm.amdgcn.kernel.k1.lds. -; -; The struct member order is - [@lds.2, @lds.1]. By now, already (16 + 2 + 2) 20 byte of memory allocated, @lds.2 -; is allocated on address 24 since it needs to be allocated on 8 byte boundary, and @lds.1 is allocated on address -; 32. -; -; 5. Hence the final GCN ISA looks as below: -; -; Within kernel @k1: -; address 24 is stored in address 18. 
-; address 32 is stored in address 16 -; -; Within function @f1: -; address 32 is loaded from address 16 -; -; Within function @f2: -; address 24 is loaded from address 18 -; - - -; POINTER-REPLACE: @lds.1 = addrspace(3) global i32 undef, align 4 -; POINTER-REPLACE: @lds.2 = addrspace(3) global i64 undef, align 8 -; POINTER-REPLACE: @lds.3 = addrspace(3) global [2 x i64] undef, align 16 -; POINTER-REPLACE: @lds.1.ptr = internal unnamed_addr addrspace(3) global i16 undef, align 2 -; POINTER-REPLACE: @lds.2.ptr = internal unnamed_addr addrspace(3) global i16 undef, align 2 -; POINTER-REPLACE: @alias.to.lds.3 = alias [2 x i64], [2 x i64] addrspace(3)* @lds.3 - - -; LOWER_LDS-NOT: @lds.1 -; LOWER_LDS-NOT: @lds.2 -; LOWER_LDS-NOT: @lds.3 -; LOWER_LDS: %llvm.amdgcn.module.lds.t = type { [2 x i64], i16, i16 } -; LOWER_LDS: %llvm.amdgcn.kernel.k1.lds.t = type { i64, i32 } -; LOWER_LDS: @llvm.amdgcn.module.lds = internal addrspace(3) global %llvm.amdgcn.module.lds.t undef, align 16 -; LOWER_LDS: @llvm.compiler.used = appending global [1 x i8*] [i8* addrspacecast (i8 addrspace(3)* bitcast (%llvm.amdgcn.module.lds.t addrspace(3)* @llvm.amdgcn.module.lds to i8 addrspace(3)*) to i8*)], section "llvm.metadata" -; LOWER_LDS: @llvm.amdgcn.kernel.k1.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k1.lds.t undef, align 8 -; LOWER_LDS: @alias.to.lds.3 = alias [2 x i64], getelementptr inbounds (%llvm.amdgcn.module.lds.t, %llvm.amdgcn.module.lds.t addrspace(3)* @llvm.amdgcn.module.lds, i32 0, i32 0) - -@lds.1 = addrspace(3) global i32 undef, align 4 -@lds.2 = addrspace(3) global i64 undef, align 8 -@lds.3 = addrspace(3) global [2 x i64] undef, align 16 -@alias.to.lds.3 = alias [2 x i64], [2 x i64] addrspace(3)* @lds.3 - -; POINTER-REPLACE-LABEL: @f1 -; POINTER-REPLACE: %1 = load i16, i16 addrspace(3)* @lds.1.ptr, align 2 -; POINTER-REPLACE: %2 = getelementptr i8, i8 addrspace(3)* null, i16 %1 -; POINTER-REPLACE: %3 = bitcast i8 addrspace(3)* %2 to i32 addrspace(3)* -; 
POINTER-REPLACE: store i32 7, i32 addrspace(3)* %3, align 4 -; POINTER-REPLACE: ret void - - -; LOWER_LDS-LABEL: @f1 -; LOWER_LDS: %1 = load i16, i16 addrspace(3)* getelementptr inbounds (%llvm.amdgcn.module.lds.t, %llvm.amdgcn.module.lds.t addrspace(3)* @llvm.amdgcn.module.lds, i32 0, i32 1), align 16 -; LOWER_LDS: %2 = getelementptr i8, i8 addrspace(3)* null, i16 %1 -; LOWER_LDS: %3 = bitcast i8 addrspace(3)* %2 to i32 addrspace(3)* -; LOWER_LDS: store i32 7, i32 addrspace(3)* %3, align 4 -; LOWER_LDS: ret void - - -; GCN-LABEL: f1: -; GCN: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN: v_mov_b32_e32 v0, 0 -; GCN: ds_read_i16 v0, v0 offset:16 -; GCN: v_mov_b32_e32 v1, 7 -; GCN: s_waitcnt lgkmcnt(0) -; GCN: ds_write_b32 v0, v1 -; GCN: s_waitcnt lgkmcnt(0) -; GCN: s_setpc_b64 s[30:31] -define void @f1() { - store i32 7, i32 addrspace(3)* @lds.1 - ret void -} - -; POINTER-REPLACE-LABEL: @f2 -; POINTER-REPLACE: %1 = load i16, i16 addrspace(3)* @lds.2.ptr, align 2 -; POINTER-REPLACE: %2 = getelementptr i8, i8 addrspace(3)* null, i16 %1 -; POINTER-REPLACE: %3 = bitcast i8 addrspace(3)* %2 to i64 addrspace(3)* -; POINTER-REPLACE: store i64 15, i64 addrspace(3)* %3, align 4 -; POINTER-REPLACE: ret void - - -; LOWER_LDS-LABEL: @f2 -; LOWER_LDS: %1 = load i16, i16 addrspace(3)* getelementptr inbounds (%llvm.amdgcn.module.lds.t, %llvm.amdgcn.module.lds.t addrspace(3)* @llvm.amdgcn.module.lds, i32 0, i32 2), align 2 -; LOWER_LDS: %2 = getelementptr i8, i8 addrspace(3)* null, i16 %1 -; LOWER_LDS: %3 = bitcast i8 addrspace(3)* %2 to i64 addrspace(3)* -; LOWER_LDS: store i64 15, i64 addrspace(3)* %3, align 4 -; LOWER_LDS: ret void - - -; GCN-LABEL: f2: -; GCN: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN: v_mov_b32_e32 v1, 0 -; GCN: ds_read_i16 v2, v1 offset:18 -; GCN: v_mov_b32_e32 v0, 15 -; GCN: s_waitcnt lgkmcnt(0) -; GCN: ds_write_b64 v2, v[0:1] -; GCN: s_waitcnt lgkmcnt(0) -; GCN: s_setpc_b64 s[30:31] -define void @f2() { - store i64 15, i64 addrspace(3)* @lds.2 - ret 
void -} - -; POINTER-REPLACE-LABEL: @k1 -; POINTER-REPLACE: %1 = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) -; POINTER-REPLACE: %2 = icmp eq i32 %1, 0 -; POINTER-REPLACE: br i1 %2, label %3, label %4 -; -; POINTER-REPLACE-LABEL: 3: -; POINTER-REPLACE: store i16 ptrtoint (i64 addrspace(3)* @lds.2 to i16), i16 addrspace(3)* @lds.2.ptr, align 2 -; POINTER-REPLACE: store i16 ptrtoint (i32 addrspace(3)* @lds.1 to i16), i16 addrspace(3)* @lds.1.ptr, align 2 -; POINTER-REPLACE: br label %4 -; -; POINTER-REPLACE-LABEL: 4: -; POINTER-REPLACE: call void @llvm.amdgcn.wave.barrier() -; POINTER-REPLACE: %bc = bitcast [2 x i64] addrspace(3)* @alias.to.lds.3 to i8 addrspace(3)* -; POINTER-REPLACE: store i8 3, i8 addrspace(3)* %bc, align 2 -; POINTER-REPLACE: call void @f1() -; POINTER-REPLACE: call void @f2() -; POINTER-REPLACE: ret void - - -; LOWER_LDS-LABEL: @k1 -; LOWER_LDS: call void @llvm.donothing() [ "ExplicitUse"(%llvm.amdgcn.module.lds.t addrspace(3)* @llvm.amdgcn.module.lds) ] -; LOWER_LDS: %1 = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) -; LOWER_LDS: %2 = icmp eq i32 %1, 0 -; LOWER_LDS: br i1 %2, label %3, label %6 -; -; LOWER_LDS-LABEL: 3: -; LOWER_LDS: %4 = ptrtoint i64 addrspace(3)* getelementptr inbounds (%llvm.amdgcn.kernel.k1.lds.t, %llvm.amdgcn.kernel.k1.lds.t addrspace(3)* @llvm.amdgcn.kernel.k1.lds, i32 0, i32 0) to i16 -; LOWER_LDS: store i16 %4, i16 addrspace(3)* getelementptr inbounds (%llvm.amdgcn.module.lds.t, %llvm.amdgcn.module.lds.t addrspace(3)* @llvm.amdgcn.module.lds, i32 0, i32 2), align 2 -; LOWER_LDS: %5 = ptrtoint i32 addrspace(3)* getelementptr inbounds (%llvm.amdgcn.kernel.k1.lds.t, %llvm.amdgcn.kernel.k1.lds.t addrspace(3)* @llvm.amdgcn.kernel.k1.lds, i32 0, i32 1) to i16 -; LOWER_LDS: store i16 %5, i16 addrspace(3)* getelementptr inbounds (%llvm.amdgcn.module.lds.t, %llvm.amdgcn.module.lds.t addrspace(3)* @llvm.amdgcn.module.lds, i32 0, i32 1), align 16 -; LOWER_LDS: br label %6 -; -; LOWER_LDS-LABEL: 6: -; LOWER_LDS: call void 
@llvm.amdgcn.wave.barrier() -; LOWER_LDS: %bc = bitcast [2 x i64] addrspace(3)* @alias.to.lds.3 to i8 addrspace(3)* -; LOWER_LDS: store i8 3, i8 addrspace(3)* %bc, align 2 -; LOWER_LDS: call void @f1() -; LOWER_LDS: call void @f2() -; LOWER_LDS: ret void - - -; GCN-LABEL: k1: -; GCN: s_mov_b32 s8, SCRATCH_RSRC_DWORD0 -; GCN: s_mov_b32 s9, SCRATCH_RSRC_DWORD1 -; GCN: s_mov_b32 s10, -1 -; GCN: s_mov_b32 s11, 0xe00000 -; GCN: s_add_u32 s8, s8, s1 -; GCN: v_mbcnt_lo_u32_b32 v0, -1, 0 -; GCN: s_addc_u32 s9, s9, 0 -; GCN: v_cmp_eq_u32_e32 vcc, 0, v0 -; GCN: s_mov_b32 s32, 0 -; GCN: s_and_saveexec_b64 s[0:1], vcc -; GCN: s_cbranch_execz BB2_2 -; GCN: v_mov_b32_e32 v0, 0 -; GCN: v_mov_b32_e32 v1, 0x180020 -; GCN: ds_write_b32 v0, v1 offset:16 -; GCN-LABEL: BB2_2: -; GCN: s_or_b64 exec, exec, s[0:1] -; GCN: s_getpc_b64 s[0:1] -; GCN: s_add_u32 s0, s0, f1@gotpcrel32@lo+4 -; GCN: s_addc_u32 s1, s1, f1@gotpcrel32@hi+12 -; GCN: s_load_dwordx2 s[4:5], s[0:1], 0x0 -; GCN: s_mov_b64 s[0:1], s[8:9] -; GCN: s_mov_b64 s[2:3], s[10:11] -; GCN: v_mov_b32_e32 v0, alias.to.lds.3@abs32@lo -; GCN: v_mov_b32_e32 v1, 3 -; ; wave barrier -; GCN: ds_write_b8 v0, v1 -; GCN: s_waitcnt lgkmcnt(0) -; GCN: s_swappc_b64 s[30:31], s[4:5] -; GCN: s_getpc_b64 s[0:1] -; GCN: s_add_u32 s0, s0, f2@gotpcrel32@lo+4 -; GCN: s_addc_u32 s1, s1, f2@gotpcrel32@hi+12 -; GCN: s_load_dwordx2 s[4:5], s[0:1], 0x0 -; GCN: s_mov_b64 s[0:1], s[8:9] -; GCN: s_mov_b64 s[2:3], s[10:11] -; GCN: s_waitcnt lgkmcnt(0) -; GCN: s_swappc_b64 s[30:31], s[4:5] -; GCN: s_endpgm -define amdgpu_kernel void @k1() { - %bc = bitcast [2 x i64] addrspace(3)* @alias.to.lds.3 to i8 addrspace(3)* - store i8 3, i8 addrspace(3)* %bc, align 2 - call void @f1() - call void @f2() - ret void -} diff --git a/llvm/test/CodeGen/AMDGPU/replace-lds-by-ptr-use-multiple-lds.ll b/llvm/test/CodeGen/AMDGPU/replace-lds-by-ptr-use-multiple-lds.ll index 314f909..b2c6928 100644 --- a/llvm/test/CodeGen/AMDGPU/replace-lds-by-ptr-use-multiple-lds.ll +++ 
b/llvm/test/CodeGen/AMDGPU/replace-lds-by-ptr-use-multiple-lds.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -mtriple=amdgcn-- -amdgpu-replace-lds-use-with-pointer < %s | FileCheck %s +; RUN: opt -S -mtriple=amdgcn-- -amdgpu-replace-lds-use-with-pointer -amdgpu-enable-lds-replace-with-pointer=true < %s | FileCheck %s ; DESCRIPTION: ; diff --git a/llvm/test/CodeGen/AMDGPU/replace-lds-by-ptr-use-same-lds.ll b/llvm/test/CodeGen/AMDGPU/replace-lds-by-ptr-use-same-lds.ll index 453b5fd..c0a614f 100644 --- a/llvm/test/CodeGen/AMDGPU/replace-lds-by-ptr-use-same-lds.ll +++ b/llvm/test/CodeGen/AMDGPU/replace-lds-by-ptr-use-same-lds.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -mtriple=amdgcn-- -amdgpu-replace-lds-use-with-pointer < %s | FileCheck %s +; RUN: opt -S -mtriple=amdgcn-- -amdgpu-replace-lds-use-with-pointer -amdgpu-enable-lds-replace-with-pointer=true < %s | FileCheck %s ; DESCRIPTION: ; diff --git a/llvm/test/CodeGen/AMDGPU/replace-lds-by-ptr-use-within-const-expr1.ll b/llvm/test/CodeGen/AMDGPU/replace-lds-by-ptr-use-within-const-expr1.ll index 763af6a..858f9be 100644 --- a/llvm/test/CodeGen/AMDGPU/replace-lds-by-ptr-use-within-const-expr1.ll +++ b/llvm/test/CodeGen/AMDGPU/replace-lds-by-ptr-use-within-const-expr1.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -mtriple=amdgcn-- -amdgpu-replace-lds-use-with-pointer < %s | FileCheck %s +; RUN: opt -S -mtriple=amdgcn-- -amdgpu-replace-lds-use-with-pointer -amdgpu-enable-lds-replace-with-pointer=true < %s | FileCheck %s ; DESCRIPTION: ; diff --git a/llvm/test/CodeGen/AMDGPU/replace-lds-by-ptr-use-within-const-expr2.ll b/llvm/test/CodeGen/AMDGPU/replace-lds-by-ptr-use-within-const-expr2.ll index 24bcee3..43d120d 100644 --- a/llvm/test/CodeGen/AMDGPU/replace-lds-by-ptr-use-within-const-expr2.ll +++ b/llvm/test/CodeGen/AMDGPU/replace-lds-by-ptr-use-within-const-expr2.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -mtriple=amdgcn-- -amdgpu-replace-lds-use-with-pointer < %s | FileCheck %s +; RUN: opt -S -mtriple=amdgcn-- -amdgpu-replace-lds-use-with-pointer 
-amdgpu-enable-lds-replace-with-pointer=true < %s | FileCheck %s ; DESCRIPTION: ; There is one lds global defined here, and this lds is used within a single non-kernel diff --git a/llvm/test/CodeGen/AMDGPU/replace-lds-by-ptr-use-within-phi-inst.ll b/llvm/test/CodeGen/AMDGPU/replace-lds-by-ptr-use-within-phi-inst.ll index 616439a..f228599 100644 --- a/llvm/test/CodeGen/AMDGPU/replace-lds-by-ptr-use-within-phi-inst.ll +++ b/llvm/test/CodeGen/AMDGPU/replace-lds-by-ptr-use-within-phi-inst.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -mtriple=amdgcn-- -amdgpu-replace-lds-use-with-pointer < %s | FileCheck %s +; RUN: opt -S -mtriple=amdgcn-- -amdgpu-replace-lds-use-with-pointer -amdgpu-enable-lds-replace-with-pointer=true < %s | FileCheck %s ; DESCRIPTION: ;