From d274d64ef45f99387428d80a4f4b81dee91305e8 Mon Sep 17 00:00:00 2001
From: Stanislav Mekhanoshin
Date: Wed, 23 Jun 2021 10:21:40 -0700
Subject: [PATCH] [AMDGPU] Check for pointer operand while refining LDS align

Also skip the propagation if the alignment is 1.

Differential Revision: https://reviews.llvm.org/D104796
---
 .../lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp | 13 +++++++----
 .../CodeGen/AMDGPU/lower-kernel-lds-super-align.ll | 25 ++++++++++++++++++++++
 2 files changed, 34 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
index a3a43bd..f5cd3d1 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
@@ -335,7 +335,7 @@ private:
 
   void refineUsesAlignment(Value *Ptr, Align A, const DataLayout &DL,
                            unsigned MaxDepth = 5) {
-    if (!MaxDepth)
+    if (!MaxDepth || A == 1)
       return;
 
     for (User *U : Ptr->users()) {
@@ -344,15 +344,20 @@ private:
         continue;
       }
       if (auto *SI = dyn_cast<StoreInst>(U)) {
-        SI->setAlignment(std::max(A, SI->getAlign()));
+        if (SI->getPointerOperand() == Ptr)
+          SI->setAlignment(std::max(A, SI->getAlign()));
         continue;
       }
       if (auto *AI = dyn_cast<AtomicRMWInst>(U)) {
-        AI->setAlignment(std::max(A, AI->getAlign()));
+        // No atomicrmw operation can work on pointers, but check anyway in
+        // case that ever changes or we process a ConstantExpr.
+        if (AI->getPointerOperand() == Ptr)
+          AI->setAlignment(std::max(A, AI->getAlign()));
         continue;
       }
       if (auto *AI = dyn_cast<AtomicCmpXchgInst>(U)) {
-        AI->setAlignment(std::max(A, AI->getAlign()));
+        if (AI->getPointerOperand() == Ptr)
+          AI->setAlignment(std::max(A, AI->getAlign()));
         continue;
       }
       if (auto *GEP = dyn_cast<GetElementPtrInst>(U)) {
diff --git a/llvm/test/CodeGen/AMDGPU/lower-kernel-lds-super-align.ll b/llvm/test/CodeGen/AMDGPU/lower-kernel-lds-super-align.ll
index 470177b..5ce7dcb 100644
--- a/llvm/test/CodeGen/AMDGPU/lower-kernel-lds-super-align.ll
+++ b/llvm/test/CodeGen/AMDGPU/lower-kernel-lds-super-align.ll
@@ -6,6 +6,7 @@
 ; CHECK: %llvm.amdgcn.kernel.k1.lds.t = type { [32 x i8] }
 ; CHECK: %llvm.amdgcn.kernel.k2.lds.t = type { i16, [2 x i8], i16 }
 ; CHECK: %llvm.amdgcn.kernel.k3.lds.t = type { [32 x i64], [32 x i32] }
+; CHECK: %llvm.amdgcn.kernel.k4.lds.t = type { [2 x i32 addrspace(3)*] }
 
 ; CHECK-NOT: @lds.1
 @lds.1 = internal unnamed_addr addrspace(3) global [32 x i8] undef, align 1
@@ -17,6 +18,9 @@
 ; SUPER-ALIGN_ON: @llvm.amdgcn.kernel.k3.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k3.lds.t undef, align 16
 ; SUPER-ALIGN_OFF: @llvm.amdgcn.kernel.k3.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k3.lds.t undef, align 8
 
+; SUPER-ALIGN_ON: @llvm.amdgcn.kernel.k4.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k4.lds.t undef, align 16
+; SUPER-ALIGN_OFF: @llvm.amdgcn.kernel.k4.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k4.lds.t undef, align 4
+
 ; CHECK-LABEL: @k1
 ; CHECK: %1 = getelementptr inbounds [32 x i8], [32 x i8] addrspace(3)* getelementptr inbounds (%llvm.amdgcn.kernel.k1.lds.t, %llvm.amdgcn.kernel.k1.lds.t addrspace(3)* @llvm.amdgcn.kernel.k1.lds, i32 0, i32 0), i32 0, i32 0
 ; CHECK: %2 = addrspacecast i8 addrspace(3)* %1 to i8*
@@ -127,3 +131,24 @@ define amdgpu_kernel void @k3(i64 %x) {
 
   ret void
 }
+
+@lds.6 = internal unnamed_addr addrspace(3) global [2 x i32 addrspace(3)*] undef, align 4
+
+; Check that alignment is not propagated if the use is not a pointer operand.
+
+; CHECK-LABEL: @k4
+; SUPER-ALIGN_ON: store i32 undef, i32 addrspace(3)* %ptr, align 8
+; SUPER-ALIGN_OFF: store i32 undef, i32 addrspace(3)* %ptr, align 4
+; CHECK: store i32 addrspace(3)* %ptr, i32 addrspace(3)** undef, align 4
+; SUPER-ALIGN_ON: %val1 = cmpxchg volatile i32 addrspace(3)* %ptr, i32 1, i32 2 monotonic monotonic, align 8
+; SUPER-ALIGN_OFF: %val1 = cmpxchg volatile i32 addrspace(3)* %ptr, i32 1, i32 2 monotonic monotonic, align 4
+; CHECK: %val2 = cmpxchg volatile i32 addrspace(3)** undef, i32 addrspace(3)* %ptr, i32 addrspace(3)* undef monotonic monotonic, align 4
+define amdgpu_kernel void @k4() {
+  %gep = getelementptr inbounds i32 addrspace(3)*, i32 addrspace(3)* addrspace(3)* bitcast ([2 x i32 addrspace(3)*] addrspace(3)* @lds.6 to i32 addrspace(3)* addrspace(3)*), i64 1
+  %ptr = bitcast i32 addrspace(3)* addrspace(3)* %gep to i32 addrspace(3)*
+  store i32 undef, i32 addrspace(3)* %ptr, align 4
+  store i32 addrspace(3)* %ptr, i32 addrspace(3)** undef, align 4
+  %val1 = cmpxchg volatile i32 addrspace(3)* %ptr, i32 1, i32 2 monotonic monotonic, align 4
+  %val2 = cmpxchg volatile i32 addrspace(3)** undef, i32 addrspace(3)* %ptr, i32 addrspace(3)* undef monotonic monotonic, align 4
+  ret void
+}
-- 
2.7.4
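
Editor's note: for readers skimming the diff, the standalone C++ sketch below distills the two rules this patch enforces. It is not the LLVM pass itself; MemUse, AddressOperand, and this refineUsesAlignment are illustrative stand-ins for LLVM's use lists and memory instructions. The rules: alignment knowledge about Ptr is not propagated when it is 1 (it carries no information), and it is applied to a use only when Ptr is that instruction's address operand, never when Ptr merely appears as a stored or compared value, as in the second store and second cmpxchg of @k4.

  #include <algorithm>
  #include <cassert>
  #include <cstdint>
  #include <vector>

  // Illustrative stand-in for a memory instruction's operands: the address it
  // dereferences versus the data value it moves around.
  struct MemUse {
    const void *AddressOperand; // pointer actually dereferenced by the access
    const void *ValueOperand;   // operand that is just data (may equal Ptr!)
    uint64_t Align;             // current alignment recorded on the access
  };

  // Sketch of the patched logic: bail out when A == 1, and refine a use's
  // alignment only when Ptr is the address operand, never when Ptr merely
  // appears as a value operand.
  void refineUsesAlignment(const void *Ptr, uint64_t A,
                           std::vector<MemUse> &Uses) {
    if (A == 1)
      return;
    for (MemUse &U : Uses)
      if (U.AddressOperand == Ptr)
        U.Align = std::max(U.Align, A);
  }

  int main() {
    int Slot = 0;
    const void *Ptr = &Slot; // the LDS-derived pointer, known to be align 8
    std::vector<MemUse> Uses = {
        {Ptr, nullptr, 4}, // like `store i32 undef, ... %ptr` in @k4
        {nullptr, Ptr, 4}, // like `store ... %ptr, ... undef` in @k4
    };
    refineUsesAlignment(Ptr, 8, Uses);
    assert(Uses[0].Align == 8); // address use: refined, matches the align-8 checks
    assert(Uses[1].Align == 4); // value use: untouched, matches the align-4 checks
    return 0;
  }

The two asserts mirror the SUPER-ALIGN_ON expectations in @k4: only the accesses that dereference %ptr pick up the refined alignment, while the store and cmpxchg that merely pass %ptr as a value keep their original align 4.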