From 0a07343e34fc84052e6cc54e55654412b0ed01f4 Mon Sep 17 00:00:00 2001 From: Stanislav Mekhanoshin Date: Wed, 16 Jun 2021 15:38:39 -0700 Subject: [PATCH] [AMDGPU] Fixed constexpr expansion to handle multiple uses Recently added convertConstantExprsToInstructions() does not handle the case when the same ConstantExpr is used multiple times in the same instruction. The first use is replaced, and the rest of the uses in the instruction are replaced as well by replaceUsesOfWith(). The function then attempts to replace a constant that has already been destroyed. So far this interface is only used by the AMDGPU backend. Differential Revision: https://reviews.llvm.org/D104425 --- llvm/lib/IR/ReplaceConstant.cpp | 2 +- llvm/test/CodeGen/AMDGPU/lower-kernel-lds-constexpr.ll | 15 +++++++++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/llvm/lib/IR/ReplaceConstant.cpp b/llvm/lib/IR/ReplaceConstant.cpp index 06dedb2..9382f82 100644 --- a/llvm/lib/IR/ReplaceConstant.cpp +++ b/llvm/lib/IR/ReplaceConstant.cpp @@ -84,6 +84,7 @@ void convertConstantExprsToInstructions( Instruction *I, std::map>> &CEPaths, SmallPtrSetImpl *Insts) { + SmallPtrSet Visited; for (Use &U : I->operands()) { // The operand U is either not a constant expression operand or the // constant expression paths do not belong to U, ignore U. @@ -102,7 +103,6 @@ void convertConstantExprsToInstructions( // constant expressions along all paths to corresponding instructions. 
auto *II = I; auto &Paths = CEPaths[&U]; - SmallPtrSet Visited; for (auto &Path : Paths) { for (auto *CE : Path) { if (!Visited.insert(CE).second) diff --git a/llvm/test/CodeGen/AMDGPU/lower-kernel-lds-constexpr.ll b/llvm/test/CodeGen/AMDGPU/lower-kernel-lds-constexpr.ll index 99a7c21..402a75f 100644 --- a/llvm/test/CodeGen/AMDGPU/lower-kernel-lds-constexpr.ll +++ b/llvm/test/CodeGen/AMDGPU/lower-kernel-lds-constexpr.ll @@ -9,6 +9,7 @@ ; CHECK: %llvm.amdgcn.kernel.k2.lds.t = type { i32 } ; CHECK: %llvm.amdgcn.kernel.k3.lds.t = type { [32 x i8] } ; CHECK: %llvm.amdgcn.kernel.k4.lds.t = type { [2 x i8] } +; CHECK: %llvm.amdgcn.kernel.k5.lds.t = type { [505 x i32] } ; Use constant from different kernels ;. @@ -17,6 +18,7 @@ ; CHECK: @llvm.amdgcn.kernel.k2.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k2.lds.t undef, align 4 ; CHECK: @llvm.amdgcn.kernel.k3.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k3.lds.t undef, align 16 ; CHECK: @llvm.amdgcn.kernel.k4.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k4.lds.t undef, align 2 +; CHECK: @llvm.amdgcn.kernel.k5.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k5.lds.t undef, align 16 ;. define amdgpu_kernel void @k0(i64 %x) { ; CHECK-LABEL: @k0( @@ -97,3 +99,16 @@ define amdgpu_kernel void @k4(i64 %x) { store i8 1, i8 addrspace(0)* %ptr, align 1 ret void } + +@lds.4 = internal unnamed_addr addrspace(3) global [505 x i32] undef, align 4 + +; Multiple constexpr use in a same instruction. 
+define amdgpu_kernel void @k5() { +; CHECK-LABEL: @k5( +; CHECK-NEXT: %1 = addrspacecast [505 x i32] addrspace(3)* getelementptr inbounds (%llvm.amdgcn.kernel.k5.lds.t, %llvm.amdgcn.kernel.k5.lds.t addrspace(3)* @llvm.amdgcn.kernel.k5.lds, i32 0, i32 0) to [505 x i32]* +; CHECK-NEXT: %2 = getelementptr inbounds [505 x i32], [505 x i32]* %1, i64 0, i64 0 +; CHECK-NEXT: call void undef(i32* %2, i32* %2) +; + call void undef(i32* getelementptr inbounds ([505 x i32], [505 x i32]* addrspacecast ([505 x i32] addrspace(3)* @lds.4 to [505 x i32]*), i64 0, i64 0), i32* getelementptr inbounds ([505 x i32], [505 x i32]* addrspacecast ([505 x i32] addrspace(3)* @lds.4 to [505 x i32]*), i64 0, i64 0)) + ret void +} -- 2.7.4