From f225471c68881d31835a06c6b2f2b40bdaa287d5 Mon Sep 17 00:00:00 2001
From: Justin Lebar <justin.lebar@gmail.com>
Date: Sun, 28 May 2023 07:53:04 -0700
Subject: [PATCH] [LSV] Fix the ContextInst for computeKnownBits.

Previously we used the later of GEPA or GEPB.  This is hacky because
really we should be using the later of the two load/store instructions
being considered.  But also it's flat-out incorrect, because GEPA and
GEPB might be in different BBs, in which case we cannot ask which one
comes last (assertion failure,
https://reviews.llvm.org/D149893#4378332).

Fixed, now we use the correct context instruction.

Differential Revision: https://reviews.llvm.org/D151630
---
 .../Transforms/Vectorize/LoadStoreVectorizer.cpp   | 51 +++++++++++++---------
 .../LoadStoreVectorizer/AMDGPU/gep-bitcast.ll      | 19 ++++++++
 2 files changed, 49 insertions(+), 21 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp b/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
index f874c63..d4a1815 100644
--- a/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
@@ -296,10 +296,13 @@ private:
 
   /// Tries to compute the offset in bytes PtrB - PtrA.
   std::optional<APInt> getConstantOffset(Value *PtrA, Value *PtrB,
+                                         Instruction *ContextInst,
                                          unsigned Depth = 0);
-  std::optional<APInt> gtConstantOffsetComplexAddrs(Value *PtrA, Value *PtrB,
-                                                    unsigned Depth);
+  std::optional<APInt> getConstantOffsetComplexAddrs(Value *PtrA, Value *PtrB,
+                                                     Instruction *ContextInst,
+                                                     unsigned Depth);
   std::optional<APInt> getConstantOffsetSelects(Value *PtrA, Value *PtrB,
+                                                Instruction *ContextInst,
                                                 unsigned Depth);
 
   /// Gets the element type of the vector that the chain will load or store.
@@ -1160,15 +1163,15 @@ static bool checkIfSafeAddSequence(const APInt &IdxDiff, Instruction *AddOpA,
   return false;
 }
 
-std::optional<APInt> Vectorizer::gtConstantOffsetComplexAddrs(Value *PtrA,
-                                                              Value *PtrB,
-                                                              unsigned Depth) {
-  LLVM_DEBUG(dbgs() << "LSV: gtConstantOffsetComplexAddrs PtrA=" << *PtrA
-                    << " PtrB=" << *PtrB << " Depth=" << Depth << "\n");
+std::optional<APInt> Vectorizer::getConstantOffsetComplexAddrs(
+    Value *PtrA, Value *PtrB, Instruction *ContextInst, unsigned Depth) {
+  LLVM_DEBUG(dbgs() << "LSV: getConstantOffsetComplexAddrs PtrA=" << *PtrA
+                    << " PtrB=" << *PtrB << " ContextInst=" << *ContextInst
+                    << " Depth=" << Depth << "\n");
   auto *GEPA = dyn_cast<GetElementPtrInst>(PtrA);
   auto *GEPB = dyn_cast<GetElementPtrInst>(PtrB);
   if (!GEPA || !GEPB)
-    return getConstantOffsetSelects(PtrA, PtrB, Depth);
+    return getConstantOffsetSelects(PtrA, PtrB, ContextInst, Depth);
 
   // Look through GEPs after checking they're the same except for the last
   // index.
@@ -1215,7 +1218,7 @@ std::optional<APInt> Vectorizer::gtConstantOffsetComplexAddrs(Value *PtrA,
     return std::nullopt;
 
   APInt IdxDiff = *IdxDiffRange.getSingleElement();
-  LLVM_DEBUG(dbgs() << "LSV: gtConstantOffsetComplexAddrs IdxDiff=" << IdxDiff
+  LLVM_DEBUG(dbgs() << "LSV: getConstantOffsetComplexAddrs IdxDiff=" << IdxDiff
                     << "\n");
 
   // Now we need to prove that adding IdxDiff to ValA won't overflow.
@@ -1257,7 +1260,6 @@ std::optional<APInt> Vectorizer::gtConstantOffsetComplexAddrs(Value *PtrA,
   if (!Safe) {
     // When computing known bits, use the GEPs as context instructions, since
     // they likely are in the same BB as the load/store.
-    Instruction *ContextInst = GEPA->comesBefore(GEPB) ? GEPB : GEPA;
     KnownBits Known(BitWidth);
     computeKnownBits((IdxDiff.sge(0) ? ValA : OpB), Known, DL, 0, &AC,
                      ContextInst, &DT);
@@ -1274,8 +1276,8 @@ std::optional<APInt> Vectorizer::gtConstantOffsetComplexAddrs(Value *PtrA,
   return std::nullopt;
 }
 
-std::optional<APInt>
-Vectorizer::getConstantOffsetSelects(Value *PtrA, Value *PtrB, unsigned Depth) {
+std::optional<APInt> Vectorizer::getConstantOffsetSelects(
+    Value *PtrA, Value *PtrB, Instruction *ContextInst, unsigned Depth) {
   if (Depth++ == MaxDepth)
     return std::nullopt;
 
@@ -1284,13 +1286,15 @@ Vectorizer::getConstantOffsetSelects(Value *PtrA, Value *PtrB, unsigned Depth) {
       if (SelectA->getCondition() != SelectB->getCondition())
         return std::nullopt;
       LLVM_DEBUG(dbgs() << "LSV: getConstantOffsetSelects, PtrA=" << *PtrA
-                        << ", PtrB=" << *PtrB << ", Depth=" << Depth << "\n");
+                        << ", PtrB=" << *PtrB << ", ContextInst="
+                        << *ContextInst << ", Depth=" << Depth << "\n");
       std::optional<APInt> TrueDiff = getConstantOffset(
-          SelectA->getTrueValue(), SelectB->getTrueValue(), Depth);
+          SelectA->getTrueValue(), SelectB->getTrueValue(), ContextInst, Depth);
       if (!TrueDiff.has_value())
         return std::nullopt;
-      std::optional<APInt> FalseDiff = getConstantOffset(
-          SelectA->getFalseValue(), SelectB->getFalseValue(), Depth);
+      std::optional<APInt> FalseDiff =
+          getConstantOffset(SelectA->getFalseValue(), SelectB->getFalseValue(),
+                            ContextInst, Depth);
       if (TrueDiff == FalseDiff)
         return TrueDiff;
     }
@@ -1429,9 +1433,11 @@ std::vector<Chain> Vectorizer::gatherChains(ArrayRef<Instruction *> Instrs) {
     auto ChainIter = MRU.begin();
     for (size_t J = 0; J < MaxChainsToTry && ChainIter != MRU.end();
          ++J, ++ChainIter) {
-      std::optional<APInt> Offset =
-          getConstantOffset(getLoadStorePointerOperand(ChainIter->first),
-                            getLoadStorePointerOperand(I));
+      std::optional<APInt> Offset = getConstantOffset(
+          getLoadStorePointerOperand(ChainIter->first),
+          getLoadStorePointerOperand(I),
+          /*ContextInst=*/
+          (ChainIter->first->comesBefore(I) ? I : ChainIter->first));
       if (Offset.has_value()) {
         // `Offset` might not have the expected number of bits, if e.g. AS has a
         // different number of bits than opaque pointers.
@@ -1464,9 +1470,11 @@ std::vector<Chain> Vectorizer::gatherChains(ArrayRef<Instruction *> Instrs) {
 }
 
 std::optional<APInt> Vectorizer::getConstantOffset(Value *PtrA, Value *PtrB,
+                                                   Instruction *ContextInst,
                                                    unsigned Depth) {
   LLVM_DEBUG(dbgs() << "LSV: getConstantOffset, PtrA=" << *PtrA
-                    << ", PtrB=" << *PtrB << ", Depth=" << Depth << "\n");
+                    << ", PtrB=" << *PtrB << ", ContextInst= " << *ContextInst
+                    << ", Depth=" << Depth << "\n");
   unsigned OffsetBitWidth = DL.getIndexTypeSizeInBits(PtrA->getType());
   APInt OffsetA(OffsetBitWidth, 0);
   APInt OffsetB(OffsetBitWidth, 0);
@@ -1495,7 +1503,8 @@ std::optional<APInt> Vectorizer::getConstantOffset(Value *PtrA, Value *PtrB,
     if (DistRange.isSingleElement())
       return OffsetB - OffsetA + *DistRange.getSingleElement();
   }
-  std::optional<APInt> Diff = gtConstantOffsetComplexAddrs(PtrA, PtrB, Depth);
+  std::optional<APInt> Diff =
+      getConstantOffsetComplexAddrs(PtrA, PtrB, ContextInst, Depth);
   if (Diff.has_value())
     return OffsetB - OffsetA + Diff->sext(OffsetB.getBitWidth());
   return std::nullopt;
diff --git a/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/gep-bitcast.ll b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/gep-bitcast.ll
index d6eac20..e580408 100644
--- a/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/gep-bitcast.ll
+++ b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/gep-bitcast.ll
@@ -118,3 +118,22 @@ define void @sexted_i1_gep_index(ptr addrspace(1) %p, i32 %val) {
   %val1 = load i32, ptr addrspace(1) %gep.1
   ret void
 }
+
+; CHECK-LABEL: @zexted_i1_gep_index_different_bbs
+; CHECK: load i32
+; CHECK: load i32
+define void @zexted_i1_gep_index_different_bbs(ptr addrspace(1) %p, i32 %val) {
+entry:
+  %selector = icmp eq i32 %val, 0
+  %flipped = xor i1 %selector, 1
+  %index.0 = zext i1 %selector to i64
+  %index.1 = zext i1 %flipped to i64
+  %gep.0 = getelementptr inbounds i32, ptr addrspace(1) %p, i64 %index.0
+  br label %next
+
+next:
+  %gep.1 = getelementptr inbounds i32, ptr addrspace(1) %p, i64 %index.1
+  %val0 = load i32, ptr addrspace(1) %gep.0
+  %val1 = load i32, ptr addrspace(1) %gep.1
+  ret void
+}
-- 
2.7.4