From c46634554d507b6b764eeba881ed4eef3b28baac Mon Sep 17 00:00:00 2001 From: Stanislav Mekhanoshin Date: Thu, 21 Apr 2022 16:45:19 -0700 Subject: [PATCH] [LoadStoreVectorizer] Consider if operation is faster than before Compare a relative speed of misaligned accesses before and after vectorization, not just check the new instruction is not going to be slower. Since no target now returns anything but 0 or 1 for Fast argument of the allowsMisalignedMemoryAccesses this is still NFCI. The subsequent patch will tune actual values of Fast on AMDGPU. Differential Revision: https://reviews.llvm.org/D124218 --- .../Transforms/Vectorize/LoadStoreVectorizer.cpp | 31 ++++++++++++++++------ 1 file changed, 23 insertions(+), 8 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp b/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp index a038051..0b7fc85 100644 --- a/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp @@ -186,8 +186,11 @@ private: SmallPtrSet<Instruction *, 16> *InstructionsProcessed); /// Check if this load/store access is misaligned accesses. + /// Returns a \p RelativeSpeed of an operation if allowed suitable to + /// compare to another result for the same \p AddressSpace and potentially + /// different \p Alignment and \p SzInBytes. bool accessIsMisaligned(unsigned SzInBytes, unsigned AddressSpace, - Align Alignment); + Align Alignment, unsigned &RelativeSpeed); }; class LoadStoreVectorizerLegacyPass : public FunctionPass { @@ -1078,8 +1081,14 @@ bool Vectorizer::vectorizeStoreChain( InstructionsProcessed->insert(Chain.begin(), Chain.end()); // If the store is going to be misaligned, don't vectorize it. 
- if (accessIsMisaligned(SzInBytes, AS, Alignment)) { + unsigned RelativeSpeed; + if (accessIsMisaligned(SzInBytes, AS, Alignment, RelativeSpeed)) { if (S0->getPointerAddressSpace() != DL.getAllocaAddrSpace()) { + unsigned SpeedBefore; + accessIsMisaligned(EltSzInBytes, AS, Alignment, SpeedBefore); + if (SpeedBefore > RelativeSpeed) + return false; + auto Chains = splitOddVectorElts(Chain, Sz); bool Vectorized = false; Vectorized |= vectorizeStoreChain(Chains.first, InstructionsProcessed); @@ -1231,8 +1240,14 @@ bool Vectorizer::vectorizeLoadChain( InstructionsProcessed->insert(Chain.begin(), Chain.end()); // If the load is going to be misaligned, don't vectorize it. - if (accessIsMisaligned(SzInBytes, AS, Alignment)) { + unsigned RelativeSpeed; + if (accessIsMisaligned(SzInBytes, AS, Alignment, RelativeSpeed)) { if (L0->getPointerAddressSpace() != DL.getAllocaAddrSpace()) { + unsigned SpeedBefore; + accessIsMisaligned(EltSzInBytes, AS, Alignment, SpeedBefore); + if (SpeedBefore > RelativeSpeed) + return false; + auto Chains = splitOddVectorElts(Chain, Sz); bool Vectorized = false; Vectorized |= vectorizeLoadChain(Chains.first, InstructionsProcessed); @@ -1316,15 +1331,15 @@ bool Vectorizer::vectorizeLoadChain( } bool Vectorizer::accessIsMisaligned(unsigned SzInBytes, unsigned AddressSpace, - Align Alignment) { + Align Alignment, unsigned &RelativeSpeed) { + RelativeSpeed = 0; if (Alignment.value() % SzInBytes == 0) return false; - unsigned Fast = 0; bool Allows = TTI.allowsMisalignedMemoryAccesses(F.getParent()->getContext(), SzInBytes * 8, AddressSpace, - Alignment, &Fast); + Alignment, &RelativeSpeed); LLVM_DEBUG(dbgs() << "LSV: Target said misaligned is allowed? " << Allows - << " and fast? " << Fast << "\n";); - return !Allows || !Fast; + << " with relative speed = " << RelativeSpeed << '\n';); + return !Allows || !RelativeSpeed; } -- 2.7.4