From 3b52c04e82fada6483337dfd6a033f336fbc73ec Mon Sep 17 00:00:00 2001 From: Thomas Preud'homme Date: Mon, 22 Mar 2021 20:28:41 +0000 Subject: [PATCH] Make FindAvailableLoadedValue TBAA aware FindAvailableLoadedValue() relies on FindAvailablePtrLoadStore() to run the alias analysis when searching for an equivalent value. However, FindAvailablePtrLoadStore() calls the alias analysis framework with a memory location for the load constructed from an address and a size, which thus lacks TBAA metadata info. This commit modifies FindAvailablePtrLoadStore() to accept an optional memory location as parameter to allow FindAvailableLoadedValue() to create it based on the load instruction, which would then have TBAA metadata info attached. Reviewed By: nikic Differential Revision: https://reviews.llvm.org/D99206 --- llvm/include/llvm/Analysis/Loads.h | 7 ++-- llvm/lib/Analysis/Loads.cpp | 48 +++++++++++----------- llvm/lib/Transforms/Scalar/JumpThreading.cpp | 17 +++++--- .../Transforms/InstCombine/load-no-aliasing.ll | 5 +-- 4 files changed, 41 insertions(+), 36 deletions(-) diff --git a/llvm/include/llvm/Analysis/Loads.h b/llvm/include/llvm/Analysis/Loads.h index f41e5e0..ced1943 100644 --- a/llvm/include/llvm/Analysis/Loads.h +++ b/llvm/include/llvm/Analysis/Loads.h @@ -25,6 +25,7 @@ class Instruction; class LoadInst; class Loop; class MDNode; +class MemoryLocation; class ScalarEvolution; class TargetLibraryInfo; @@ -147,7 +148,7 @@ Value *FindAvailableLoadedValue(LoadInst *Load, AAResults &AA, bool *IsLoadCSE, /// this function, if ScanFrom points at the beginning of the block, it's safe /// to continue scanning the predecessors. /// -/// \param Ptr The pointer we want the load and store to originate from. +/// \param Loc The location we want the load and store to originate from. /// \param AccessTy The access type of the pointer. /// \param AtLeastAtomic Are we looking for at-least an atomic load/store ? In /// case it is false, we can return an atomic or non-atomic load or store. In @@ -163,8 +164,8 @@ Value *FindAvailableLoadedValue(LoadInst *Load, AAResults &AA, bool *IsLoadCSE, /// location in memory, as opposed to the value operand of a store. /// /// \returns The found value, or nullptr if no value is found. -Value *FindAvailablePtrLoadStore(Value *Ptr, Type *AccessTy, bool AtLeastAtomic, - BasicBlock *ScanBB, +Value *findAvailablePtrLoadStore(const MemoryLocation &Loc, Type *AccessTy, + bool AtLeastAtomic, BasicBlock *ScanBB, BasicBlock::iterator &ScanFrom, unsigned MaxInstsToScan, AAResults *AA, bool *IsLoadCSE, unsigned *NumScanedInst); diff --git a/llvm/lib/Analysis/Loads.cpp b/llvm/lib/Analysis/Loads.cpp index 7279ed5..8d9330c 100644 --- a/llvm/lib/Analysis/Loads.cpp +++ b/llvm/lib/Analysis/Loads.cpp @@ -16,6 +16,7 @@ #include "llvm/Analysis/CaptureTracking.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/MemoryBuiltins.h" +#include "llvm/Analysis/MemoryLocation.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/Analysis/TargetLibraryInfo.h" @@ -437,21 +438,24 @@ Value *llvm::FindAvailableLoadedValue(LoadInst *Load, if (!Load->isUnordered()) return nullptr; - return FindAvailablePtrLoadStore( - Load->getPointerOperand(), Load->getType(), Load->isAtomic(), ScanBB, - ScanFrom, MaxInstsToScan, AA, IsLoad, NumScanedInst); + MemoryLocation Loc = MemoryLocation::get(Load); + return findAvailablePtrLoadStore(Loc, Load->getType(), Load->isAtomic(), + ScanBB, ScanFrom, MaxInstsToScan, AA, IsLoad, + NumScanedInst); } // Check if the load and the store have the same base, constant offsets and // non-overlapping access ranges. -static bool AreNonOverlapSameBaseLoadAndStore( - Value *LoadPtr, Type *LoadTy, Value *StorePtr, Type *StoreTy, - const DataLayout &DL) { +static bool areNonOverlapSameBaseLoadAndStore(const Value *LoadPtr, + Type *LoadTy, + const Value *StorePtr, + Type *StoreTy, + const DataLayout &DL) { APInt LoadOffset(DL.getTypeSizeInBits(LoadPtr->getType()), 0); APInt StoreOffset(DL.getTypeSizeInBits(StorePtr->getType()), 0); - Value *LoadBase = LoadPtr->stripAndAccumulateConstantOffsets( + const Value *LoadBase = LoadPtr->stripAndAccumulateConstantOffsets( DL, LoadOffset, /* AllowNonInbounds */ false); - Value *StoreBase = StorePtr->stripAndAccumulateConstantOffsets( + const Value *StoreBase = StorePtr->stripAndAccumulateConstantOffsets( DL, StoreOffset, /* AllowNonInbounds */ false); if (LoadBase != StoreBase) return false; @@ -464,7 +468,7 @@ static bool AreNonOverlapSameBaseLoadAndStore( return LoadRange.intersectWith(StoreRange).isEmptySet(); } -static Value *getAvailableLoadStore(Instruction *Inst, Value *Ptr, +static Value *getAvailableLoadStore(Instruction *Inst, const Value *Ptr, Type *AccessTy, bool AtLeastAtomic, const DataLayout &DL, bool *IsLoadCSE) { // If this is a load of Ptr, the loaded value is available. @@ -511,17 +515,15 @@ static Value *getAvailableLoadStore(Instruction *Inst, Value *Ptr, return nullptr; } -Value *llvm::FindAvailablePtrLoadStore(Value *Ptr, Type *AccessTy, - bool AtLeastAtomic, BasicBlock *ScanBB, - BasicBlock::iterator &ScanFrom, - unsigned MaxInstsToScan, - AAResults *AA, bool *IsLoadCSE, - unsigned *NumScanedInst) { +Value *llvm::findAvailablePtrLoadStore( + const MemoryLocation &Loc, Type *AccessTy, bool AtLeastAtomic, + BasicBlock *ScanBB, BasicBlock::iterator &ScanFrom, unsigned MaxInstsToScan, + AAResults *AA, bool *IsLoadCSE, unsigned *NumScanedInst) { if (MaxInstsToScan == 0) MaxInstsToScan = ~0U; const DataLayout &DL = ScanBB->getModule()->getDataLayout(); - Value *StrippedPtr = Ptr->stripPointerCasts(); + const Value *StrippedPtr = Loc.Ptr->stripPointerCasts(); while (ScanFrom != ScanBB->begin()) { // We must ignore debug info directives when counting (otherwise they @@ -547,8 +549,6 @@ Value *llvm::FindAvailablePtrLoadStore(Value *Ptr, Type *AccessTy, return Available; // Try to get the store size for the type. - auto AccessSize = LocationSize::precise(DL.getTypeStoreSize(AccessTy)); - if (StoreInst *SI = dyn_cast(Inst)) { Value *StorePtr = SI->getPointerOperand()->stripPointerCasts(); @@ -565,14 +565,14 @@ Value *llvm::FindAvailablePtrLoadStore(Value *Ptr, Type *AccessTy, // base, constant offsets and non-overlapping access ranges, ignore the // store. This is a simple form of alias analysis that is used by the // inliner. FIXME: use BasicAA if possible. - if (AreNonOverlapSameBaseLoadAndStore( - Ptr, AccessTy, SI->getPointerOperand(), + if (areNonOverlapSameBaseLoadAndStore( + Loc.Ptr, AccessTy, SI->getPointerOperand(), SI->getValueOperand()->getType(), DL)) continue; } else { // If we have alias analysis and it says the store won't modify the // loaded value, ignore the store. - if (!isModSet(AA->getModRefInfo(SI, StrippedPtr, AccessSize))) + if (!isModSet(AA->getModRefInfo(SI, Loc))) continue; } @@ -585,7 +585,7 @@ Value *llvm::FindAvailablePtrLoadStore(Value *Ptr, Type *AccessTy, if (Inst->mayWriteToMemory()) { // If alias analysis claims that it really won't modify the load, // ignore it. - if (AA && !isModSet(AA->getModRefInfo(Inst, StrippedPtr, AccessSize))) + if (AA && !isModSet(AA->getModRefInfo(Inst, Loc))) continue; // May modify the pointer, bail out. @@ -635,9 +635,9 @@ Value *llvm::FindAvailableLoadedValue(LoadInst *Load, AAResults &AA, // If we found an available value, ensure that the instructions in between // did not modify the memory location. if (Available) { - auto AccessSize = LocationSize::precise(DL.getTypeStoreSize(AccessTy)); + MemoryLocation Loc = MemoryLocation::get(Load); for (Instruction *Inst : MustNotAliasInsts) - if (isModSet(AA.getModRefInfo(Inst, StrippedPtr, AccessSize))) + if (isModSet(AA.getModRefInfo(Inst, Loc))) return nullptr; } diff --git a/llvm/lib/Transforms/Scalar/JumpThreading.cpp b/llvm/lib/Transforms/Scalar/JumpThreading.cpp index 2fb4a67..e485295 100644 --- a/llvm/lib/Transforms/Scalar/JumpThreading.cpp +++ b/llvm/lib/Transforms/Scalar/JumpThreading.cpp @@ -31,6 +31,7 @@ #include "llvm/Analysis/LazyValueInfo.h" #include "llvm/Analysis/Loads.h" #include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/MemoryLocation.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" @@ -1388,10 +1389,14 @@ bool JumpThreadingPass::simplifyPartiallyRedundantLoad(LoadInst *LoadI) { "Attempting to CSE volatile or atomic loads"); // If this is a load on a phi pointer, phi-translate it and search // for available load/store to the pointer in predecessors. - Value *Ptr = LoadedPtr->DoPHITranslation(LoadBB, PredBB); - PredAvailable = FindAvailablePtrLoadStore( - Ptr, LoadI->getType(), LoadI->isAtomic(), PredBB, BBIt, - DefMaxInstsToScan, AA, &IsLoadCSE, &NumScanedInst); + Type *AccessTy = LoadI->getType(); + const auto &DL = LoadI->getModule()->getDataLayout(); + MemoryLocation Loc(LoadedPtr->DoPHITranslation(LoadBB, PredBB), + LocationSize::precise(DL.getTypeStoreSize(AccessTy)), + AATags); + PredAvailable = findAvailablePtrLoadStore(Loc, AccessTy, LoadI->isAtomic(), + PredBB, BBIt, DefMaxInstsToScan, + AA, &IsLoadCSE, &NumScanedInst); // If PredBB has a single predecessor, continue scanning through the // single predecessor. @@ -1401,8 +1406,8 @@ bool JumpThreadingPass::simplifyPartiallyRedundantLoad(LoadInst *LoadI) { SinglePredBB = SinglePredBB->getSinglePredecessor(); if (SinglePredBB) { BBIt = SinglePredBB->end(); - PredAvailable = FindAvailablePtrLoadStore( - Ptr, LoadI->getType(), LoadI->isAtomic(), SinglePredBB, BBIt, + PredAvailable = findAvailablePtrLoadStore( + Loc, AccessTy, LoadI->isAtomic(), SinglePredBB, BBIt, (DefMaxInstsToScan - NumScanedInst), AA, &IsLoadCSE, &NumScanedInst); } diff --git a/llvm/test/Transforms/InstCombine/load-no-aliasing.ll b/llvm/test/Transforms/InstCombine/load-no-aliasing.ll index 4147320..c4e0bc0 100644 --- a/llvm/test/Transforms/InstCombine/load-no-aliasing.ll +++ b/llvm/test/Transforms/InstCombine/load-no-aliasing.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -tbaa -evaluate-aa-metadata -instcombine -S < %s | FileCheck %s +; RUN: opt -tbaa -instcombine -S < %s | FileCheck %s ; Check that load to load forwarding works with non aliasing store inbetween. define i32 @test_load_store_load_combine(i32*, float*) { @@ -7,8 +7,7 @@ define i32 @test_load_store_load_combine(i32*, float*) { ; CHECK-NEXT: [[A:%.*]] = load i32, i32* [[TMP0:%.*]], align 4, !tbaa [[TBAA0:![0-9]+]] ; CHECK-NEXT: [[F:%.*]] = sitofp i32 [[A]] to float ; CHECK-NEXT: store float [[F]], float* [[TMP1:%.*]], align 4, !tbaa [[TBAA4:![0-9]+]] -; CHECK-NEXT: [[B:%.*]] = load i32, i32* [[TMP0]], align 4, !tbaa [[TBAA0]] -; CHECK-NEXT: ret i32 [[B]] +; CHECK-NEXT: ret i32 [[A]] ; %a = load i32, i32* %0, align 4, !tbaa !0 %f = sitofp i32 %a to float -- 2.7.4