return *this;
}
+ /// Comparison for sorting ranges by offset.
+ ///
+ /// Returns true if the offset \p L is less than that of \p R.
+ inline static bool OffsetLessThan(const RangeTy &L, const RangeTy &R) {
+ return L.Offset < R.Offset;
+ }
+
/// Constants used to represent special offsets or sizes.
/// - This assumes that Offset and Size are non-negative.
/// - The constants should not clash with DenseMapInfo, such as EmptyKey
static constexpr int64_t Unknown = -2;
};
+/// Print \p R to \p OS as "[Offset, Size]".
+inline raw_ostream &operator<<(raw_ostream &OS, const RangeTy &R) {
+ OS << "[" << R.Offset << ", " << R.Size << "]";
+ return OS;
+}
+
inline bool operator==(const RangeTy &A, const RangeTy &B) {
return A.Offset == B.Offset && A.Size == B.Size;
}
AK_MUST_READ_WRITE = AK_MUST | AK_R | AK_W,
};
+ /// A container for a list of ranges.
+ struct RangeList {
+ // The set of ranges rarely contains more than one element, and is unlikely
+ // to contain more than say four elements. So we find the middle-ground with
+ // a sorted vector. This avoids hard-coding a rarely used number like "four"
+ // into every instance of a SmallSet.
+ using RangeTy = AA::RangeTy;
+ using VecTy = SmallVector<RangeTy>;
+ using iterator = VecTy::iterator;
+ using const_iterator = VecTy::const_iterator;
+ /// The ranges, kept sorted by offset and unique per offset.
+ VecTy Ranges;
+
+ /// Construct a list holding the single range \p R.
+ RangeList(const RangeTy &R) { Ranges.push_back(R); }
+ /// Construct one range of width \p Size at each of the given \p Offsets,
+ /// which must be strictly ascending.
+ RangeList(ArrayRef<int64_t> Offsets, int64_t Size) {
+ Ranges.reserve(Offsets.size());
+ for (unsigned i = 0, e = Offsets.size(); i != e; ++i) {
+ assert(((i + 1 == e) || Offsets[i] < Offsets[i + 1]) &&
+ "Expected strictly ascending offsets.");
+ Ranges.emplace_back(Offsets[i], Size);
+ }
+ }
+ /// Construct an empty (unassigned) list.
+ RangeList() = default;
+
+ /// Iterators over the sorted list of ranges.
+ iterator begin() { return Ranges.begin(); }
+ iterator end() { return Ranges.end(); }
+ const_iterator begin() const { return Ranges.begin(); }
+ const_iterator end() const { return Ranges.end(); }
+
+ // Helpers required for std::set_difference
+ using value_type = RangeTy;
+ /// Append \p R; asserts that the sorted order is maintained.
+ void push_back(const RangeTy &R) {
+ assert((Ranges.empty() || RangeTy::OffsetLessThan(Ranges.back(), R)) &&
+ "Ensure the last element is the greatest.");
+ Ranges.push_back(R);
+ }
+
+ /// Copy ranges from \p L that are not in \p R, into \p D.
+ static void set_difference(const RangeList &L, const RangeList &R,
+ RangeList &D) {
+ std::set_difference(L.begin(), L.end(), R.begin(), R.end(),
+ std::back_inserter(D), RangeTy::OffsetLessThan);
+ }
+
+ /// Return the number of ranges in the list.
+ unsigned size() const { return Ranges.size(); }
+
+ /// Compare as ordered lists; equal iff both hold the same ranges in order.
+ bool operator==(const RangeList &OI) const { return Ranges == OI.Ranges; }
+
+ /// Merge the ranges in \p RHS into the current ranges.
+ /// - Merging a list of unknown ranges makes the current list unknown.
+ /// - Ranges with the same offset are merged according to RangeTy::operator&
+ /// \return true if the current RangeList changed.
+ bool merge(const RangeList &RHS) {
+ if (isUnknown())
+ return false;
+ if (RHS.isUnknown()) {
+ setUnknown();
+ return true;
+ }
+
+ if (Ranges.empty()) {
+ Ranges = RHS.Ranges;
+ return true;
+ }
+
+ bool Changed = false;
+ auto LPos = Ranges.begin();
+ for (auto &R : RHS.Ranges) {
+ auto Result = insert(LPos, R);
+ if (isUnknown())
+ return true;
+ // Resume from the last insertion point; both lists are sorted, so the
+ // overall merge is a single linear scan.
+ LPos = Result.first;
+ Changed |= Result.second;
+ }
+ return Changed;
+ }
+
+ /// Insert \p R at the given iterator \p Pos, and merge if necessary.
+ ///
+ /// This assumes that all ranges before \p Pos are OffsetLessThan \p R, and
+ /// then maintains the sorted order for the suffix list.
+ ///
+ /// \return The place of insertion and true iff anything changed.
+ std::pair<iterator, bool> insert(iterator Pos, const RangeTy &R) {
+ if (isUnknown())
+ return std::make_pair(Ranges.begin(), false);
+ if (R.offsetOrSizeAreUnknown()) {
+ return std::make_pair(setUnknown(), true);
+ }
+
+ // Maintain this as a sorted vector of unique entries.
+ auto LB = std::lower_bound(Pos, Ranges.end(), R, RangeTy::OffsetLessThan);
+ if (LB == Ranges.end() || LB->Offset != R.Offset)
+ return std::make_pair(Ranges.insert(LB, R), true);
+ // An entry with the same offset exists; merge the two ranges.
+ bool Changed = *LB != R;
+ *LB &= R;
+ if (LB->offsetOrSizeAreUnknown())
+ return std::make_pair(setUnknown(), true);
+ return std::make_pair(LB, Changed);
+ }
+
+ /// Insert the given range \p R, maintaining sorted order.
+ ///
+ /// \return The place of insertion and true iff anything changed.
+ std::pair<iterator, bool> insert(const RangeTy &R) {
+ return insert(Ranges.begin(), R);
+ }
+
+ /// Add the increment \p Inc to the offset of every range.
+ void addToAllOffsets(int64_t Inc) {
+ assert(!isUnassigned() &&
+ "Cannot increment if the offset is not yet computed!");
+ if (isUnknown())
+ return;
+ for (auto &R : Ranges) {
+ R.Offset += Inc;
+ }
+ }
+
+ /// Return true iff there is exactly one range and it is known.
+ bool isUnique() const {
+ return Ranges.size() == 1 && !Ranges.front().offsetOrSizeAreUnknown();
+ }
+
+ /// Return the unique range, assuming it exists.
+ const RangeTy &getUnique() const {
+ assert(isUnique() && "No unique range to return!");
+ return Ranges.front();
+ }
+
+ /// Return true iff the list contains an unknown range.
+ bool isUnknown() const {
+ if (isUnassigned())
+ return false;
+ if (Ranges.front().offsetOrSizeAreUnknown()) {
+ assert(Ranges.size() == 1 && "Unknown is a singleton range.");
+ return true;
+ }
+ return false;
+ }
+
+ /// Discard all ranges and insert a single unknown range.
+ iterator setUnknown() {
+ Ranges.clear();
+ Ranges.push_back(RangeTy::getUnknown());
+ return Ranges.begin();
+ }
+
+ /// Return true if no ranges have been inserted.
+ bool isUnassigned() const { return Ranges.size() == 0; }
+ };
+
/// An access description.
struct Access {
Access(Instruction *I, int64_t Offset, int64_t Size,
std::optional<Value *> Content, AccessKind Kind, Type *Ty)
- : LocalI(I), RemoteI(I), Content(Content), Range(Offset, Size),
+ : LocalI(I), RemoteI(I), Content(Content), Ranges(Offset, Size),
Kind(Kind), Ty(Ty) {
verify();
}
+ Access(Instruction *LocalI, Instruction *RemoteI, const RangeList &Ranges,
+ std::optional<Value *> Content, AccessKind K, Type *Ty)
+ : LocalI(LocalI), RemoteI(RemoteI), Content(Content), Ranges(Ranges),
+ Kind(K), Ty(Ty) {
+ if (Ranges.size() > 1) {
+ Kind = AccessKind(Kind | AK_MAY);
+ Kind = AccessKind(Kind & ~AK_MUST);
+ }
+ verify();
+ }
Access(Instruction *LocalI, Instruction *RemoteI, int64_t Offset,
int64_t Size, std::optional<Value *> Content, AccessKind Kind,
Type *Ty)
: LocalI(LocalI), RemoteI(RemoteI), Content(Content),
- Range(Offset, Size), Kind(Kind), Ty(Ty) {
+ Ranges(Offset, Size), Kind(Kind), Ty(Ty) {
verify();
}
Access(const Access &Other) = default;
- Access(const Access &&Other)
- : LocalI(Other.LocalI), RemoteI(Other.RemoteI), Content(Other.Content),
- Range(Other.Range), Kind(Other.Kind), Ty(Other.Ty) {}
Access &operator=(const Access &Other) = default;
bool operator==(const Access &R) const {
- return LocalI == R.LocalI && RemoteI == R.RemoteI && Range == R.Range &&
+ return LocalI == R.LocalI && RemoteI == R.RemoteI && Ranges == R.Ranges &&
Content == R.Content && Kind == R.Kind;
}
bool operator!=(const Access &R) const { return !(*this == R); }
Access &operator&=(const Access &R) {
assert(RemoteI == R.RemoteI && "Expected same instruction!");
assert(LocalI == R.LocalI && "Expected same instruction!");
+
+ // Note that every Access object corresponds to a unique Value, and only
+ // accesses to the same Value are merged. Hence we assume that all ranges
+ // are the same size. If ranges can be different size, then the contents
+ // must be dropped.
+ Ranges.merge(R.Ranges);
+ Content =
+ AA::combineOptionalValuesInAAValueLatice(Content, R.Content, Ty);
+
+ // Combine the access kind, which results in a bitwise union.
+ // - If MAY is present in the union, then any MUST needs to be removed.
+ // - If there is more than one range, then this must be a MAY.
Kind = AccessKind(Kind | R.Kind);
- auto Before = Range;
- Range &= R.Range;
- if (Before.isUnassigned() || Before == Range) {
- Content =
- AA::combineOptionalValuesInAAValueLatice(Content, R.Content, Ty);
- } else {
- // Since the Range information changed, set a conservative state -- drop
- // the contents, and assume MayAccess rather than MustAccess.
- setWrittenValueUnknown();
+ if ((Kind & AK_MAY) || Ranges.size() > 1) {
Kind = AccessKind(Kind | AK_MAY);
Kind = AccessKind(Kind & ~AK_MUST);
}
"Expect must or may access, not both.");
assert(isAssumption() + isWrite() <= 1 &&
"Expect assumption access or write access, never both.");
+ assert((isMayAccess() || Ranges.size() == 1) &&
+ "Cannot be a must access if there are multiple ranges.");
}
/// Return the access kind.
/// Return true if this is an assumption access.
bool isAssumption() const { return Kind == AK_ASSUMPTION; }
- bool isMustAccess() const { return Kind & AK_MUST; }
- bool isMayAccess() const { return Kind & AK_MAY; }
+ bool isMustAccess() const {
+ bool MustAccess = Kind & AK_MUST;
+ assert((!MustAccess || Ranges.size() < 2) &&
+ "Cannot be a must access if there are multiple ranges.");
+ return MustAccess;
+ }
+
+ bool isMayAccess() const {
+ bool MayAccess = Kind & AK_MAY;
+ assert((MayAccess || Ranges.size() < 2) &&
+ "Cannot be a must access if there are multiple ranges.");
+ return MayAccess;
+ }
/// Return the instruction that causes the access with respect to the local
/// scope of the associated attribute.
/// determined.
std::optional<Value *> getContent() const { return Content; }
- /// Return the offset for this access.
- int64_t getOffset() const { return Range.Offset; }
+ bool hasUniqueRange() const { return Ranges.isUnique(); }
+ const AA::RangeTy &getUniqueRange() const { return Ranges.getUnique(); }
+
+ /// Add a range accessed by this Access.
+ ///
+ /// If there are multiple ranges, then this is a "may access".
+ void addRange(int64_t Offset, int64_t Size) {
+ Ranges.insert({Offset, Size});
+ if (!hasUniqueRange()) {
+ Kind = AccessKind(Kind | AK_MAY);
+ Kind = AccessKind(Kind & ~AK_MUST);
+ }
+ }
+
+ const RangeList &getRanges() const { return Ranges; }
- /// Return the size for this access.
- int64_t getSize() const { return Range.Size; }
+ using const_iterator = RangeList::const_iterator;
+ const_iterator begin() const { return Ranges.begin(); }
+ const_iterator end() const { return Ranges.end(); }
private:
/// The instruction responsible for the access with respect to the local
/// cannot be determined.
std::optional<Value *> Content;
- /// The object accessed, in terms of an offset and size in bytes.
- AA::RangeTy Range;
+ /// Set of potential ranges accessed from the base pointer.
+ RangeList Ranges;
/// The access kind, e.g., READ, as bitset (could be more than one).
AccessKind Kind;
DenseSet<const char *> Allowed(
{&AAAMDAttributes::ID, &AAUniformWorkGroupSize::ID,
&AAPotentialValues::ID, &AAAMDFlatWorkGroupSize::ID, &AACallEdges::ID,
- &AAPointerInfo::ID});
+ &AAPointerInfo::ID, &AAPotentialConstantValues::ID});
AttributorConfig AC(CGUpdater);
AC.Allowed = &Allowed;
/// OffsetBin to the bin for its new offset.
///
/// \Returns CHANGED, if the state changed, UNCHANGED otherwise.
- ChangeStatus addAccess(Attributor &A, int64_t Offset, int64_t Size,
+ ChangeStatus addAccess(Attributor &A, const AAPointerInfo::RangeList &Ranges,
Instruction &I, std::optional<Value *> Content,
AAPointerInfo::AccessKind Kind, Type *Ty,
Instruction *RemoteI = nullptr);
bool IsExact = Range == ItRange && !Range.offsetOrSizeAreUnknown();
for (auto Index : It.getSecond()) {
auto &Access = AccessList[Index];
- if (!CB(Access, IsExact))
+ if (!CB(Access, IsExact && Access.hasUniqueRange()))
return false;
}
}
return true;
}
- for (auto LI : LocalList->getSecond()) {
- auto &Access = AccessList[LI];
- Range &= {Access.getOffset(), Access.getSize()};
+ for (unsigned Index : LocalList->getSecond()) {
+ for (auto &R : AccessList[Index]) {
+ Range &= R;
+ if (Range.offsetOrSizeAreUnknown())
+ break;
+ }
}
return forallInterferingAccesses(Range, CB);
}
BooleanState BS;
};
-ChangeStatus AA::PointerInfo::State::addAccess(Attributor &A, int64_t Offset,
- int64_t Size, Instruction &I,
- std::optional<Value *> Content,
- AAPointerInfo::AccessKind Kind,
- Type *Ty, Instruction *RemoteI) {
+ChangeStatus AA::PointerInfo::State::addAccess(
+ Attributor &A, const AAPointerInfo::RangeList &Ranges, Instruction &I,
+ std::optional<Value *> Content, AAPointerInfo::AccessKind Kind, Type *Ty,
+ Instruction *RemoteI) {
RemoteI = RemoteI ? RemoteI : &I;
- AAPointerInfo::Access Acc(&I, RemoteI, Offset, Size, Content, Kind, Ty);
// Check if we have an access for this instruction, if not, simply add it.
auto &LocalList = RemoteIMap[RemoteI];
break;
}
}
+
+ auto AddToBins = [&](const AAPointerInfo::RangeList &ToAdd) {
+ LLVM_DEBUG(if (ToAdd.size()) dbgs()
+ << "[AAPointerInfo] Inserting access in new offset bins\n";);
+
+ for (auto Key : ToAdd) {
+ LLVM_DEBUG(dbgs() << " key " << Key << "\n");
+ OffsetBins[Key].insert(AccIndex);
+ }
+ };
+
if (!AccExists) {
- AccessList.push_back(Acc);
+ AccessList.emplace_back(&I, RemoteI, Ranges, Content, Kind, Ty);
+ assert((AccessList.size() == AccIndex + 1) &&
+ "New Access should have been at AccIndex");
LocalList.push_back(AccIndex);
- } else {
- // The new one will be combined with the existing one.
- auto &Current = AccessList[AccIndex];
- auto Before = Current;
- Current &= Acc;
- if (Current == Before)
- return ChangeStatus::UNCHANGED;
+ AddToBins(AccessList[AccIndex].getRanges());
+ return ChangeStatus::CHANGED;
+ }
+
+ // Combine the new Access with the existing Access, and then update the
+ // mapping in the offset bins.
+ AAPointerInfo::Access Acc(&I, RemoteI, Ranges, Content, Kind, Ty);
+ auto &Current = AccessList[AccIndex];
+ auto Before = Current;
+ Current &= Acc;
+ if (Current == Before)
+ return ChangeStatus::UNCHANGED;
- Acc = Current;
- AA::RangeTy Key{Before.getOffset(), Before.getSize()};
+ auto &ExistingRanges = Before.getRanges();
+ auto &NewRanges = Current.getRanges();
+
+ // Ranges that are in the old access but not the new access need to be removed
+ // from the offset bins.
+ AAPointerInfo::RangeList ToRemove;
+ AAPointerInfo::RangeList::set_difference(ExistingRanges, NewRanges, ToRemove);
+ LLVM_DEBUG(if (ToRemove.size()) dbgs()
+ << "[AAPointerInfo] Removing access from old offset bins\n";);
+
+ for (auto Key : ToRemove) {
+ LLVM_DEBUG(dbgs() << " key " << Key << "\n");
assert(OffsetBins.count(Key) && "Existing Access must be in some bin.");
auto &Bin = OffsetBins[Key];
assert(Bin.count(AccIndex) &&
"Expected bin to actually contain the Access.");
- LLVM_DEBUG(dbgs() << "[AAPointerInfo] Removing Access "
- << AccessList[AccIndex] << " with key {" << Key.Offset
- << ',' << Key.Size << "}\n");
Bin.erase(AccIndex);
}
- AA::RangeTy Key{Acc.getOffset(), Acc.getSize()};
- LLVM_DEBUG(dbgs() << "[AAPointerInfo] Inserting Access " << Acc
- << " with key {" << Key.Offset << ',' << Key.Size << "}\n");
- OffsetBins[Key].insert(AccIndex);
+ // Ranges that are in the new access but not the old access need to be added
+ // to the offset bins.
+ AAPointerInfo::RangeList ToAdd;
+ AAPointerInfo::RangeList::set_difference(NewRanges, ExistingRanges, ToAdd);
+ AddToBins(ToAdd);
return ChangeStatus::CHANGED;
}
namespace {
-/// Helper struct, will support ranges eventually.
-///
-/// FIXME: Tracks a single Offset until we have proper support for a list of
-/// RangeTy objects.
+/// A helper containing a list of offsets computed for a Use. Ideally this
+/// list should be strictly ascending, but we ensure that only when we
+/// actually translate the list of offsets to a RangeList.
struct OffsetInfo {
- int64_t Offset = AA::RangeTy::Unassigned;
+ using VecTy = SmallVector<int64_t>;
+ using const_iterator = VecTy::const_iterator;
+ /// The offsets; not necessarily sorted, may contain duplicates until
+ /// converted to a RangeList.
+ VecTy Offsets;
+
+ const_iterator begin() const { return Offsets.begin(); }
+ const_iterator end() const { return Offsets.end(); }
- bool operator==(const OffsetInfo &OI) const { return Offset == OI.Offset; }
+ /// Compare as ordered lists; equal iff both hold the same offsets in order.
+ bool operator==(const OffsetInfo &RHS) const {
+ return Offsets == RHS.Offsets;
+ }
+
+ /// Append \p Offset to the list.
+ void insert(int64_t Offset) { Offsets.push_back(Offset); }
+ /// Return true if no offsets have been recorded yet.
+ bool isUnassigned() const { return Offsets.size() == 0; }
+
+ /// Return true iff the list is the singleton Unknown sentinel.
+ bool isUnknown() const {
+ if (isUnassigned())
+ return false;
+ if (Offsets.size() == 1)
+ return Offsets.front() == AA::RangeTy::Unknown;
+ return false;
+ }
+
+ /// Collapse the list to the single Unknown offset.
+ void setUnknown() {
+ Offsets.clear();
+ Offsets.push_back(AA::RangeTy::Unknown);
+ }
+
+ /// Add the increment \p Inc to every offset.
+ void addToAll(int64_t Inc) {
+ for (auto &Offset : Offsets) {
+ Offset += Inc;
+ }
+ }
+
+ /// Copy offsets from \p R into the current list.
+ ///
+ /// Ideally all lists should be strictly ascending, but we defer that to the
+ /// actual use of the list. So we just blindly append here.
+ void merge(const OffsetInfo &R) { Offsets.append(R.Offsets); }
};
+/// Print the offsets in \p OI to \p OS as a bracketed, comma-separated list.
+static raw_ostream &operator<<(raw_ostream &OS, const OffsetInfo &OI) {
+ ListSeparator LS;
+ OS << "[";
+ for (auto Offset : OI) {
+ OS << LS << Offset;
+ }
+ OS << "]";
+ return OS;
+}
+
struct AAPointerInfoImpl
: public StateWrapper<AA::PointerInfo::State, AAPointerInfo> {
using BaseTy = StateWrapper<AA::PointerInfo::State, AAPointerInfo>;
RAcc.getContent(), CB, *this, UsedAssumedInformation);
AK = AccessKind(AK & (IsByval ? AccessKind::AK_R : AccessKind::AK_RW));
AK = AccessKind(AK | (RAcc.isMayAccess() ? AK_MAY : AK_MUST));
- Changed =
- Changed | addAccess(A, It.first.Offset, It.first.Size, CB, Content,
- AK, RAcc.getType(), RAcc.getRemoteInst());
+
+ Changed |= addAccess(A, RAcc.getRanges(), CB, Content, AK,
+ RAcc.getType(), RAcc.getRemoteInst());
}
}
return Changed;
}
ChangeStatus translateAndAddState(Attributor &A, const AAPointerInfo &OtherAA,
- int64_t Offset, CallBase &CB) {
+ const OffsetInfo &Offsets, CallBase &CB) {
using namespace AA::PointerInfo;
if (!OtherAA.getState().isValidState() || !isValidState())
return indicatePessimisticFixpoint();
ChangeStatus Changed = ChangeStatus::UNCHANGED;
const auto &State = OtherAAImpl.getState();
for (const auto &It : State) {
- AA::RangeTy Range = AA::RangeTy::getUnknown();
- if (Offset != AA::RangeTy::Unknown &&
- !It.first.offsetOrSizeAreUnknown()) {
- Range = AA::RangeTy(It.first.Offset + Offset, It.first.Size);
- }
for (auto Index : It.getSecond()) {
const auto &RAcc = State.getAccess(Index);
- AccessKind AK = RAcc.getKind();
- Changed = Changed | addAccess(A, Range.Offset, Range.Size, CB,
- RAcc.getContent(), AK, RAcc.getType(),
- RAcc.getRemoteInst());
+ for (auto Offset : Offsets) {
+ auto NewRanges = Offset == AA::RangeTy::Unknown
+ ? AA::RangeTy::getUnknown()
+ : RAcc.getRanges();
+ if (!NewRanges.isUnknown()) {
+ NewRanges.addToAllOffsets(Offset);
+ }
+ Changed |=
+ addAccess(A, NewRanges, CB, RAcc.getContent(), RAcc.getKind(),
+ RAcc.getType(), RAcc.getRemoteInst());
+ }
}
}
return Changed;
/// Deal with an access and signal if it was handled successfully.
bool handleAccess(Attributor &A, Instruction &I,
std::optional<Value *> Content, AccessKind Kind,
- int64_t Offset, ChangeStatus &Changed, Type &Ty) {
+ SmallVectorImpl<int64_t> &Offsets, ChangeStatus &Changed,
+ Type &Ty) {
using namespace AA::PointerInfo;
auto Size = AA::RangeTy::Unknown;
const DataLayout &DL = A.getDataLayout();
TypeSize AccessSize = DL.getTypeStoreSize(&Ty);
if (!AccessSize.isScalable())
Size = AccessSize.getFixedSize();
- Changed = Changed | addAccess(A, Offset, Size, I, Content, Kind, &Ty);
+
+ // Make a strictly ascending list of offsets as required by addAccess()
+ llvm::sort(Offsets);
+ auto Last = std::unique(Offsets.begin(), Offsets.end());
+ Offsets.erase(Last, Offsets.end());
+
+ Changed = Changed | addAccess(A, {Offsets, Size}, I, Content, Kind, &Ty);
return true;
};
/// See AbstractAttribute::updateImpl(...).
ChangeStatus updateImpl(Attributor &A) override;
+ void collectConstantsForGEP(Attributor &A, const DataLayout &DL,
+ OffsetInfo &UsrOI, const OffsetInfo &PtrOI,
+ const GEPOperator *GEP, bool &Follow);
+
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override {
AAPointerInfoImpl::trackPointerInfoStatistics(getIRPosition());
}
};
+/// If the indices to the GEP can be traced to constants, incorporate all
+/// of these into UsrOI.
+///
+/// \p UsrOI receives the resulting offsets for the GEP; it is set to unknown
+/// if the GEP offset cannot be decomposed into constants.
+/// \p PtrOI holds the offsets already computed for the GEP's pointer operand.
+/// \p Follow is unconditionally set to true so the use-walk continues past
+/// the GEP.
+void AAPointerInfoFloating::collectConstantsForGEP(
+ Attributor &A, const DataLayout &DL, OffsetInfo &UsrOI,
+ const OffsetInfo &PtrOI, const GEPOperator *GEP, bool &Follow) {
+ unsigned BitWidth = DL.getIndexTypeSizeInBits(GEP->getType());
+ MapVector<Value *, APInt> VariableOffsets;
+ APInt ConstantOffset(BitWidth, 0);
+
+ Follow = true;
+
+ // If the GEP cannot be split into a constant base offset plus scaled
+ // variable indices, the offsets are not tractable at all.
+ if (!GEP->collectOffset(DL, BitWidth, VariableOffsets, ConstantOffset)) {
+ UsrOI.setUnknown();
+ return;
+ }
+
+ UsrOI = PtrOI;
+ if (VariableOffsets.empty()) {
+ LLVM_DEBUG(dbgs() << "[AAPointerInfo] GEP offset is constant " << *GEP
+ << "\n");
+ UsrOI.addToAll(ConstantOffset.getSExtValue());
+ return;
+ }
+
+ auto Union = UsrOI;
+ Union.addToAll(ConstantOffset.getSExtValue());
+
+ // Each VI in VariableOffsets has a set of potential constant values. Every
+ // combination of elements, picked one each from these sets, is separately
+ // added to the original set of offsets, thus resulting in more offsets.
+ for (const auto &VI : VariableOffsets) {
+ auto &PotentialConstantsAA = A.getAAFor<AAPotentialConstantValues>(
+ *this, IRPosition::value(*VI.first), DepClassTy::OPTIONAL);
+ if (!PotentialConstantsAA.isValidState()) {
+ LLVM_DEBUG(dbgs() << "[AAPointerInfo] GEP offset is not constant " << *GEP
+ << "\n");
+ UsrOI.setUnknown();
+ return;
+ }
+ // Union over every potential constant for this index; note that the index
+ // value is sign-extended while the scale (element size) is read unsigned.
+ OffsetInfo NewUnion;
+ for (const auto &ConstOffset : PotentialConstantsAA.getAssumedSet()) {
+ auto CopyPerOffset = Union;
+ CopyPerOffset.addToAll(ConstOffset.getSExtValue() *
+ VI.second.getZExtValue());
+ NewUnion.merge(CopyPerOffset);
+ }
+ Union = NewUnion;
+ }
+
+ UsrOI = std::move(Union);
+ return;
+}
+
ChangeStatus AAPointerInfoFloating::updateImpl(Attributor &A) {
using namespace AA::PointerInfo;
ChangeStatus Changed = ChangeStatus::UNCHANGED;
+ const DataLayout &DL = A.getDataLayout();
Value &AssociatedValue = getAssociatedValue();
- const DataLayout &DL = A.getDataLayout();
DenseMap<Value *, OffsetInfo> OffsetInfoMap;
- OffsetInfoMap[&AssociatedValue] = OffsetInfo{0};
+ OffsetInfoMap[&AssociatedValue].insert(0);
auto HandlePassthroughUser = [&](Value *Usr, const OffsetInfo &PtrOI,
bool &Follow) {
- assert(PtrOI.Offset != AA::RangeTy::Unassigned &&
+ assert(!PtrOI.isUnassigned() &&
"Cannot pass through if the input Ptr was not visited!");
OffsetInfoMap[Usr] = PtrOI;
Follow = true;
// already in it though.
auto &UsrOI = OffsetInfoMap[Usr];
auto &PtrOI = OffsetInfoMap[CurPtr];
- UsrOI = PtrOI;
+ Follow = true;
- // TODO: Use range information.
- APInt GEPOffset(DL.getIndexTypeSizeInBits(GEP->getType()), 0);
- if (PtrOI.Offset == AA::RangeTy::Unknown ||
- !GEP->accumulateConstantOffset(DL, GEPOffset)) {
- LLVM_DEBUG(dbgs() << "[AAPointerInfo] GEP offset not constant " << *GEP
- << "\n");
- UsrOI.Offset = AA::RangeTy::Unknown;
- Follow = true;
+ if (PtrOI.isUnknown()) {
+ UsrOI.setUnknown();
return true;
}
- LLVM_DEBUG(dbgs() << "[AAPointerInfo] GEP offset is constant " << *GEP
- << "\n");
- UsrOI.Offset = PtrOI.Offset + GEPOffset.getZExtValue();
- Follow = true;
+ collectConstantsForGEP(A, DL, UsrOI, PtrOI, GEP, Follow);
return true;
}
if (isa<PtrToIntInst>(Usr))
// Check if the PHI operand has already an unknown offset as we can't
// improve on that anymore.
- if (PtrOI.Offset == AA::RangeTy::Unknown) {
+ if (PtrOI.isUnknown()) {
LLVM_DEBUG(dbgs() << "[AAPointerInfo] PHI operand offset unknown "
<< *CurPtr << " in " << *Usr << "\n");
- Follow = UsrOI.Offset != AA::RangeTy::Unknown;
- UsrOI = PtrOI;
+ Follow = !UsrOI.isUnknown();
+ UsrOI.setUnknown();
return true;
}
// Check if the PHI is invariant (so far).
if (UsrOI == PtrOI) {
- assert(PtrOI.Offset != AA::RangeTy::Unassigned &&
+ assert(!PtrOI.isUnassigned() &&
"Cannot assign if the current Ptr was not visited!");
LLVM_DEBUG(dbgs() << "[AAPointerInfo] PHI is invariant (so far)");
return true;
DL, Offset, /* AllowNonInbounds */ true);
auto It = OffsetInfoMap.find(CurPtrBase);
if (It != OffsetInfoMap.end()) {
- Offset += It->getSecond().Offset;
- if (IsFirstPHIUser || Offset == UsrOI.Offset)
+ auto BaseOI = It->getSecond();
+ BaseOI.addToAll(Offset.getZExtValue());
+ if (IsFirstPHIUser || BaseOI == UsrOI) {
+ LLVM_DEBUG(dbgs() << "[AAPointerInfo] PHI is invariant " << *CurPtr
+ << " in " << *Usr << "\n");
return HandlePassthroughUser(Usr, PtrOI, Follow);
+ }
LLVM_DEBUG(
dbgs() << "[AAPointerInfo] PHI operand pointer offset mismatch "
<< *CurPtr << " in " << *Usr << "\n");
}
// TODO: Approximate in case we know the direction of the recurrence.
- UsrOI = PtrOI;
- UsrOI.Offset = AA::RangeTy::Unknown;
+ UsrOI.setUnknown();
Follow = true;
return true;
}
else
AK = AccessKind(AK | AccessKind::AK_MAY);
if (!handleAccess(A, *LoadI, /* Content */ nullptr, AK,
- OffsetInfoMap[CurPtr].Offset, Changed,
+ OffsetInfoMap[CurPtr].Offsets, Changed,
*LoadI->getType()))
return false;
return handleAccess(
A, *Assumption.second, Assumption.first, AccessKind::AK_ASSUMPTION,
- OffsetInfoMap[CurPtr].Offset, Changed, *LoadI->getType());
+ OffsetInfoMap[CurPtr].Offsets, Changed, *LoadI->getType());
}
auto HandleStoreLike = [&](Instruction &I, Value *ValueOp, Type &ValueTy,
if (ValueOp)
Content = A.getAssumedSimplified(
*ValueOp, *this, UsedAssumedInformation, AA::Interprocedural);
- return handleAccess(A, I, Content, AK, OffsetInfoMap[CurPtr].Offset,
+ return handleAccess(A, I, Content, AK, OffsetInfoMap[CurPtr].Offsets,
Changed, ValueTy);
};
const auto &CSArgPI = A.getAAFor<AAPointerInfo>(
*this, IRPosition::callsite_argument(*CB, ArgNo),
DepClassTy::REQUIRED);
- Changed = translateAndAddState(A, CSArgPI, OffsetInfoMap[CurPtr].Offset,
- *CB) |
+ Changed = translateAndAddState(A, CSArgPI, OffsetInfoMap[CurPtr], *CB) |
Changed;
return isValidState();
}
LLVM_DEBUG({
if (!(OffsetInfoMap[NewU] == OffsetInfoMap[OldU])) {
dbgs() << "[AAPointerInfo] Equivalent use callback failed: "
- << OffsetInfoMap[NewU].Offset << " vs "
- << OffsetInfoMap[OldU].Offset << "\n";
+ << OffsetInfoMap[NewU] << " vs " << OffsetInfoMap[OldU]
+ << "\n";
}
});
return OffsetInfoMap[NewU] == OffsetInfoMap[OldU];
auto Kind =
ArgNo == 0 ? AccessKind::AK_MUST_WRITE : AccessKind::AK_MUST_READ;
Changed =
- Changed | addAccess(A, 0, LengthVal, *MI, nullptr, Kind, nullptr);
+ Changed | addAccess(A, {0, LengthVal}, *MI, nullptr, Kind, nullptr);
}
LLVM_DEBUG({
dbgs() << "Accesses by bin after update:\n";
bool ReadOnly = AA::isAssumedReadOnly(A, getIRPosition(), *this, IsKnown);
auto Kind =
ReadOnly ? AccessKind::AK_MAY_READ : AccessKind::AK_MAY_READ_WRITE;
- return addAccess(A, AA::RangeTy::Unknown, AA::RangeTy::Unknown, *getCtxI(),
- nullptr, Kind, nullptr);
+ return addAccess(A, AA::RangeTy::getUnknown(), *getCtxI(), nullptr, Kind,
+ nullptr);
}
/// See AbstractAttribute::trackStatistics()
; offsets of the phi cannot be determined, and hence the attributor assumes that
; hostcall is in use.
+; CHECK-LABEL: amdhsa.kernels:
; CHECK: .value_kind: hidden_hostcall_buffer
; CHECK: .value_kind: hidden_multigrid_sync_arg
+; CHECK-LABEL: .name: kernel_1
-define amdgpu_kernel void @the_kernel(i32 addrspace(1)* %a, i64 %index1, i64 %index2, i1 %cond) {
+define amdgpu_kernel void @kernel_1(i32 addrspace(1)* %a, i64 %index1, i64 %index2, i1 %cond) {
entry:
%tmp7 = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
br i1 %cond, label %old, label %new
ret void
}
+; The call to intrinsic implicitarg_ptr is combined with an offset produced by
+; select'ing between two constants, before it is eventually used in a GEP to
+; form the address of a load. This test ensures that AAPointerInfo can look
+; through the select to maintain a set of indices, so that it can precisely
+; determine that hostcall and other expensive implicit args are not in use.
+
+; CHECK-NOT: hidden_hostcall_buffer
+; CHECK-NOT: hidden_multigrid_sync_arg
+; CHECK-LABEL: .name: kernel_2
+
+define amdgpu_kernel void @kernel_2(i32 addrspace(1)* %a, i1 %cond) {
+entry:
+ %tmp7 = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
+ %tmp5 = select i1 %cond, i64 12, i64 18
+ %tmp6 = getelementptr inbounds i8, i8 addrspace(4)* %tmp7, i64 %tmp5
+ %tmp8 = bitcast i8 addrspace(4)* %tmp6 to i16 addrspace(4)*
+
+ ;;; THIS USE is where multiple offsets are possible, relative to implicitarg_ptr
+ %tmp9 = load i16, i16 addrspace(4)* %tmp8, align 2
+
+ %idx.ext = sext i16 %tmp9 to i64
+ %add.ptr3 = getelementptr inbounds i32, i32 addrspace(1)* %a, i64 %idx.ext
+ %tmp16 = atomicrmw add i32 addrspace(1)* %add.ptr3, i32 15 syncscope("agent-one-as") monotonic, align 4
+ ret void
+}
+
declare i32 @llvm.amdgcn.workitem.id.x()
declare align 4 i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
}
define internal i8 @read_arg_index(i8* %p, i64 %index) {
-; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: read)
-; TUNIT-LABEL: define {{[^@]+}}@read_arg_index
-; TUNIT-SAME: (i8* nocapture nofree noundef nonnull readonly align 2 dereferenceable(1022) [[P:%.*]]) #[[ATTR0:[0-9]+]] {
-; TUNIT-NEXT: entry:
-; TUNIT-NEXT: [[L:%.*]] = load i8, i8* [[P]], align 2
-; TUNIT-NEXT: ret i8 [[L]]
-;
; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: read)
; CGSCC-LABEL: define {{[^@]+}}@read_arg_index
-; CGSCC-SAME: (i8* nocapture nofree noundef nonnull readonly dereferenceable(1022) [[P:%.*]]) #[[ATTR0]] {
+; CGSCC-SAME: (i8* nocapture nofree noundef nonnull readonly align 16 dereferenceable(1024) [[P:%.*]]) #[[ATTR0]] {
; CGSCC-NEXT: entry:
-; CGSCC-NEXT: [[L:%.*]] = load i8, i8* [[P]], align 1
+; CGSCC-NEXT: [[G:%.*]] = getelementptr inbounds i8, i8* [[P]], i64 2
+; CGSCC-NEXT: [[L:%.*]] = load i8, i8* [[G]], align 1
; CGSCC-NEXT: ret i8 [[L]]
;
entry:
define i8 @call_simplifiable_1() {
; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none)
; TUNIT-LABEL: define {{[^@]+}}@call_simplifiable_1
-; TUNIT-SAME: () #[[ATTR1:[0-9]+]] {
+; TUNIT-SAME: () #[[ATTR0:[0-9]+]] {
; TUNIT-NEXT: entry:
; TUNIT-NEXT: [[BYTES:%.*]] = alloca [1024 x i8], align 16
; TUNIT-NEXT: [[I0:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 2
; CGSCC-NEXT: [[BYTES:%.*]] = alloca [1024 x i8], align 16
; CGSCC-NEXT: [[I0:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 2
; CGSCC-NEXT: store i8 2, i8* [[I0]], align 2
-; CGSCC-NEXT: [[R:%.*]] = call i8 @read_arg(i8* nocapture nofree noundef nonnull readonly align 2 dereferenceable(1022) [[I0]]) #[[ATTR3:[0-9]+]]
+; CGSCC-NEXT: [[R:%.*]] = call i8 @read_arg(i8* nocapture nofree noundef nonnull readonly align 2 dereferenceable(1022) [[I0]]) #[[ATTR4:[0-9]+]]
; CGSCC-NEXT: ret i8 [[R]]
;
entry:
; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(argmem: read)
; CGSCC-LABEL: define {{[^@]+}}@sum_two_same_loads
; CGSCC-SAME: (i8* nocapture nofree noundef nonnull readonly dereferenceable(1022) [[P:%.*]]) #[[ATTR2:[0-9]+]] {
-; CGSCC-NEXT: [[X:%.*]] = call i8 @read_arg_1(i8* nocapture nofree noundef nonnull readonly dereferenceable(1022) [[P]]) #[[ATTR4:[0-9]+]]
-; CGSCC-NEXT: [[Y:%.*]] = call i8 @read_arg_1(i8* nocapture nofree noundef nonnull readonly dereferenceable(1022) [[P]]) #[[ATTR4]]
+; CGSCC-NEXT: [[X:%.*]] = call i8 @read_arg_1(i8* nocapture nofree noundef nonnull readonly dereferenceable(1022) [[P]]) #[[ATTR5:[0-9]+]]
+; CGSCC-NEXT: [[Y:%.*]] = call i8 @read_arg_1(i8* nocapture nofree noundef nonnull readonly dereferenceable(1022) [[P]]) #[[ATTR5]]
; CGSCC-NEXT: [[Z:%.*]] = add nsw i8 [[X]], [[Y]]
; CGSCC-NEXT: ret i8 [[Z]]
;
define i8 @call_simplifiable_2() {
; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none)
; TUNIT-LABEL: define {{[^@]+}}@call_simplifiable_2
-; TUNIT-SAME: () #[[ATTR1]] {
+; TUNIT-SAME: () #[[ATTR0]] {
; TUNIT-NEXT: entry:
; TUNIT-NEXT: [[BYTES:%.*]] = alloca [1024 x i8], align 16
; TUNIT-NEXT: [[I0:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 2
; CGSCC-NEXT: store i8 2, i8* [[I0]], align 2
; CGSCC-NEXT: [[I1:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 3
; CGSCC-NEXT: store i8 3, i8* [[I1]], align 1
-; CGSCC-NEXT: [[R:%.*]] = call i8 @sum_two_same_loads(i8* nocapture nofree noundef nonnull readonly align 2 dereferenceable(1022) [[I0]]) #[[ATTR3]]
+; CGSCC-NEXT: [[R:%.*]] = call i8 @sum_two_same_loads(i8* nocapture nofree noundef nonnull readonly align 2 dereferenceable(1022) [[I0]]) #[[ATTR4]]
; CGSCC-NEXT: ret i8 [[R]]
;
entry:
ret i8 %r
}
-define i8 @call_not_simplifiable_1() {
+define i8 @call_simplifiable_3() {
; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none)
-; TUNIT-LABEL: define {{[^@]+}}@call_not_simplifiable_1
-; TUNIT-SAME: () #[[ATTR1]] {
+; TUNIT-LABEL: define {{[^@]+}}@call_simplifiable_3
+; TUNIT-SAME: () #[[ATTR0]] {
; TUNIT-NEXT: entry:
; TUNIT-NEXT: [[BYTES:%.*]] = alloca [1024 x i8], align 16
-; TUNIT-NEXT: [[I0:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 2
-; TUNIT-NEXT: store i8 2, i8* [[I0]], align 2
-; TUNIT-NEXT: [[R:%.*]] = call i8 @read_arg_index(i8* nocapture nofree noundef nonnull readonly align 2 dereferenceable(1022) [[I0]]) #[[ATTR2:[0-9]+]]
-; TUNIT-NEXT: ret i8 [[R]]
+; TUNIT-NEXT: [[I2:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 2
+; TUNIT-NEXT: ret i8 2
;
; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none)
-; CGSCC-LABEL: define {{[^@]+}}@call_not_simplifiable_1
+; CGSCC-LABEL: define {{[^@]+}}@call_simplifiable_3
; CGSCC-SAME: () #[[ATTR1]] {
; CGSCC-NEXT: entry:
; CGSCC-NEXT: [[BYTES:%.*]] = alloca [1024 x i8], align 16
-; CGSCC-NEXT: [[I0:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 2
-; CGSCC-NEXT: store i8 2, i8* [[I0]], align 2
-; CGSCC-NEXT: [[R:%.*]] = call i8 @read_arg_index(i8* nocapture nofree noundef nonnull readonly align 2 dereferenceable(1022) [[I0]]) #[[ATTR3]]
+; CGSCC-NEXT: [[I0:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 0
+; CGSCC-NEXT: [[I2:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 2
+; CGSCC-NEXT: store i8 2, i8* [[I2]], align 2
+; CGSCC-NEXT: [[R:%.*]] = call i8 @read_arg_index(i8* nocapture nofree noundef nonnull readonly align 16 dereferenceable(1024) [[I0]]) #[[ATTR4]]
; CGSCC-NEXT: ret i8 [[R]]
;
entry:
%Bytes = alloca [1024 x i8], align 16
- %i0 = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 2
- store i8 2, i8* %i0, align 1
- %r = call i8 @read_arg_index(i8* %i0, i64 0)
+ %i0 = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 0
+ %i2 = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 2
+ store i8 2, i8* %i2, align 1
+ %r = call i8 @read_arg_index(i8* %i0, i64 2)
ret i8 %r
}
define internal i8 @read_arg_2(i8* %p) {
; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: read)
; TUNIT-LABEL: define {{[^@]+}}@read_arg_2
-; TUNIT-SAME: (i8* nocapture nofree noundef nonnull readonly dereferenceable(1021) [[P:%.*]]) #[[ATTR0]] {
+; TUNIT-SAME: (i8* nocapture nofree noundef nonnull readonly dereferenceable(971) [[P:%.*]]) #[[ATTR1:[0-9]+]] {
; TUNIT-NEXT: entry:
; TUNIT-NEXT: [[L:%.*]] = load i8, i8* [[P]], align 1
; TUNIT-NEXT: ret i8 [[L]]
define internal i8 @sum_two_different_loads(i8* %p, i8* %q) {
; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: read)
; TUNIT-LABEL: define {{[^@]+}}@sum_two_different_loads
-; TUNIT-SAME: (i8* nocapture nofree noundef nonnull readonly align 2 dereferenceable(1022) [[P:%.*]], i8* nocapture nofree noundef nonnull readonly dereferenceable(1021) [[Q:%.*]]) #[[ATTR0]] {
-; TUNIT-NEXT: [[X:%.*]] = call i8 @read_arg_2(i8* nocapture nofree noundef nonnull readonly align 2 dereferenceable(1022) [[P]]) #[[ATTR2]]
-; TUNIT-NEXT: [[Y:%.*]] = call i8 @read_arg_2(i8* nocapture nofree noundef nonnull readonly dereferenceable(1021) [[Q]]) #[[ATTR2]]
+; TUNIT-SAME: (i8* nocapture nofree nonnull readonly dereferenceable(972) [[P:%.*]], i8* nocapture nofree noundef nonnull readonly dereferenceable(971) [[Q:%.*]]) #[[ATTR1]] {
+; TUNIT-NEXT: [[X:%.*]] = call i8 @read_arg_2(i8* nocapture nofree nonnull readonly dereferenceable(972) [[P]]) #[[ATTR3:[0-9]+]]
+; TUNIT-NEXT: [[Y:%.*]] = call i8 @read_arg_2(i8* nocapture nofree noundef nonnull readonly dereferenceable(971) [[Q]]) #[[ATTR3]]
; TUNIT-NEXT: [[Z:%.*]] = add nsw i8 [[X]], [[Y]]
; TUNIT-NEXT: ret i8 [[Z]]
;
; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(argmem: read)
; CGSCC-LABEL: define {{[^@]+}}@sum_two_different_loads
-; CGSCC-SAME: (i8* nocapture nofree noundef nonnull readonly dereferenceable(1022) [[P:%.*]], i8* nocapture nofree noundef nonnull readonly dereferenceable(1021) [[Q:%.*]]) #[[ATTR2]] {
-; CGSCC-NEXT: [[X:%.*]] = call i8 @read_arg_2(i8* nocapture nofree noundef nonnull readonly dereferenceable(1022) [[P]]) #[[ATTR4]]
-; CGSCC-NEXT: [[Y:%.*]] = call i8 @read_arg_2(i8* nocapture nofree noundef nonnull readonly dereferenceable(1021) [[Q]]) #[[ATTR4]]
+; CGSCC-SAME: (i8* nocapture nofree noundef nonnull readonly dereferenceable(972) [[P:%.*]], i8* nocapture nofree noundef nonnull readonly dereferenceable(971) [[Q:%.*]]) #[[ATTR2]] {
+; CGSCC-NEXT: [[X:%.*]] = call i8 @read_arg_2(i8* nocapture nofree noundef nonnull readonly dereferenceable(972) [[P]]) #[[ATTR5]]
+; CGSCC-NEXT: [[Y:%.*]] = call i8 @read_arg_2(i8* nocapture nofree noundef nonnull readonly dereferenceable(971) [[Q]]) #[[ATTR5]]
; CGSCC-NEXT: [[Z:%.*]] = add nsw i8 [[X]], [[Y]]
; CGSCC-NEXT: ret i8 [[Z]]
;
ret i8 %z
}
-define i8 @call_not_simplifiable_2() {
+define i8 @call_partially_simplifiable_1() {
; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none)
-; TUNIT-LABEL: define {{[^@]+}}@call_not_simplifiable_2
-; TUNIT-SAME: () #[[ATTR1]] {
+; TUNIT-LABEL: define {{[^@]+}}@call_partially_simplifiable_1
+; TUNIT-SAME: () #[[ATTR0]] {
; TUNIT-NEXT: entry:
; TUNIT-NEXT: [[BYTES:%.*]] = alloca [1024 x i8], align 16
-; TUNIT-NEXT: [[I0:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 2
-; TUNIT-NEXT: store i8 2, i8* [[I0]], align 2
-; TUNIT-NEXT: [[I1:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 3
-; TUNIT-NEXT: store i8 3, i8* [[I1]], align 1
-; TUNIT-NEXT: [[BASE:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 0
-; TUNIT-NEXT: [[R:%.*]] = call i8 @sum_two_different_loads(i8* nocapture nofree noundef nonnull readonly align 2 dereferenceable(1022) [[I0]], i8* nocapture nofree noundef nonnull readonly dereferenceable(1021) [[I1]]) #[[ATTR2]]
+; TUNIT-NEXT: [[I2:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 2
+; TUNIT-NEXT: store i8 2, i8* [[I2]], align 2
+; TUNIT-NEXT: [[I3:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 3
+; TUNIT-NEXT: store i8 3, i8* [[I3]], align 1
+; TUNIT-NEXT: [[I4:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 4
+; TUNIT-NEXT: [[R:%.*]] = call i8 @sum_two_different_loads(i8* nocapture nofree noundef nonnull readonly align 2 dereferenceable(1022) [[I2]], i8* nocapture nofree noundef nonnull readonly dereferenceable(1021) [[I3]]) #[[ATTR3]]
; TUNIT-NEXT: ret i8 [[R]]
;
; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none)
-; CGSCC-LABEL: define {{[^@]+}}@call_not_simplifiable_2
+; CGSCC-LABEL: define {{[^@]+}}@call_partially_simplifiable_1
; CGSCC-SAME: () #[[ATTR1]] {
; CGSCC-NEXT: entry:
; CGSCC-NEXT: [[BYTES:%.*]] = alloca [1024 x i8], align 16
-; CGSCC-NEXT: [[I0:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 2
-; CGSCC-NEXT: store i8 2, i8* [[I0]], align 2
-; CGSCC-NEXT: [[I1:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 3
-; CGSCC-NEXT: store i8 3, i8* [[I1]], align 1
-; CGSCC-NEXT: [[BASE:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 0
-; CGSCC-NEXT: [[R:%.*]] = call i8 @sum_two_different_loads(i8* nocapture nofree noundef nonnull readonly align 2 dereferenceable(1022) [[I0]], i8* nocapture nofree noundef nonnull readonly dereferenceable(1021) [[I1]]) #[[ATTR3]]
+; CGSCC-NEXT: [[I2:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 2
+; CGSCC-NEXT: store i8 2, i8* [[I2]], align 2
+; CGSCC-NEXT: [[I3:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 3
+; CGSCC-NEXT: store i8 3, i8* [[I3]], align 1
+; CGSCC-NEXT: [[I4:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 4
+; CGSCC-NEXT: store i8 4, i8* [[I4]], align 4
+; CGSCC-NEXT: [[R:%.*]] = call i8 @sum_two_different_loads(i8* nocapture nofree noundef nonnull readonly align 2 dereferenceable(1022) [[I2]], i8* nocapture nofree noundef nonnull readonly dereferenceable(1021) [[I3]]) #[[ATTR4]]
; CGSCC-NEXT: ret i8 [[R]]
;
entry:
%Bytes = alloca [1024 x i8], align 16
- %i0 = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 2
- store i8 2, i8* %i0
- %i1 = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 3
- store i8 3, i8* %i1
- %base = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 0
- %r = call i8 @sum_two_different_loads(i8* %i0, i8* %i1)
+ %i2 = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 2
+ store i8 2, i8* %i2
+ %i3 = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 3
+ store i8 3, i8* %i3
+ %i4 = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 4
+ ;;; This store is redundant, hence removed.
+ store i8 4, i8* %i4
+ %r = call i8 @sum_two_different_loads(i8* %i2, i8* %i3)
+ ret i8 %r
+}
+
+define i8 @call_partially_simplifiable_2(i1 %cond) {
+; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn
+; TUNIT-LABEL: define {{[^@]+}}@call_partially_simplifiable_2
+; TUNIT-SAME: (i1 [[COND:%.*]]) #[[ATTR2:[0-9]+]] {
+; TUNIT-NEXT: entry:
+; TUNIT-NEXT: [[BYTES:%.*]] = alloca [1024 x i8], align 16
+; TUNIT-NEXT: [[I51:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 51
+; TUNIT-NEXT: [[I52:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 52
+; TUNIT-NEXT: store i8 2, i8* [[I52]], align 4
+; TUNIT-NEXT: [[I53:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 53
+; TUNIT-NEXT: store i8 3, i8* [[I53]], align 1
+; TUNIT-NEXT: [[I54:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 54
+; TUNIT-NEXT: [[SEL:%.*]] = select i1 [[COND]], i8* [[I51]], i8* [[I52]]
+; TUNIT-NEXT: [[R:%.*]] = call i8 @sum_two_different_loads(i8* nocapture nofree nonnull readonly dereferenceable(972) [[SEL]], i8* nocapture nofree noundef nonnull readonly dereferenceable(971) [[I53]]) #[[ATTR3]]
+; TUNIT-NEXT: ret i8 [[R]]
+;
+; CGSCC: Function Attrs: nofree nosync nounwind willreturn
+; CGSCC-LABEL: define {{[^@]+}}@call_partially_simplifiable_2
+; CGSCC-SAME: (i1 [[COND:%.*]]) #[[ATTR3:[0-9]+]] {
+; CGSCC-NEXT: entry:
+; CGSCC-NEXT: [[BYTES:%.*]] = alloca [1024 x i8], align 16
+; CGSCC-NEXT: [[I51:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 51
+; CGSCC-NEXT: [[I52:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 52
+; CGSCC-NEXT: store i8 2, i8* [[I52]], align 4
+; CGSCC-NEXT: [[I53:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 53
+; CGSCC-NEXT: store i8 3, i8* [[I53]], align 1
+; CGSCC-NEXT: [[I54:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 54
+; CGSCC-NEXT: store i8 4, i8* [[I54]], align 2
+; CGSCC-NEXT: [[SEL:%.*]] = select i1 [[COND]], i8* [[I51]], i8* [[I52]]
+; CGSCC-NEXT: [[R:%.*]] = call i8 @sum_two_different_loads(i8* nocapture nofree noundef nonnull readonly dereferenceable(972) [[SEL]], i8* nocapture nofree noundef nonnull readonly dereferenceable(971) [[I53]]) #[[ATTR4]]
+; CGSCC-NEXT: ret i8 [[R]]
+;
+entry:
+ %Bytes = alloca [1024 x i8], align 16
+ %i51 = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 51
+ %i52 = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 52
+ store i8 2, i8* %i52
+ %i53 = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 53
+ store i8 3, i8* %i53
+ %i54 = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 54
+ ;;; This store is redundant, hence removed. Not affected by the select.
+ store i8 4, i8* %i54
+ %sel = select i1 %cond, i8* %i51, i8 *%i52
+ %r = call i8 @sum_two_different_loads(i8* %sel, i8* %i53)
ret i8 %r
}
;.
-; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(argmem: read) }
-; TUNIT: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind willreturn memory(none) }
-; TUNIT: attributes #[[ATTR2]] = { nofree nosync nounwind willreturn }
+; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(none) }
+; TUNIT: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind willreturn memory(argmem: read) }
+; TUNIT: attributes #[[ATTR2]] = { nofree norecurse nosync nounwind willreturn }
+; TUNIT: attributes #[[ATTR3]] = { nofree nosync nounwind willreturn }
;.
; CGSCC: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(argmem: read) }
; CGSCC: attributes #[[ATTR1]] = { nofree nosync nounwind willreturn memory(none) }
; CGSCC: attributes #[[ATTR2]] = { nofree nosync nounwind willreturn memory(argmem: read) }
-; CGSCC: attributes #[[ATTR3]] = { willreturn }
-; CGSCC: attributes #[[ATTR4]] = { willreturn memory(read) }
+; CGSCC: attributes #[[ATTR3]] = { nofree nosync nounwind willreturn }
+; CGSCC: attributes #[[ATTR4]] = { willreturn }
+; CGSCC: attributes #[[ATTR5]] = { willreturn memory(read) }
;.
--- /dev/null
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals
+; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=3 -S < %s | FileCheck %s --check-prefixes=CHECK,TUNIT
+; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,CGSCC
+
+%struct.T = type { i32, [10 x [20 x i8]] }
+
+define i8 @select_offsets_simplifiable_1(i1 %cnd1, i1 %cnd2) {
+; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none)
+; CHECK-LABEL: define {{[^@]+}}@select_offsets_simplifiable_1
+; CHECK-SAME: (i1 [[CND1:%.*]], i1 [[CND2:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[BYTES:%.*]] = alloca [1024 x i8], align 16
+; CHECK-NEXT: [[GEP23:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 23
+; CHECK-NEXT: store i8 23, i8* [[GEP23]], align 4
+; CHECK-NEXT: [[GEP29:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 29
+; CHECK-NEXT: store i8 29, i8* [[GEP29]], align 4
+; CHECK-NEXT: [[GEP7:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 7
+; CHECK-NEXT: store i8 7, i8* [[GEP7]], align 4
+; CHECK-NEXT: [[GEP31:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 31
+; CHECK-NEXT: [[SEL0:%.*]] = select i1 [[CND1]], i64 23, i64 29
+; CHECK-NEXT: [[SEL1:%.*]] = select i1 [[CND2]], i64 [[SEL0]], i64 7
+; CHECK-NEXT: [[GEP_SEL:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 [[SEL1]]
+; CHECK-NEXT: [[I:%.*]] = load i8, i8* [[GEP_SEL]], align 4
+; CHECK-NEXT: ret i8 [[I]]
+;
+entry:
+ %Bytes = alloca [1024 x i8], align 16
+
+ %gep23 = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 23
+ store i8 23, i8* %gep23, align 4
+ %gep29 = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 29
+ store i8 29, i8* %gep29, align 4
+ %gep7 = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 7
+ store i8 7, i8* %gep7, align 4
+
+ ;; This store is redundant, hence removed.
+ %gep31 = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 31
+ store i8 42, i8* %gep31, align 4
+
+ %sel0 = select i1 %cnd1, i64 23, i64 29
+ %sel1 = select i1 %cnd2, i64 %sel0, i64 7
+ %gep.sel = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 %sel1
+ %i = load i8, i8* %gep.sel, align 4
+ ret i8 %i
+}
+
+define i8 @select_offsets_simplifiable_2(i1 %cnd1, i1 %cnd2) {
+; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none)
+; CHECK-LABEL: define {{[^@]+}}@select_offsets_simplifiable_2
+; CHECK-SAME: (i1 [[CND1:%.*]], i1 [[CND2:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[BYTES:%.*]] = alloca [1024 x i8], align 16
+; CHECK-NEXT: [[GEP23:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 23
+; CHECK-NEXT: store i8 23, i8* [[GEP23]], align 4
+; CHECK-NEXT: [[GEP29:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 29
+; CHECK-NEXT: store i8 29, i8* [[GEP29]], align 4
+; CHECK-NEXT: [[GEP7:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 7
+; CHECK-NEXT: store i8 7, i8* [[GEP7]], align 4
+; CHECK-NEXT: [[GEP31:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 31
+; CHECK-NEXT: [[SEL0:%.*]] = select i1 [[CND1]], i64 20, i64 26
+; CHECK-NEXT: [[SEL1:%.*]] = select i1 [[CND2]], i64 [[SEL0]], i64 4
+; CHECK-NEXT: [[GEP_SEL:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 [[SEL1]]
+; CHECK-NEXT: [[GEP_PLUS:%.*]] = getelementptr inbounds i8, i8* [[GEP_SEL]], i64 3
+; CHECK-NEXT: [[I:%.*]] = load i8, i8* [[GEP_PLUS]], align 4
+; CHECK-NEXT: ret i8 [[I]]
+;
+entry:
+ %Bytes = alloca [1024 x i8], align 16
+
+ %gep23 = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 23
+ store i8 23, i8* %gep23, align 4
+ %gep29 = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 29
+ store i8 29, i8* %gep29, align 4
+ %gep7 = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 7
+ store i8 7, i8* %gep7, align 4
+
+ ;; This store is redundant, hence removed.
+ %gep31 = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 31
+ store i8 42, i8* %gep31, align 4
+
+ ;; Adjust the offsets so that they match the stores after adding 3
+ %sel0 = select i1 %cnd1, i64 20, i64 26
+ %sel1 = select i1 %cnd2, i64 %sel0, i64 4
+ %gep.sel = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 %sel1
+ %gep.plus = getelementptr inbounds i8, i8* %gep.sel, i64 3
+ %i = load i8, i8* %gep.plus, align 4
+ ret i8 %i
+}
+
+define i8 @select_offsets_simplifiable_3(i1 %cnd1, i1 %cnd2) {
+; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none)
+; CHECK-LABEL: define {{[^@]+}}@select_offsets_simplifiable_3
+; CHECK-SAME: (i1 [[CND1:%.*]], i1 [[CND2:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[BUNDLE:%.*]] = alloca [[STRUCT_T:%.*]], align 64
+; CHECK-NEXT: [[SEL1:%.*]] = select i1 [[CND1]], i64 1, i64 3
+; CHECK-NEXT: [[SEL2:%.*]] = select i1 [[CND2]], i64 5, i64 11
+; CHECK-NEXT: [[GEP_SEL:%.*]] = getelementptr inbounds [[STRUCT_T]], %struct.T* [[BUNDLE]], i64 0, i32 1, i64 [[SEL1]], i64 [[SEL2]]
+; CHECK-NEXT: ret i8 100
+;
+entry:
+ %bundle = alloca %struct.T, align 64
+ %gep.fixed = getelementptr inbounds %struct.T, %struct.T* %bundle, i64 0, i32 1, i64 1, i64 1
+ store i8 100, i8* %gep.fixed, align 4
+ %sel1 = select i1 %cnd1, i64 1, i64 3
+ %sel2 = select i1 %cnd2, i64 5, i64 11
+ %gep.sel = getelementptr inbounds %struct.T, %struct.T* %bundle, i64 0, i32 1, i64 %sel1, i64 %sel2
+ store i8 42, i8* %gep.sel, align 4
+ %i = load i8, i8* %gep.fixed, align 4
+ ret i8 %i
+}
+
+define i8 @select_offsets_not_simplifiable_1(i1 %cnd1, i1 %cnd2) {
+; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none)
+; CHECK-LABEL: define {{[^@]+}}@select_offsets_not_simplifiable_1
+; CHECK-SAME: (i1 [[CND1:%.*]], i1 [[CND2:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[BYTES:%.*]] = alloca [1024 x i8], align 16
+; CHECK-NEXT: [[SEL0:%.*]] = select i1 [[CND1]], i64 23, i64 29
+; CHECK-NEXT: [[SEL1:%.*]] = select i1 [[CND2]], i64 [[SEL0]], i64 7
+; CHECK-NEXT: [[GEP23:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 23
+; CHECK-NEXT: store i8 100, i8* [[GEP23]], align 4
+; CHECK-NEXT: [[GEP_SEL:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 [[SEL1]]
+; CHECK-NEXT: [[I:%.*]] = load i8, i8* [[GEP_SEL]], align 4
+; CHECK-NEXT: ret i8 [[I]]
+;
+entry:
+ %Bytes = alloca [1024 x i8], align 16
+ %sel0 = select i1 %cnd1, i64 23, i64 29
+ %sel1 = select i1 %cnd2, i64 %sel0, i64 7
+ %gep23 = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 23
+ store i8 100, i8* %gep23, align 4
+ %gep.sel = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 %sel1
+ %i = load i8, i8* %gep.sel, align 4
+ ret i8 %i
+}
+
+define i8 @select_offsets_not_simplifiable_2(i1 %cnd1, i1 %cnd2) {
+; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none)
+; CHECK-LABEL: define {{[^@]+}}@select_offsets_not_simplifiable_2
+; CHECK-SAME: (i1 [[CND1:%.*]], i1 [[CND2:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[BYTES:%.*]] = alloca [1024 x i8], align 16
+; CHECK-NEXT: [[SEL0:%.*]] = select i1 [[CND1]], i64 23, i64 29
+; CHECK-NEXT: [[SEL1:%.*]] = select i1 [[CND2]], i64 [[SEL0]], i64 7
+; CHECK-NEXT: [[GEP32:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 32
+; CHECK-NEXT: store i8 100, i8* [[GEP32]], align 16
+; CHECK-NEXT: [[GEP_SEL:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 [[SEL1]]
+; CHECK-NEXT: [[GEP_PLUS:%.*]] = getelementptr inbounds i8, i8* [[GEP_SEL]], i64 3
+; CHECK-NEXT: [[I:%.*]] = load i8, i8* [[GEP_PLUS]], align 4
+; CHECK-NEXT: ret i8 [[I]]
+;
+entry:
+ %Bytes = alloca [1024 x i8], align 16
+ %sel0 = select i1 %cnd1, i64 23, i64 29
+ %sel1 = select i1 %cnd2, i64 %sel0, i64 7
+ %gep32 = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 32
+ store i8 100, i8* %gep32, align 4
+ %gep.sel = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 %sel1
+ %gep.plus = getelementptr inbounds i8, i8* %gep.sel, i64 3
+ %i = load i8, i8* %gep.plus, align 4
+ ret i8 %i
+}
+
+define i8 @select_offsets_not_simplifiable_3(i1 %cnd1, i1 %cnd2) {
+; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none)
+; CHECK-LABEL: define {{[^@]+}}@select_offsets_not_simplifiable_3
+; CHECK-SAME: (i1 [[CND1:%.*]], i1 [[CND2:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[BYTES:%.*]] = alloca [1024 x i8], align 16
+; CHECK-NEXT: [[SEL0:%.*]] = select i1 [[CND1]], i64 23, i64 29
+; CHECK-NEXT: [[SEL1:%.*]] = select i1 [[CND2]], i64 [[SEL0]], i64 7
+; CHECK-NEXT: [[GEP_SEL:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 [[SEL1]]
+; CHECK-NEXT: store i8 100, i8* [[GEP_SEL]], align 4
+; CHECK-NEXT: [[GEP29:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 29
+; CHECK-NEXT: [[I:%.*]] = load i8, i8* [[GEP29]], align 4
+; CHECK-NEXT: ret i8 [[I]]
+;
+entry:
+ %Bytes = alloca [1024 x i8], align 16
+ %sel0 = select i1 %cnd1, i64 23, i64 29
+ %sel1 = select i1 %cnd2, i64 %sel0, i64 7
+ %gep.sel = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 %sel1
+ store i8 100, i8* %gep.sel, align 4
+ %gep29 = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 29
+ %i = load i8, i8* %gep29, align 4
+ ret i8 %i
+}
+
+define i8 @select_offsets_not_simplifiable_4(i1 %cnd1, i1 %cnd2) {
+; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none)
+; CHECK-LABEL: define {{[^@]+}}@select_offsets_not_simplifiable_4
+; CHECK-SAME: (i1 [[CND1:%.*]], i1 [[CND2:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[BYTES:%.*]] = alloca [1024 x i8], align 16
+; CHECK-NEXT: [[SEL0:%.*]] = select i1 [[CND1]], i64 23, i64 29
+; CHECK-NEXT: [[SEL1:%.*]] = select i1 [[CND2]], i64 [[SEL0]], i64 7
+; CHECK-NEXT: [[GEP_SEL:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 [[SEL1]]
+; CHECK-NEXT: [[GEP_PLUS:%.*]] = getelementptr inbounds i8, i8* [[GEP_SEL]], i64 3
+; CHECK-NEXT: store i8 100, i8* [[GEP_PLUS]], align 4
+; CHECK-NEXT: [[GEP32:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 32
+; CHECK-NEXT: [[I:%.*]] = load i8, i8* [[GEP32]], align 16
+; CHECK-NEXT: ret i8 [[I]]
+;
+entry:
+ %Bytes = alloca [1024 x i8], align 16
+ %sel0 = select i1 %cnd1, i64 23, i64 29
+ %sel1 = select i1 %cnd2, i64 %sel0, i64 7
+ %gep.sel = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 %sel1
+ %gep.plus = getelementptr inbounds i8, i8* %gep.sel, i64 3
+ store i8 100, i8* %gep.plus, align 4
+ %gep32 = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 32
+ %i = load i8, i8* %gep32, align 4
+ ret i8 %i
+}
+
+define i8 @select_offsets_not_simplifiable_5(i1 %cnd1, i1 %cnd2) {
+; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none)
+; CHECK-LABEL: define {{[^@]+}}@select_offsets_not_simplifiable_5
+; CHECK-SAME: (i1 [[CND1:%.*]], i1 [[CND2:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[BUNDLE:%.*]] = alloca [[STRUCT_T:%.*]], align 64
+; CHECK-NEXT: [[GEP_FIXED:%.*]] = getelementptr inbounds [[STRUCT_T]], %struct.T* [[BUNDLE]], i64 0, i32 1, i64 3, i64 5
+; CHECK-NEXT: store i8 100, i8* [[GEP_FIXED]], align 4
+; CHECK-NEXT: [[SEL1:%.*]] = select i1 [[CND1]], i64 1, i64 3
+; CHECK-NEXT: [[SEL2:%.*]] = select i1 [[CND2]], i64 5, i64 11
+; CHECK-NEXT: [[GEP_SEL:%.*]] = getelementptr inbounds [[STRUCT_T]], %struct.T* [[BUNDLE]], i64 0, i32 1, i64 [[SEL1]], i64 [[SEL2]]
+; CHECK-NEXT: store i8 42, i8* [[GEP_SEL]], align 4
+; CHECK-NEXT: [[I:%.*]] = load i8, i8* [[GEP_FIXED]], align 4
+; CHECK-NEXT: ret i8 [[I]]
+;
+entry:
+ %bundle = alloca %struct.T, align 64
+ %gep.fixed = getelementptr inbounds %struct.T, %struct.T* %bundle, i64 0, i32 1, i64 3, i64 5 ; all-constant indices
+ store i8 100, i8* %gep.fixed, align 4
+ %sel1 = select i1 %cnd1, i64 1, i64 3 ; variable index: 1 or 3
+ %sel2 = select i1 %cnd2, i64 5, i64 11 ; variable index: 5 or 11
+ %gep.sel = getelementptr inbounds %struct.T, %struct.T* %bundle, i64 0, i32 1, i64 %sel1, i64 %sel2 ; two select-fed indices in one GEP
+
+ ;; This store prevents the constant 100 from being propagated to ret
+ store i8 42, i8* %gep.sel, align 4
+
+ %i = load i8, i8* %gep.fixed, align 4 ; kept as a load per the CHECK lines above
+ ret i8 %i
+}
+
+define i8 @select_gep_simplifiable_1(i1 %cnd1, i1 %cnd2) {
+; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write)
+; CHECK-LABEL: define {{[^@]+}}@select_gep_simplifiable_1
+; CHECK-SAME: (i1 [[CND1:%.*]], i1 [[CND2:%.*]]) #[[ATTR1:[0-9]+]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[BYTES:%.*]] = alloca [1024 x i8], align 16
+; CHECK-NEXT: [[GEP7:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 7
+; CHECK-NEXT: [[GEP23:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 23
+; CHECK-NEXT: [[SEL_PTR:%.*]] = select i1 [[CND1]], i8* [[GEP7]], i8* [[GEP23]]
+; CHECK-NEXT: store i8 42, i8* [[SEL_PTR]], align 4
+; CHECK-NEXT: ret i8 21
+;
+entry:
+ %Bytes = alloca [1024 x i8], align 16
+ %gep3 = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 3 ; offset 3
+ store i8 21, i8* %gep3, align 4
+ %gep7 = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 7 ; offset 7
+ %gep23 = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 23 ; offset 23
+ %sel.ptr = select i1 %cnd1, i8* %gep7, i8* %gep23 ; store target is offset 7 or 23, never 3
+ store i8 42, i8* %sel.ptr, align 4
+ %i = load i8, i8* %gep3, align 4 ; disjoint from both select targets; folded to 21 (see CHECK: ret i8 21)
+ ret i8 %i
+}
+
+define i8 @select_gep_not_simplifiable_1(i1 %cnd1, i1 %cnd2) {
+; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn
+; CHECK-LABEL: define {{[^@]+}}@select_gep_not_simplifiable_1
+; CHECK-SAME: (i1 [[CND1:%.*]], i1 [[CND2:%.*]]) #[[ATTR2:[0-9]+]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[BYTES:%.*]] = alloca [1024 x i8], align 16
+; CHECK-NEXT: [[GEP7:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 7
+; CHECK-NEXT: [[GEP23:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 23
+; CHECK-NEXT: [[SEL_PTR:%.*]] = select i1 [[CND1]], i8* [[GEP7]], i8* [[GEP23]]
+; CHECK-NEXT: store i8 42, i8* [[SEL_PTR]], align 4
+; CHECK-NEXT: [[I:%.*]] = load i8, i8* [[GEP7]], align 4
+; CHECK-NEXT: ret i8 [[I]]
+;
+entry:
+ %Bytes = alloca [1024 x i8], align 16
+ %gep7 = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 7 ; offset 7
+ %gep23 = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 23 ; offset 23
+ %sel.ptr = select i1 %cnd1, i8* %gep7, i8* %gep23 ; store target is offset 7 or 23
+ store i8 42, i8* %sel.ptr, align 4
+ %i = load i8, i8* %gep7, align 4 ; offset 7 is a possible store target, so the load stays (see CHECK)
+ ret i8 %i
+}
+
+; FIXME: This should be simplifiable. See comment inside.
+
+define i8 @phi_offsets_fixme(i1 %cnd1, i1 %cnd2) {
+; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none)
+; CHECK-LABEL: define {{[^@]+}}@phi_offsets_fixme
+; CHECK-SAME: (i1 [[CND1:%.*]], i1 [[CND2:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[BYTES:%.*]] = alloca [1024 x i8], align 16
+; CHECK-NEXT: [[GEP_FIXED:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 0
+; CHECK-NEXT: store i8 100, i8* [[GEP_FIXED]], align 16
+; CHECK-NEXT: br i1 [[CND1]], label [[THEN:%.*]], label [[ELSE:%.*]]
+; CHECK: then:
+; CHECK-NEXT: br label [[JOIN:%.*]]
+; CHECK: else:
+; CHECK-NEXT: br label [[JOIN]]
+; CHECK: join:
+; CHECK-NEXT: [[PHI:%.*]] = phi i64 [ 3, [[THEN]] ], [ 11, [[ELSE]] ]
+; CHECK-NEXT: [[GEP_PHI:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 [[PHI]]
+; CHECK-NEXT: store i8 42, i8* [[GEP_PHI]], align 4
+; CHECK-NEXT: [[I:%.*]] = load i8, i8* [[GEP_FIXED]], align 16
+; CHECK-NEXT: ret i8 [[I]]
+;
+entry:
+ %Bytes = alloca [1024 x i8], align 16
+ %gep.fixed = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 0 ; offset 0
+ store i8 100, i8* %gep.fixed, align 4
+ br i1 %cnd1, label %then, label %else
+
+then:
+ br label %join
+
+else:
+ br label %join
+
+join:
+ ; FIXME: AAPotentialConstantValues does not detect the constant values for the
+ ; PHI below. It needs to rely on AAPotentialValues.
+ %phi = phi i64 [ 3, %then ], [ 11, %else ] ; 3 or 11, both disjoint from offset 0
+ %gep.phi = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 %phi
+ store i8 42, i8* %gep.phi, align 4
+ %i = load i8, i8* %gep.fixed, align 4 ; could fold to 100 once the PHI offsets are detected
+ ret i8 %i
+}
+
+;.
+; CHECK: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(none) }
+; CHECK: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind willreturn memory(write) }
+; CHECK: attributes #[[ATTR2]] = { nofree norecurse nosync nounwind willreturn }
+;.
ret void
}
-define i8 @multiple_offsets_not_simplifiable_1(i1 %cnd1, i1 %cnd2) {
-; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn
-; TUNIT-LABEL: define {{[^@]+}}@multiple_offsets_not_simplifiable_1
-; TUNIT-SAME: (i1 [[CND1:%.*]], i1 [[CND2:%.*]]) #[[ATTR3]] {
-; TUNIT-NEXT: entry:
-; TUNIT-NEXT: [[BYTES:%.*]] = alloca [1024 x i8], align 16
-; TUNIT-NEXT: [[GEP7:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 7
-; TUNIT-NEXT: [[GEP23:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 23
-; TUNIT-NEXT: [[SEL_PTR:%.*]] = select i1 [[CND1]], i8* [[GEP7]], i8* [[GEP23]]
-; TUNIT-NEXT: store i8 42, i8* [[SEL_PTR]], align 4
-; TUNIT-NEXT: [[I:%.*]] = load i8, i8* [[GEP7]], align 4
-; TUNIT-NEXT: ret i8 [[I]]
-;
-; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn
-; CGSCC-LABEL: define {{[^@]+}}@multiple_offsets_not_simplifiable_1
-; CGSCC-SAME: (i1 [[CND1:%.*]], i1 [[CND2:%.*]]) #[[ATTR5]] {
-; CGSCC-NEXT: entry:
-; CGSCC-NEXT: [[BYTES:%.*]] = alloca [1024 x i8], align 16
-; CGSCC-NEXT: [[GEP7:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 7
-; CGSCC-NEXT: [[GEP23:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 23
-; CGSCC-NEXT: [[SEL_PTR:%.*]] = select i1 [[CND1]], i8* [[GEP7]], i8* [[GEP23]]
-; CGSCC-NEXT: store i8 42, i8* [[SEL_PTR]], align 4
-; CGSCC-NEXT: [[I:%.*]] = load i8, i8* [[GEP7]], align 4
-; CGSCC-NEXT: ret i8 [[I]]
+define i8 @gep_index_from_binary_operator(i1 %cnd1, i1 %cnd2) {
+; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none)
+; CHECK-LABEL: define {{[^@]+}}@gep_index_from_binary_operator
+; CHECK-SAME: (i1 [[CND1:%.*]], i1 [[CND2:%.*]]) #[[ATTR4]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[BYTES:%.*]] = alloca [1024 x i8], align 16
+; CHECK-NEXT: [[GEP_FIXED:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 12
+; CHECK-NEXT: ret i8 100
;
entry:
 %Bytes = alloca [1024 x i8], align 16
- %gep7 = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 7
- %gep23 = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 23
- ; %phi.ptr = phi i8* [ %gep7, %then ], [ %gep23, %else ]
- %sel.ptr = select i1 %cnd1, i8* %gep7, i8* %gep23
- store i8 42, i8* %sel.ptr, align 4
- %i = load i8, i8* %gep7, align 4
+ %offset = add i64 5, 7 ; constant-foldable index: 12
+ %gep.fixed = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 12 ; offset 12, written directly
+ %gep.sum = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 %offset ; same offset, via the add
+ store i8 100, i8* %gep.fixed, align 4
+ %i = load i8, i8* %gep.sum, align 4 ; folded to 100 (see CHECK: ret i8 100)
 ret i8 %i
}
+; FIXME: This should be simplifiable. See comment inside.
+
+define i8 @gep_index_from_memory(i1 %cnd1, i1 %cnd2) {
+; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none)
+; CHECK-LABEL: define {{[^@]+}}@gep_index_from_memory
+; CHECK-SAME: (i1 [[CND1:%.*]], i1 [[CND2:%.*]]) #[[ATTR4]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[BYTES:%.*]] = alloca [1024 x i8], align 16
+; CHECK-NEXT: [[GEP_FIXED:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 12
+; CHECK-NEXT: [[GEP_LOADED:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 12
+; CHECK-NEXT: store i8 100, i8* [[GEP_LOADED]], align 4
+; CHECK-NEXT: [[I:%.*]] = load i8, i8* [[GEP_FIXED]], align 4
+; CHECK-NEXT: ret i8 [[I]]
+;
+entry:
+ %Bytes = alloca [1024 x i8], align 16
+ %addr = alloca i64, align 16 ; scratch slot holding the GEP index
+ %gep.addr = getelementptr inbounds i64, i64* %addr, i64 0
+ store i64 12, i64* %gep.addr, align 8 ; the index is a compile-time constant
+ %offset = load i64, i64* %gep.addr, align 8 ; round-trips 12 through memory
+ %gep.fixed = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 12 ; offset 12
+
+ ; FIXME: AAPotentialConstantValues does not detect the constant offset being
+ ; passed to this GEP. It needs to rely on AAPotentialValues.
+ %gep.loaded = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 %offset
+ store i8 100, i8* %gep.loaded, align 4
+
+ %i = load i8, i8* %gep.fixed, align 4 ; would fold to 100 once the loaded index is seen as 12
+ ret i8 %i
+}
!llvm.module.flags = !{!0, !1}
!llvm.ident = !{!2}