class InterleavedAccessInfo {
public:
InterleavedAccessInfo(PredicatedScalarEvolution &PSE, Loop *L,
- DominatorTree *DT, LoopInfo *LI)
- : PSE(PSE), TheLoop(L), DT(DT), LI(LI) {}
+ DominatorTree *DT, LoopInfo *LI,
+ const LoopAccessInfo *LAI)
+ : PSE(PSE), TheLoop(L), DT(DT), LI(LI), LAI(LAI) {}
~InterleavedAccessInfo() {
SmallSet<InterleaveGroup *, 4> DelSet;
/// \brief Analyze the interleaved accesses and collect them in interleave
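- /// groups. Substitute symbolic strides using \p Strides.
+ /// groups. Symbolic strides are substituted using the strides recorded in
+ /// the LoopAccessInfo supplied at construction.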
- void analyzeInterleaving(const ValueToValueMap &Strides);
+ void analyzeInterleaving();
/// \brief Check if \p Instr belongs to any interleave group.
bool isInterleaved(Instruction *Instr) const {
/// out-of-bounds requires a scalar epilogue iteration for correctness.
bool requiresScalarEpilogue() const { return RequiresScalarEpilogue; }
- /// \brief Initialize the LoopAccessInfo used for dependence checking.
- void setLAI(const LoopAccessInfo *Info) { LAI = Info; }
-
private:
/// A wrapper around ScalarEvolution, used to add runtime SCEV checks.
/// Simplifies SCEV expressions in the context of existing SCEV assumptions.
Loop *TheLoop;
DominatorTree *DT;
LoopInfo *LI;
- const LoopAccessInfo *LAI = nullptr;
+ const LoopAccessInfo *LAI;
/// True if the loop may contain non-reversed interleaved groups with
/// out-of-bounds accesses. We ensure we don't speculatively access memory
OptimizationRemarkEmitter *ORE, LoopVectorizationRequirements *R,
LoopVectorizeHints *H, DemandedBits *DB, AssumptionCache *AC)
: TheLoop(L), PSE(PSE), TLI(TLI), TTI(TTI), DT(DT), GetLAA(GetLAA),
- ORE(ORE), InterleaveInfo(PSE, L, DT, LI), Requirements(R), Hints(H),
- DB(DB), AC(AC) {}
+ ORE(ORE), Requirements(R), Hints(H), DB(DB), AC(AC) {}
/// ReductionList contains the reduction descriptors for all
/// of the reductions that were found in the loop.
const LoopAccessInfo *getLAI() const { return LAI; }
- /// \brief Check if \p Instr belongs to any interleaved access group.
- bool isAccessInterleaved(Instruction *Instr) {
- return InterleaveInfo.isInterleaved(Instr);
- }
-
- /// \brief Get the interleaved access group that \p Instr belongs to.
- const InterleaveGroup *getInterleavedAccessGroup(Instruction *Instr) {
- return InterleaveInfo.getInterleaveGroup(Instr);
- }
-
- /// \brief Returns true if an interleaved group requires a scalar iteration
- /// to handle accesses with gaps.
- bool requiresScalarEpilogue() const {
- return InterleaveInfo.requiresScalarEpilogue();
- }
-
unsigned getMaxSafeDepDistBytes() { return LAI->getMaxSafeDepDistBytes(); }
uint64_t getMaxSafeRegisterWidth() const {
/// Interface to emit optimization remarks.
OptimizationRemarkEmitter *ORE;
- /// The interleave access information contains groups of interleaved accesses
- /// with the same stride and close to each other.
- InterleavedAccessInfo InterleaveInfo;
-
// --- vectorization state --- //
/// Holds the primary induction variable. This is the counter of the
const TargetLibraryInfo *TLI, DemandedBits *DB,
AssumptionCache *AC,
OptimizationRemarkEmitter *ORE, const Function *F,
- const LoopVectorizeHints *Hints)
+ const LoopVectorizeHints *Hints,
+ InterleavedAccessInfo &IAI)
: TheLoop(L), PSE(PSE), LI(LI), Legal(Legal), TTI(TTI), TLI(TLI), DB(DB),
- AC(AC), ORE(ORE), TheFunction(F), Hints(Hints) {}
+ AC(AC), ORE(ORE), TheFunction(F), Hints(Hints), InterleaveInfo(IAI) {}
/// \return An upper bound for the vectorization factor, or None if
/// vectorization should be avoided up front.
/// access that can be widened.
bool memoryInstructionCanBeWidened(Instruction *I, unsigned VF = 1);
+ /// \brief Check if \p Instr belongs to any interleaved access group.
+ /// Read-only query; forwards to the InterleavedAccessInfo held by reference.
+ bool isAccessInterleaved(Instruction *Instr) const {
+ return InterleaveInfo.isInterleaved(Instr);
+ }
+
+ /// \brief Get the interleaved access group that \p Instr belongs to.
+ /// Read-only query; forwards to the InterleavedAccessInfo held by reference.
+ const InterleaveGroup *getInterleavedAccessGroup(Instruction *Instr) const {
+ return InterleaveInfo.getInterleaveGroup(Instr);
+ }
+
+ /// \brief Returns true if an interleaved group requires a scalar iteration
+ /// to handle accesses with gaps.
+ bool requiresScalarEpilogue() const {
+ return InterleaveInfo.requiresScalarEpilogue();
+ }
+
private:
unsigned NumPredStores = 0;
/// Loop Vectorize Hint.
const LoopVectorizeHints *Hints;
+ /// The interleave access information contains groups of interleaved accesses
+ /// with the same stride and close to each other.
+ InterleavedAccessInfo &InterleaveInfo;
+
/// Values to ignore in the cost model.
SmallPtrSet<const Value *, 16> ValuesToIgnore;
// <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11> ; Interleave R,G,B elements
// store <12 x i32> %interleaved.vec ; Write 4 tuples of R,G,B
void InnerLoopVectorizer::vectorizeInterleaveGroup(Instruction *Instr) {
- const InterleaveGroup *Group = Legal->getInterleavedAccessGroup(Instr);
+ const InterleaveGroup *Group = Cost->getInterleavedAccessGroup(Instr);
assert(Group && "Fail to get an interleaved access group.");
// Skip if current instruction is not the insert position.
// does not evenly divide the trip count, no adjustment is necessary since
// there will already be scalar iterations. Note that the minimum iterations
// check ensures that N >= Step.
- if (VF > 1 && Legal->requiresScalarEpilogue()) {
+ if (VF > 1 && Cost->requiresScalarEpilogue()) {
auto *IsZero = Builder.CreateICmpEQ(R, ConstantInt::get(R->getType(), 0));
R = Builder.CreateSelect(IsZero, Step, R);
}
// vector trip count is zero. This check also covers the case where adding one
// to the backedge-taken count overflowed leading to an incorrect trip count
// of zero. In this case we will also jump to the scalar loop.
- auto P = Legal->requiresScalarEpilogue() ? ICmpInst::ICMP_ULE
- : ICmpInst::ICMP_ULT;
+ auto P = Cost->requiresScalarEpilogue() ? ICmpInst::ICMP_ULE
+ : ICmpInst::ICMP_ULT;
Value *CheckMinIters = Builder.CreateICmp(
P, Count, ConstantInt::get(Count->getType(), VF * UF), "min.iters.check");
: "")
<< "!\n");
- bool UseInterleaved = TTI->enableInterleavedAccessVectorization();
-
- // If an override option has been passed in for interleaved accesses, use it.
- if (EnableInterleavedMemAccesses.getNumOccurrences() > 0)
- UseInterleaved = EnableInterleavedMemAccesses;
-
- // Analyze interleaved memory accesses.
- if (UseInterleaved)
- InterleaveInfo.analyzeInterleaving(*getSymbolicStrides());
-
unsigned SCEVThreshold = VectorizeSCEVCheckThreshold;
if (Hints->getForce() == LoopVectorizeHints::FK_Enabled)
SCEVThreshold = PragmaVectorizeSCEVCheckThreshold;
bool LoopVectorizationLegality::canVectorizeMemory() {
LAI = &(*GetLAA)(*TheLoop);
- InterleaveInfo.setLAI(LAI);
const OptimizationRemarkAnalysis *LAR = LAI->getReport();
if (LAR) {
ORE->emit([&]() {
// this group because it and (2) are dependent. However, (1) can be grouped
// with other accesses that may precede it in program order. Note that a
// bottom-up order does not imply that WAW dependences should not be checked.
-void InterleavedAccessInfo::analyzeInterleaving(
- const ValueToValueMap &Strides) {
+void InterleavedAccessInfo::analyzeInterleaving() {
DEBUG(dbgs() << "LV: Analyzing interleaved accesses...\n");
+ const ValueToValueMap &Strides = LAI->getSymbolicStrides();
// Holds all accesses with a constant stride.
MapVector<Instruction *, StrideDescriptor> AccessStrideInfo;
// optimization to non-pointer types.
//
if (T->isPointerTy() && !isConsecutiveLoadOrStore(&I) &&
- !Legal->isAccessInterleaved(&I) && !isLegalGatherOrScatter(&I))
+ !isAccessInterleaved(&I) && !isLegalGatherOrScatter(&I))
continue;
MinWidth = std::min(MinWidth,
Type *VectorTy = ToVectorTy(ValTy, VF);
unsigned AS = getMemInstAddressSpace(I);
- auto Group = Legal->getInterleavedAccessGroup(I);
+ auto Group = getInterleavedAccessGroup(I);
assert(Group && "Fail to get an interleaved access group.");
unsigned InterleaveFactor = Group->getFactor();
// Choose between Interleaving, Gather/Scatter or Scalarization.
unsigned InterleaveCost = std::numeric_limits<unsigned>::max();
unsigned NumAccesses = 1;
- if (Legal->isAccessInterleaved(&I)) {
- auto Group = Legal->getInterleavedAccessGroup(&I);
+ if (isAccessInterleaved(&I)) {
+ auto Group = getInterleavedAccessGroup(&I);
assert(Group && "Fail to get an interleaved access group.");
// Make one decision for the whole group.
// If the instructions belongs to an interleave group, the whole group
// receives the same decision. The whole group receives the cost, but
// the cost will actually be assigned to one instruction.
- if (auto Group = Legal->getInterleavedAccessGroup(&I))
+ if (auto Group = getInterleavedAccessGroup(&I))
setWideningDecision(Group, VF, Decision, Cost);
else
setWideningDecision(&I, VF, Decision, Cost);
// Scalarize a widened load of address.
setWideningDecision(I, VF, CM_Scalarize,
(VF * getMemoryInstructionCost(I, 1)));
- else if (auto Group = Legal->getInterleavedAccessGroup(I)) {
+ else if (auto Group = getInterleavedAccessGroup(I)) {
// Scalarize an interleave group of address loads.
for (unsigned I = 0; I < Group->getFactor(); ++I) {
if (Instruction *Member = Group->getMember(I))
VPInterleaveRecipe *
LoopVectorizationPlanner::tryToInterleaveMemory(Instruction *I,
VFRange &Range) {
- const InterleaveGroup *IG = Legal->getInterleavedAccessGroup(I);
+ const InterleaveGroup *IG = CM.getInterleavedAccessGroup(I);
if (!IG)
return nullptr;
// I is a member of an InterleaveGroup for Range.Start. If it's an adjunct
// member of the IG, do not construct any Recipe for it.
- const InterleaveGroup *IG = Legal->getInterleavedAccessGroup(Instr);
+ const InterleaveGroup *IG = CM.getInterleavedAccessGroup(Instr);
if (IG && Instr != IG->getInsertPos() &&
Range.Start >= 2 && // Query is illegal for VF == 1
CM.getWideningDecision(Instr, Range.Start) ==
return false;
}
+ // Set up the interleaved-access analysis from LVL's LoopAccessInfo. The cost
+ // model constructed below keeps a reference to IAI, so IAI must be declared
+ // before it and outlive it.
+ bool UseInterleaved = TTI->enableInterleavedAccessVectorization();
+ InterleavedAccessInfo IAI(PSE, L, DT, LI, LVL.getLAI());
+
+ // If an override option has been passed in for interleaved accesses, use it.
+ if (EnableInterleavedMemAccesses.getNumOccurrences() > 0)
+ UseInterleaved = EnableInterleavedMemAccesses;
+
+ // Analyze interleaved memory accesses.
+ if (UseInterleaved)
+ IAI.analyzeInterleaving();
+
// Use the cost model.
LoopVectorizationCostModel CM(L, PSE, LI, &LVL, *TTI, TLI, DB, AC, ORE, F,
- &Hints);
+ &Hints, IAI);
CM.collectValuesToIgnore();
// Use the planner for vectorization.