/// @brief The set of base pointers with non-affine accesses.
///
- /// This set contains all base pointers which are used in memory accesses
- /// that can not be detected as affine accesses.
- SetVector<const SCEVUnknown *> NonAffineAccesses;
+ /// This set contains all base pointers and the locations where they are
+ /// used for memory accesses that can not be detected as affine accesses.
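+ /// The loop stored with each base pointer is the innermost loop that
+ /// contains the access instruction; it serves as the scope at which the
+ /// access function is later checked for affinity.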
+ SetVector<std::pair<const SCEVUnknown *, Loop *>> NonAffineAccesses;
BaseToElSize ElementSize;
/// @brief The region has at least one load instruction.
/// @param Context The current detection context.
/// @param Sizes The sizes of the different array dimensions.
/// @param BasePointer The base pointer we are interested in.
+ /// @param Scope The location where @p BasePointer is being used.
/// @returns True if one or more array sizes could be derived - meaning: we
/// see this array as multi-dimensional.
bool hasValidArraySizes(DetectionContext &Context,
SmallVectorImpl<const SCEV *> &Sizes,
- const SCEVUnknown *BasePointer) const;
+ const SCEVUnknown *BasePointer, Loop *Scope) const;
/// @brief Derive access functions for a given base pointer.
///
/// @param Context The current detection context.
/// @param BasePointer The base pointer we are interested in.
+ /// @param Scope The location where @p BasePointer is being used.
/// @returns True if consistent (multi-dimensional) array accesses could be
/// derived for this array.
bool hasBaseAffineAccesses(DetectionContext &Context,
- const SCEVUnknown *BasePointer) const;
+ const SCEVUnknown *BasePointer, Loop *Scope) const;
// Delinearize all non-affine memory accesses and return false when there
// exists a non-affine memory access that cannot be delinearized. Return true
// otherwise.
///
/// @param S The SCEV to analyze.
/// @param R The region in which we look for dependences.
-bool hasScalarDepsInsideRegion(const llvm::SCEV *S, const llvm::Region *R);
+/// @param Scope Location where the value is needed.
+/// @param AllowLoops Whether to allow loop recurrences whose loop lies inside
+///                   @p R but does not contain @p Scope; if false, such
+///                   recurrences count as dependences.
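+/// For example, an AddRec such as {0,+,1}<%L> with %L inside @p R is only
+/// synthesizable at a @p Scope contained in %L; at any other scope it counts
+/// as a dependence unless @p AllowLoops is set.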
+bool hasScalarDepsInsideRegion(const llvm::SCEV *S, const llvm::Region *R,
+ llvm::Loop *Scope, bool AllowLoops);
bool isAffineExpr(const llvm::Region *R, const llvm::SCEV *Expression,
llvm::ScalarEvolution &SE, const llvm::Value *BaseAddress = 0,
InvariantLoadsSetTy *ILS = nullptr);
namespace llvm {
class LoopInfo;
+class Loop;
class ScalarEvolution;
class SCEV;
class Region;
/// @param LI The LoopInfo analysis.
/// @param SE The scalar evolution database.
/// @param R The region out of which SSA names are parameters.
+/// @param Scope Location where the value would be synthesized.
/// @return True if the value @p V can be regenerated from its
///         scalar evolution representation, false otherwise.
bool canSynthesize(const llvm::Value *V, const llvm::LoopInfo *LI,
- llvm::ScalarEvolution *SE, const llvm::Region *R);
+ llvm::ScalarEvolution *SE, const llvm::Region *R,
+ llvm::Loop *Scope);
/// @brief Return the block in which a value is used.
///
bool ScopDetection::hasValidArraySizes(DetectionContext &Context,
SmallVectorImpl<const SCEV *> &Sizes,
- const SCEVUnknown *BasePointer) const {
+ const SCEVUnknown *BasePointer,
+ Loop *Scope) const {
Value *BaseValue = BasePointer->getValue();
Region &CurRegion = Context.CurRegion;
for (const SCEV *DelinearizedSize : Sizes) {
continue;
}
}
- if (hasScalarDepsInsideRegion(DelinearizedSize, &CurRegion))
+ if (hasScalarDepsInsideRegion(DelinearizedSize, &CurRegion, Scope, false))
return invalid<ReportNonAffineAccess>(
Context, /*Assert=*/true, DelinearizedSize,
Context.Accesses[BasePointer].front().first, BaseValue);
return true;
}
-bool ScopDetection::hasBaseAffineAccesses(
- DetectionContext &Context, const SCEVUnknown *BasePointer) const {
+bool ScopDetection::hasBaseAffineAccesses(DetectionContext &Context,
+ const SCEVUnknown *BasePointer,
+ Loop *Scope) const {
auto Shape = std::shared_ptr<ArrayShape>(new ArrayShape(BasePointer));
auto Terms = getDelinearizationTerms(Context, BasePointer);
SE->findArrayDimensions(Terms, Shape->DelinearizedSizes,
Context.ElementSize[BasePointer]);
- if (!hasValidArraySizes(Context, Shape->DelinearizedSizes, BasePointer))
+ if (!hasValidArraySizes(Context, Shape->DelinearizedSizes, BasePointer,
+ Scope))
return false;
return computeAccessFunctions(Context, BasePointer, Shape);
if (Context.HasUnknownAccess && !Context.NonAffineAccesses.empty())
return AllowNonAffine;
- for (const SCEVUnknown *BasePointer : Context.NonAffineAccesses)
- if (!hasBaseAffineAccesses(Context, BasePointer)) {
+ for (auto &Pair : Context.NonAffineAccesses) {
+ auto *BasePointer = Pair.first;
+ auto *Scope = Pair.second;
+ if (!hasBaseAffineAccesses(Context, BasePointer, Scope)) {
if (KeepGoing)
continue;
else
return false;
}
+ }
return true;
}
Context.Accesses[BP].push_back({Inst, AF});
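+  // For a non-affine access, remember the innermost loop containing the
+  // access instruction; it is used as the scope when the access is later
+  // delinearized.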
if (!IsAffine)
- Context.NonAffineAccesses.insert(BP);
+ Context.NonAffineAccesses.insert(
+ std::make_pair(BP, LI->getLoopFor(Inst->getParent())));
} else if (!AllowNonAffine && !IsAffine) {
return invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, AF, Inst,
BV);
// If we can synthesize a PHI we can skip it, but only if it is in the
// region. If it is not, it can only be in the exit block of the region.
// In this case we model the operands but not the PHI itself.
- if (!IsExitBlock && canSynthesize(PHI, LI, SE, &R))
+ auto *Scope = LI->getLoopFor(PHI->getParent());
+ if (!IsExitBlock && canSynthesize(PHI, LI, SE, &R, Scope))
return;
// PHI nodes are modeled as if they had been demoted prior to the SCoP
// If the instruction can be synthesized and the user is in the region we do
// not need to add value dependences.
Region &ScopRegion = scop->getRegion();
- if (canSynthesize(V, LI, SE, &ScopRegion))
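+  // Synthesizability is checked at the scope of the user, i.e., the
+  // innermost loop containing the user's block.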
+ auto *Scope = LI->getLoopFor(UserBB);
+ if (canSynthesize(V, LI, SE, &ScopRegion, Scope))
return;
// Do not build scalar dependences for required invariant loads as we will
bool BlockGenerator::canSyntheziseInStmt(ScopStmt &Stmt, Instruction *Inst) {
Loop *L = getLoopForStmt(Stmt);
return (Stmt.isBlockStmt() || !Stmt.getRegion()->contains(L)) &&
- canSynthesize(Inst, &LI, &SE, &Stmt.getParent()->getRegion());
+ canSynthesize(Inst, &LI, &SE, &Stmt.getParent()->getRegion(), L);
}
void BlockGenerator::copyInstruction(ScopStmt &Stmt, Instruction *Inst,
static int findReferencesInBlock(struct SubtreeReferences &References,
const ScopStmt *Stmt, const BasicBlock *BB) {
for (const Instruction &Inst : *BB)
- for (Value *SrcVal : Inst.operands())
- if (canSynthesize(SrcVal, &References.LI, &References.SE,
- &References.R)) {
- References.SCEVs.insert(
- References.SE.getSCEVAtScope(SrcVal, References.LI.getLoopFor(BB)));
+ for (Value *SrcVal : Inst.operands()) {
+ auto *Scope = References.LI.getLoopFor(BB);
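+    // Values that are synthesizable at this scope are recorded as SCEVs
+    // (evaluated at the scope); all other values become explicit references.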
+ if (canSynthesize(SrcVal, &References.LI, &References.SE, &References.R,
+ Scope)) {
+ References.SCEVs.insert(References.SE.getSCEVAtScope(SrcVal, Scope));
continue;
} else if (Value *NewVal = References.GlobalMap.lookup(SrcVal))
References.Values.insert(NewVal);
+ }
return 0;
}
struct SCEVInRegionDependences
: public SCEVVisitor<SCEVInRegionDependences, bool> {
public:
- /// Returns true when the SCEV has SSA names defined in region R.
- static bool hasDependences(const SCEV *S, const Region *R) {
- SCEVInRegionDependences Ignore(R);
+ /// Returns true when the SCEV has SSA names defined in region R. If @p
+ /// AllowLoops is false, loop dependences are checked as well: AddRec SCEVs
+ /// are only allowed within their loop (the current loop is given by @p
+ /// Scope), not outside of it, unless the AddRec's loop is not even in the
+ /// region.
+ static bool hasDependences(const SCEV *S, const Region *R, Loop *Scope,
+ bool AllowLoops) {
+ SCEVInRegionDependences Ignore(R, Scope, AllowLoops);
return Ignore.visit(S);
}
- SCEVInRegionDependences(const Region *R) : R(R) {}
+ SCEVInRegionDependences(const Region *R, Loop *Scope, bool AllowLoops)
+ : R(R), Scope(Scope), AllowLoops(AllowLoops) {}
bool visit(const SCEV *Expr) {
return SCEVVisitor<SCEVInRegionDependences, bool>::visit(Expr);
}
bool visitAddRecExpr(const SCEVAddRecExpr *Expr) {
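+    // An AddRec is only synthesizable within its own loop: if the
+    // recurrence's loop lies inside the region but does not contain the
+    // current scope, the expression cannot be regenerated there and counts
+    // as a dependence.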
+ if (!AllowLoops) {
+ if (!Scope)
+ return true;
+ auto *L = Expr->getLoop();
+ if (R->contains(L) && !L->contains(Scope))
+ return true;
+ }
+
for (size_t i = 0; i < Expr->getNumOperands(); ++i)
if (visit(Expr->getOperand(i)))
return true;
private:
const Region *R;
+ Loop *Scope;
+ bool AllowLoops;
};
namespace polly {
ST.visitAll(Expr);
}
-bool hasScalarDepsInsideRegion(const SCEV *Expr, const Region *R) {
- return SCEVInRegionDependences::hasDependences(Expr, R);
+bool hasScalarDepsInsideRegion(const SCEV *Expr, const Region *R,
+ llvm::Loop *Scope, bool AllowLoops) {
+ return SCEVInRegionDependences::hasDependences(Expr, R, Scope, AllowLoops);
}
bool isAffineExpr(const Region *R, const SCEV *Expr, ScalarEvolution &SE,
}
bool polly::canSynthesize(const Value *V, const llvm::LoopInfo *LI,
- ScalarEvolution *SE, const Region *R) {
+ ScalarEvolution *SE, const Region *R, Loop *Scope) {
if (!V || !SE->isSCEVable(V->getType()))
return false;
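+  // Evaluate the SCEV at the given scope; getSCEVAtScope folds recurrences
+  // of loops that do not contain Scope to their exit values where possible.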
- if (const SCEV *Scev = SE->getSCEV(const_cast<Value *>(V)))
+ if (const SCEV *Scev = SE->getSCEVAtScope(const_cast<Value *>(V), Scope))
if (!isa<SCEVCouldNotCompute>(Scev))
- if (!hasScalarDepsInsideRegion(Scev, R))
+ if (!hasScalarDepsInsideRegion(Scev, R, Scope, false))
return true;
return false;
--- /dev/null
+; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s
+; RUN: opt %loadPolly -polly-codegen -analyze < %s
+
+; The loop for.body is a SCoP with invariant load hoisting, but does not
+; terminate predictably for ScalarEvolution. The scalar %1 is therefore not
+; synthesizable using SCEVExpander, so we must have Stmt_for_end_loopexit to
+; capture the induction variable at the loop exit. We also check that we do
+; not crash at codegen, because SCEVExpander would use the original induction
+; variable in the generated code.
+
+%struct.bit_stream_struc.3.43.51.71.83.91.99.107.154 = type { i8*, i32, %struct._IO_FILE.1.41.49.69.81.89.97.105.153*, i8*, i32, i64, i32, i32 }
+%struct._IO_FILE.1.41.49.69.81.89.97.105.153 = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker.0.40.48.68.80.88.96.104.152*, %struct._IO_FILE.1.41.49.69.81.89.97.105.153*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i64, i32, [20 x i8] }
+%struct._IO_marker.0.40.48.68.80.88.96.104.152 = type { %struct._IO_marker.0.40.48.68.80.88.96.104.152*, %struct._IO_FILE.1.41.49.69.81.89.97.105.153*, i32 }
+
+define i32 @copy_buffer(%struct.bit_stream_struc.3.43.51.71.83.91.99.107.154* nocapture %bs) {
+entry:
+ %buf_byte_idx5.phi.trans.insert = getelementptr inbounds %struct.bit_stream_struc.3.43.51.71.83.91.99.107.154, %struct.bit_stream_struc.3.43.51.71.83.91.99.107.154* %bs, i64 0, i32 6
+ br i1 undef, label %for.body, label %cleanup
+
+for.body:
+ %indvars.iv28 = phi i64 [ %indvars.iv.next29, %for.body ], [ 0, %entry ]
+ %indvars.iv.next29 = add nuw nsw i64 %indvars.iv28, 1
+ %0 = load i32, i32* %buf_byte_idx5.phi.trans.insert, align 8
+ %cmp6 = icmp sgt i32 0, %0
+ br i1 %cmp6, label %for.body, label %for.end.loopexit
+
+for.end.loopexit:
+ %1 = trunc i64 %indvars.iv.next29 to i32
+ br label %cleanup
+
+cleanup:
+ %retval.0 = phi i32 [ 0, %entry ], [ %1, %for.end.loopexit ]
+ ret i32 %retval.0
+}
+
+
+; CHECK: Invariant Accesses: {
+; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
+; CHECK-NEXT: [p_0_loaded_from_bs] -> { Stmt_for_body[i0] -> MemRef_bs[11] };
+; CHECK-NEXT: Execution Context: [p_0_loaded_from_bs] -> { : }
+; CHECK-NEXT: }
+; CHECK: Statements {
+; CHECK-NEXT: Stmt_for_body
+; CHECK-NEXT: Domain :=
+; CHECK-NEXT: [p_0_loaded_from_bs] -> { Stmt_for_body[0] : p_0_loaded_from_bs >= 0 };
+; CHECK-NEXT: Schedule :=
+; CHECK-NEXT: [p_0_loaded_from_bs] -> { Stmt_for_body[i0] -> [0, 0] };
+; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1]
+; CHECK-NEXT: [p_0_loaded_from_bs] -> { Stmt_for_body[i0] -> MemRef_indvars_iv_next29[] };
+; CHECK-NEXT: Stmt_for_end_loopexit
+; CHECK-NEXT: Domain :=
+; CHECK-NEXT: [p_0_loaded_from_bs] -> { Stmt_for_end_loopexit[] : p_0_loaded_from_bs >= 0 };
+; CHECK-NEXT: Schedule :=
+; CHECK-NEXT: [p_0_loaded_from_bs] -> { Stmt_for_end_loopexit[] -> [1, 0] };
+; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 1]
+; CHECK-NEXT: [p_0_loaded_from_bs] -> { Stmt_for_end_loopexit[] -> MemRef_indvars_iv_next29[] };
+; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1]
+; CHECK-NEXT: [p_0_loaded_from_bs] -> { Stmt_for_end_loopexit[] -> MemRef_1[] };
+; CHECK-NEXT: }