/// separator is found.
void buildSequentialBlockStmts(BasicBlock *BB);
+ /// Create one or more ScopStmts for @p BB using equivalence classes.
+ ///
+ /// Instructions of a basic block that belong to the same equivalence class
+ /// are added to the same statement.
+ void buildEqivClassBlockStmts(BasicBlock *BB);
+
/// Create ScopStmt for all BBs and non-affine subregions of @p SR.
///
/// @param SR A subregion of @p R.
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/EquivalenceClasses.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
cl::desc("Disable multiplicative reductions"), cl::Hidden, cl::ZeroOrMore,
cl::init(false), cl::cat(PollyCategory));
-enum class GranularityChoice { BasicBlocks };
+enum class GranularityChoice { BasicBlocks, ScalarIndepependence };
static cl::opt<GranularityChoice> StmtGranularity(
"polly-stmt-granularity",
cl::desc(
"Algorithm to use for splitting basic blocks into multiple statements"),
cl::values(clEnumValN(GranularityChoice::BasicBlocks, "bb",
- "One statement per basic block")),
+ "One statement per basic block"),
+ clEnumValN(GranularityChoice::ScalarIndepependence,
+ "scalar-indep", "Scalar independence heuristic")),
cl::init(GranularityChoice::BasicBlocks), cl::cat(PollyCategory));
void ScopBuilder::buildPHIAccesses(ScopStmt *PHIStmt, PHINode *PHI,
scop->addScopStmt(BB, SurroundingLoop, Instructions, Count);
}
+/// Is @p Inst an ordered instruction?
+///
+/// An unordered instruction is an instruction, such that a sequence of
+/// unordered instructions can be permuted without changing semantics. Any
+/// instruction for which this is not always the case is ordered.
+static bool isOrderedInstruction(Instruction *Inst) {
+ return Inst->mayHaveSideEffects() || Inst->mayReadOrWriteMemory();
+}
+
+/// Join instructions to the same statement if one uses the scalar result of the
+/// other.
+static void joinOperandTree(EquivalenceClasses<Instruction *> &UnionFind,
+ ArrayRef<Instruction *> ModeledInsts) {
+ for (Instruction *Inst : ModeledInsts) {
+ if (isa<PHINode>(Inst))
+ continue;
+
+ for (Use &Op : Inst->operands()) {
+ Instruction *OpInst = dyn_cast<Instruction>(Op.get());
+ if (!OpInst)
+ continue;
+
+ // Check if OpInst is in the BB and is a modeled instruction.
+ auto OpVal = UnionFind.findValue(OpInst);
+ if (OpVal == UnionFind.end())
+ continue;
+
+ UnionFind.unionSets(Inst, OpInst);
+ }
+ }
+}
+
+/// Join instructions that are used as incoming value in successor PHIs into the
+/// epilogue.
+static void
+joinIncomingPHIValuesIntoEpilogue(EquivalenceClasses<Instruction *> &UnionFind,
+ ArrayRef<Instruction *> ModeledInsts,
+ BasicBlock *BB) {
+ for (BasicBlock *Succ : successors(BB)) {
+ for (Instruction &SuccInst : *Succ) {
+ PHINode *SuccPHI = dyn_cast<PHINode>(&SuccInst);
+ if (!SuccPHI)
+ break;
+
+ Value *IncomingVal = SuccPHI->getIncomingValueForBlock(BB);
+ Instruction *IncomingInst = dyn_cast<Instruction>(IncomingVal);
+ if (!IncomingInst)
+ continue;
+ if (IncomingInst->getParent() != BB)
+ continue;
+ if (UnionFind.findValue(IncomingInst) == UnionFind.end())
+ continue;
+
+ UnionFind.unionSets(nullptr, IncomingInst);
+ }
+ }
+}
+
+/// Ensure that the order of ordered instructions does not change.
+///
+/// If we encounter an ordered instruction enclosed in instructions belonging to
+/// a different statement (which might as well contain ordered instructions, but
+/// this is not tested here), join them.
+static void
+joinOrderedInstructions(EquivalenceClasses<Instruction *> &UnionFind,
+ ArrayRef<Instruction *> ModeledInsts) {
+ SetVector<Instruction *> SeenLeaders;
+ for (Instruction *Inst : ModeledInsts) {
+ if (!isOrderedInstruction(Inst))
+ continue;
+
+ Instruction *Leader = UnionFind.getLeaderValue(Inst);
+ bool Inserted = SeenLeaders.insert(Leader);
+ if (Inserted)
+ continue;
+
+ // Merge statements to close holes. Say, we have already seen statements A
+ // and B, in this order. Then we see an instruction of A again and we would
+ // see the pattern "A B A". This function joins all statements until the
+ // only seen occurrence of A.
+ for (Instruction *Prev : reverse(SeenLeaders)) {
+ // Items added to 'SeenLeaders' are leaders, but may have lost their
+ // leadership status when merged into another statement.
+ Instruction *PrevLeader = UnionFind.getLeaderValue(SeenLeaders.back());
+ if (PrevLeader == Leader)
+ break;
+ UnionFind.unionSets(Prev, Leader);
+ }
+ }
+}
+
+/// Also ensure that the epilogue is the last statement relative to all ordered
+/// instructions.
+///
+/// This is basically joinOrderedInstructions() but using the epilogue as
+/// 'ordered instruction'.
+static void joinAllAfterEpilogue(EquivalenceClasses<Instruction *> &UnionFind,
+ ArrayRef<Instruction *> ModeledInsts) {
+ bool EpilogueSeen = false;
+ for (Instruction *Inst : ModeledInsts) {
+ auto PHIWritesLeader = UnionFind.findLeader(nullptr);
+ auto InstLeader = UnionFind.findLeader(Inst);
+
+ if (PHIWritesLeader == InstLeader)
+ EpilogueSeen = true;
+
+ if (!isOrderedInstruction(Inst))
+ continue;
+
+ if (EpilogueSeen)
+ UnionFind.unionSets(PHIWritesLeader, InstLeader);
+ }
+}
+
+void ScopBuilder::buildEqivClassBlockStmts(BasicBlock *BB) {
+ Loop *L = LI.getLoopFor(BB);
+
+ // Extracting out modeled instructions saves us from checking
+ // shouldModelInst() repeatedly.
+ SmallVector<Instruction *, 32> ModeledInsts;
+ EquivalenceClasses<Instruction *> UnionFind;
+ for (Instruction &Inst : *BB) {
+ if (!shouldModelInst(&Inst, L))
+ continue;
+ ModeledInsts.push_back(&Inst);
+ UnionFind.insert(&Inst);
+ }
+
+ // 'nullptr' represents the last statement for a basic block. It contains no
+ // instructions, but holds the PHI write accesses for successor basic blocks.
+ // If a PHI has an incoming value defined in this BB, it can also be merged
+ // with other statements.
+ // TODO: We wouldn't need this if we would add PHIWrites into the statement
+ // that defines the incoming value (if in the BB) instead of always the last,
+ // so we could unconditionally always add a last statement.
+ UnionFind.insert(nullptr);
+
+ joinOperandTree(UnionFind, ModeledInsts);
+ joinIncomingPHIValuesIntoEpilogue(UnionFind, ModeledInsts, BB);
+ joinOrderedInstructions(UnionFind, ModeledInsts);
+ joinAllAfterEpilogue(UnionFind, ModeledInsts);
+
+ // The list of instructions for statement (statement represented by the leader
+ // instruction). The order of statements instructions is reversed such that
+ // the epilogue is first. This makes it easier to ensure that the epilogue is
+ // the last statement.
+ MapVector<Instruction *, std::vector<Instruction *>> LeaderToInstList;
+
+ // Ensure that the epilogue is last.
+ LeaderToInstList[nullptr];
+
+ // Collect the instructions of all leaders. UnionFind's member iterator
+ // unfortunately are not in any specific order.
+ for (Instruction &Inst : reverse(*BB)) {
+ auto LeaderIt = UnionFind.findLeader(&Inst);
+ if (LeaderIt == UnionFind.member_end())
+ continue;
+
+ std::vector<Instruction *> &InstList = LeaderToInstList[*LeaderIt];
+ InstList.push_back(&Inst);
+ }
+
+ // Finally build the statements.
+ int Count = 0;
+ for (auto &Instructions : reverse(LeaderToInstList)) {
+ std::vector<Instruction *> &InstList = Instructions.second;
+ std::reverse(InstList.begin(), InstList.end());
+ scop->addScopStmt(BB, L, std::move(InstList), Count);
+ Count += 1;
+ }
+}
+
void ScopBuilder::buildStmts(Region &SR) {
if (scop->isNonAffineSubRegion(&SR)) {
std::vector<Instruction *> Instructions;
case GranularityChoice::BasicBlocks:
buildSequentialBlockStmts(BB);
break;
+ case GranularityChoice::ScalarIndepependence:
+ buildEqivClassBlockStmts(BB);
+ break;
}
}
}
--- /dev/null
+; RUN: opt %loadPolly -polly-stmt-granularity=scalar-indep -polly-print-instructions -polly-scops -analyze < %s | FileCheck %s -match-full-lines
+;
+; Split a block into two independent statements that share no scalar.
+; This case has the instructions of the two statements interleaved, such that
+; splitting the BasicBlock in the middle would cause a scalar dependency.
+;
+; for (int j = 0; j < n; j += 1) {
+; body:
+; double valA = A[0];
+; double valB = 21.0 + 21.0;
+; A[0] = valA;
+; B[0] = valB;
+; }
+;
+define void @func(i32 %n, double* noalias nonnull %A, double* noalias nonnull %B) {
+entry:
+ br label %for
+
+for:
+ %j = phi i32 [0, %entry], [%j.inc, %inc]
+ %j.cmp = icmp slt i32 %j, %n
+ br i1 %j.cmp, label %body, label %exit
+
+ body:
+ %valA = load double, double* %A
+ %valB = fadd double 21.0, 21.0
+ store double %valA, double* %A
+ store double %valB, double* %B
+ br label %inc
+
+inc:
+ %j.inc = add nuw nsw i32 %j, 1
+ br label %for
+
+exit:
+ br label %return
+
+return:
+ ret void
+}
+
+
+; CHECK: Statements {
+; CHECK-NEXT: Stmt_body
+; CHECK-NEXT: Domain :=
+; CHECK-NEXT: [n] -> { Stmt_body[i0] : 0 <= i0 < n };
+; CHECK-NEXT: Schedule :=
+; CHECK-NEXT: [n] -> { Stmt_body[i0] -> [i0, 0] };
+; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
+; CHECK-NEXT: [n] -> { Stmt_body[i0] -> MemRef_A[0] };
+; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0]
+; CHECK-NEXT: [n] -> { Stmt_body[i0] -> MemRef_A[0] };
+; CHECK-NEXT: Instructions {
+; CHECK-NEXT: %valA = load double, double* %A
+; CHECK-NEXT: store double %valA, double* %A
+; CHECK-NEXT: }
+; CHECK-NEXT: Stmt_body1
+; CHECK-NEXT: Domain :=
+; CHECK-NEXT: [n] -> { Stmt_body1[i0] : 0 <= i0 < n };
+; CHECK-NEXT: Schedule :=
+; CHECK-NEXT: [n] -> { Stmt_body1[i0] -> [i0, 1] };
+; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0]
+; CHECK-NEXT: [n] -> { Stmt_body1[i0] -> MemRef_B[0] };
+; CHECK-NEXT: Instructions {
+; CHECK-NEXT: %valB = fadd double 2.100000e+01, 2.100000e+01
+; CHECK-NEXT: store double %valB, double* %B
+; CHECK-NEXT: }
+; CHECK-NEXT: }
--- /dev/null
+; RUN: opt %loadPolly -polly-stmt-granularity=scalar-indep -polly-print-instructions -polly-scops -analyze < %s | FileCheck %s -match-full-lines
+;
+; Split a block into two independent statements that share no scalar.
+; This case has an independent statement just for PHI writes.
+;
+; for (int j = 0; j < n; j += 1) {
+; bodyA:
+; double valA = A[0];
+; A[0] = valA;
+;
+; bodyB:
+; phi = 42.0;
+; }
+;
+define void @func(i32 %n, double* noalias nonnull %A) {
+entry:
+ br label %for
+
+for:
+ %j = phi i32 [0, %entry], [%j.inc, %inc]
+ %j.cmp = icmp slt i32 %j, %n
+ br i1 %j.cmp, label %bodyA, label %exit
+
+ bodyA:
+ %valA = load double, double* %A
+ store double %valA, double* %A
+ br label %bodyB
+
+ bodyB:
+ %phi = phi double [42.0, %bodyA]
+ br label %inc
+
+inc:
+ %j.inc = add nuw nsw i32 %j, 1
+ br label %for
+
+exit:
+ br label %return
+
+return:
+ ret void
+}
+
+
+; CHECK: Statements {
+; CHECK-NEXT: Stmt_bodyA
+; CHECK-NEXT: Domain :=
+; CHECK-NEXT: [n] -> { Stmt_bodyA[i0] : 0 <= i0 < n };
+; CHECK-NEXT: Schedule :=
+; CHECK-NEXT: [n] -> { Stmt_bodyA[i0] -> [i0, 0] };
+; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
+; CHECK-NEXT: [n] -> { Stmt_bodyA[i0] -> MemRef_A[0] };
+; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0]
+; CHECK-NEXT: [n] -> { Stmt_bodyA[i0] -> MemRef_A[0] };
+; CHECK-NEXT: Instructions {
+; CHECK-NEXT: %valA = load double, double* %A
+; CHECK-NEXT: store double %valA, double* %A
+; CHECK-NEXT: }
+; CHECK-NEXT: Stmt_bodyA1
+; CHECK-NEXT: Domain :=
+; CHECK-NEXT: [n] -> { Stmt_bodyA1[i0] : 0 <= i0 < n };
+; CHECK-NEXT: Schedule :=
+; CHECK-NEXT: [n] -> { Stmt_bodyA1[i0] -> [i0, 1] };
+; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1]
+; CHECK-NEXT: [n] -> { Stmt_bodyA1[i0] -> MemRef_phi__phi[] };
+; CHECK-NEXT: Instructions {
+; CHECK-NEXT: }
+; CHECK-NEXT: }
--- /dev/null
+; RUN: opt %loadPolly -polly-stmt-granularity=scalar-indep -polly-print-instructions -polly-scops -analyze < %s | FileCheck %s -match-full-lines
+;
+; Check that the statement where the PHI writes are in always comes last.
+; Otherwise we'd need to introduce a scalar dependency from the statement
+; defining a value to the last statement of a basic block.
+;
+; for (int j = 0; j < n; j += 1) {
+; bodyA:
+; double valA = A[0];
+; A[0] = valA;
+; double valB = B[0];
+; B[0] = valB;
+;
+; bodyB:
+; phi = valA;
+; }
+;
+define void @func(i32 %n, double* noalias nonnull %A, double* noalias nonnull %B) {
+entry:
+ br label %for
+
+for:
+ %j = phi i32 [0, %entry], [%j.inc, %inc]
+ %j.cmp = icmp slt i32 %j, %n
+ br i1 %j.cmp, label %bodyA, label %exit
+
+ bodyA:
+ %valA = load double, double* %A
+ store double %valA, double* %A
+ %valB = load double, double* %B
+ store double %valB, double* %B
+ br label %bodyB
+
+ bodyB:
+ %phi = phi double [%valA, %bodyA]
+ br label %inc
+
+inc:
+ %j.inc = add nuw nsw i32 %j, 1
+ br label %for
+
+exit:
+ br label %return
+
+return:
+ ret void
+}
+
+
+; CHECK: Statements {
+; CHECK-NEXT: Stmt_bodyA
+; CHECK-NEXT: Domain :=
+; CHECK-NEXT: [n] -> { Stmt_bodyA[i0] : 0 <= i0 < n };
+; CHECK-NEXT: Schedule :=
+; CHECK-NEXT: [n] -> { Stmt_bodyA[i0] -> [i0] };
+; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
+; CHECK-NEXT: [n] -> { Stmt_bodyA[i0] -> MemRef_A[0] };
+; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0]
+; CHECK-NEXT: [n] -> { Stmt_bodyA[i0] -> MemRef_A[0] };
+; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
+; CHECK-NEXT: [n] -> { Stmt_bodyA[i0] -> MemRef_B[0] };
+; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0]
+; CHECK-NEXT: [n] -> { Stmt_bodyA[i0] -> MemRef_B[0] };
+; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1]
+; CHECK-NEXT: [n] -> { Stmt_bodyA[i0] -> MemRef_phi__phi[] };
+; CHECK-NEXT: Instructions {
+; CHECK-NEXT: %valA = load double, double* %A
+; CHECK-NEXT: store double %valA, double* %A
+; CHECK-NEXT: %valB = load double, double* %B
+; CHECK-NEXT: store double %valB, double* %B
+; CHECK-NEXT: }
+; CHECK-NEXT: }
--- /dev/null
+; RUN: opt %loadPolly -polly-stmt-granularity=scalar-indep -polly-print-instructions -polly-scops -analyze < %s | FileCheck %s -match-full-lines
+;
+; This case has no explicit epilogue for PHI writes because it would
+; have a scalar dependency to the previous statement.
+;
+; for (int j = 0; j < n; j += 1) {
+; bodyA:
+; double valA = A[0];
+; A[0] = valA;
+;
+; bodyB:
+; phi = valA;
+; }
+;
+define void @func(i32 %n, double* noalias nonnull %A) {
+entry:
+ br label %for
+
+for:
+ %j = phi i32 [0, %entry], [%j.inc, %inc]
+ %j.cmp = icmp slt i32 %j, %n
+ br i1 %j.cmp, label %bodyA, label %exit
+
+ bodyA:
+ %valA = load double, double* %A
+ store double %valA, double* %A
+ br label %bodyB
+
+ bodyB:
+ %phi = phi double [%valA, %bodyA]
+ br label %inc
+
+inc:
+ %j.inc = add nuw nsw i32 %j, 1
+ br label %for
+
+exit:
+ br label %return
+
+return:
+ ret void
+}
+
+
+; CHECK: Statements {
+; CHECK-NEXT: Stmt_bodyA
+; CHECK-NEXT: Domain :=
+; CHECK-NEXT: [n] -> { Stmt_bodyA[i0] : 0 <= i0 < n };
+; CHECK-NEXT: Schedule :=
+; CHECK-NEXT: [n] -> { Stmt_bodyA[i0] -> [i0] };
+; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
+; CHECK-NEXT: [n] -> { Stmt_bodyA[i0] -> MemRef_A[0] };
+; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0]
+; CHECK-NEXT: [n] -> { Stmt_bodyA[i0] -> MemRef_A[0] };
+; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1]
+; CHECK-NEXT: [n] -> { Stmt_bodyA[i0] -> MemRef_phi__phi[] };
+; CHECK-NEXT: Instructions {
+; CHECK-NEXT: %valA = load double, double* %A
+; CHECK-NEXT: store double %valA, double* %A
+; CHECK-NEXT: }
+; CHECK-NEXT: }
--- /dev/null
+; RUN: opt %loadPolly -polly-stmt-granularity=scalar-indep -polly-print-instructions -polly-scops -analyze < %s | FileCheck %s -match-full-lines
+;
+; This case cannot be split into two statements because the order of
+; loads and store would be violated.
+;
+; for (int j = 0; j < n; j += 1) {
+; body:
+; double valA = A[0];
+; double valB = B[0];
+; A[0] = valA;
+; A[0] = valB;
+; }
+;
+define void @func(i32 %n, double* noalias nonnull %A, double* noalias nonnull %B) {
+entry:
+ br label %for
+
+for:
+ %j = phi i32 [0, %entry], [%j.inc, %inc]
+ %j.cmp = icmp slt i32 %j, %n
+ br i1 %j.cmp, label %body, label %exit
+
+ body:
+ %valA = load double, double* %A
+ %valB = load double, double* %B
+ store double %valA, double* %A
+ store double %valB, double* %A
+ br label %inc
+
+inc:
+ %j.inc = add nuw nsw i32 %j, 1
+ br label %for
+
+exit:
+ br label %return
+
+return:
+ ret void
+}
+
+
+; CHECK: Statements {
+; CHECK-NEXT: Stmt_body
+; CHECK-NEXT: Domain :=
+; CHECK-NEXT: [n] -> { Stmt_body[i0] : 0 <= i0 < n };
+; CHECK-NEXT: Schedule :=
+; CHECK-NEXT: [n] -> { Stmt_body[i0] -> [i0] };
+; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
+; CHECK-NEXT: [n] -> { Stmt_body[i0] -> MemRef_A[0] };
+; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
+; CHECK-NEXT: [n] -> { Stmt_body[i0] -> MemRef_B[0] };
+; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0]
+; CHECK-NEXT: [n] -> { Stmt_body[i0] -> MemRef_A[0] };
+; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0]
+; CHECK-NEXT: [n] -> { Stmt_body[i0] -> MemRef_A[0] };
+; CHECK-NEXT: Instructions {
+; CHECK-NEXT: %valA = load double, double* %A
+; CHECK-NEXT: %valB = load double, double* %B
+; CHECK-NEXT: store double %valA, double* %A
+; CHECK-NEXT: store double %valB, double* %A
+; CHECK-NEXT: }
+; CHECK-NEXT: }