if (R != 0)
return true; // gcd doesn't divide Delta, no dependence
Q = Delta.sdiv(G);
- X *= Q;
- Y *= Q;
return false;
}
// When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*i],
// where i is an induction variable, c1 and c2 are loop invariant, and a1
// and a2 are constant, we can solve it exactly using an algorithm developed
-// by Banerjee and Wolfe. See Section 2.5.3 in
+// by Banerjee and Wolfe. See Algorithm 6.2.1 (case 2.5) in:
//
-// Optimizing Supercompilers for Supercomputers
-// Michael Wolfe
-// MIT Press, 1989
+// Dependence Analysis for Supercomputing
+// Utpal Banerjee
+// Kluwer Academic Publishers, 1988
//
// It's slower than the specialized tests (strong SIV, weak-zero SIV, etc),
// so use them if possible. They're also a bit better with symbolics and,
// in the case of the strong SIV test, can compute Distances.
//
// Return true if dependence disproved.
+//
+// This is a modified version of the original Banerjee algorithm. The original
+// only tested whether Dst depends on Src. This algorithm extends that and
+// returns all the dependencies that exist between Dst and Src.
bool DependenceInfo::exactSIVtest(const SCEV *SrcCoeff, const SCEV *DstCoeff,
const SCEV *SrcConst, const SCEV *DstConst,
const Loop *CurLoop, unsigned Level,
Result.Consistent = false;
const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst);
LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta << "\n");
- NewConstraint.setLine(SrcCoeff, SE->getNegativeSCEV(DstCoeff),
- Delta, CurLoop);
+ NewConstraint.setLine(SrcCoeff, SE->getNegativeSCEV(DstCoeff), Delta,
+ CurLoop);
const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Delta);
const SCEVConstant *ConstSrcCoeff = dyn_cast<SCEVConstant>(SrcCoeff);
const SCEVConstant *ConstDstCoeff = dyn_cast<SCEVConstant>(DstCoeff);
APInt G, X, Y;
APInt AM = ConstSrcCoeff->getAPInt();
APInt BM = ConstDstCoeff->getAPInt();
+ APInt CM = ConstDelta->getAPInt();
unsigned Bits = AM.getBitWidth();
- if (findGCD(Bits, AM, BM, ConstDelta->getAPInt(), G, X, Y)) {
+ if (findGCD(Bits, AM, BM, CM, G, X, Y)) {
// gcd doesn't divide Delta, no dependence
++ExactSIVindependence;
++ExactSIVsuccesses;
// since SCEV construction normalizes, LM = 0
APInt UM(Bits, 1, true);
- bool UMvalid = false;
+ bool UMValid = false;
// UM is perhaps unavailable, let's check
if (const SCEVConstant *CUB =
- collectConstantUpperBound(CurLoop, Delta->getType())) {
+ collectConstantUpperBound(CurLoop, Delta->getType())) {
UM = CUB->getAPInt();
LLVM_DEBUG(dbgs() << "\t UM = " << UM << "\n");
- UMvalid = true;
+ UMValid = true;
}
APInt TU(APInt::getSignedMaxValue(Bits));
APInt TL(APInt::getSignedMinValue(Bits));
+ APInt TC = CM.sdiv(G);
+ APInt TX = X * TC;
+ APInt TY = Y * TC;
+ LLVM_DEBUG(dbgs() << "\t TC = " << TC << "\n");
+ LLVM_DEBUG(dbgs() << "\t TX = " << TX << "\n");
+ LLVM_DEBUG(dbgs() << "\t TY = " << TY << "\n");
+
+ SmallVector<APInt, 2> TLVec, TUVec;
+ APInt TB = BM.sdiv(G);
+ if (TB.sgt(0)) {
+ TLVec.push_back(ceilingOfQuotient(-TX, TB));
+ LLVM_DEBUG(dbgs() << "\t Possible TL = " << TLVec.back() << "\n");
+ // New bound check - modification to Banerjee's e3 check
+ if (UMValid) {
+ TUVec.push_back(floorOfQuotient(UM - TX, TB));
+ LLVM_DEBUG(dbgs() << "\t Possible TU = " << TUVec.back() << "\n");
+ }
+ } else {
+ TUVec.push_back(floorOfQuotient(-TX, TB));
+ LLVM_DEBUG(dbgs() << "\t Possible TU = " << TUVec.back() << "\n");
+ // New bound check - modification to Banerjee's e3 check
+ if (UMValid) {
+ TLVec.push_back(ceilingOfQuotient(UM - TX, TB));
+ LLVM_DEBUG(dbgs() << "\t Possible TL = " << TLVec.back() << "\n");
+ }
+ }
+
+ APInt TA = AM.sdiv(G);
+ if (TA.sgt(0)) {
+ if (UMValid) {
+ TUVec.push_back(floorOfQuotient(UM - TY, TA));
+ LLVM_DEBUG(dbgs() << "\t Possible TU = " << TUVec.back() << "\n");
+ }
+ // New bound check - modification to Banerjee's e3 check
+ TLVec.push_back(ceilingOfQuotient(-TY, TA));
+ LLVM_DEBUG(dbgs() << "\t Possible TL = " << TLVec.back() << "\n");
+ } else {
+ if (UMValid) {
+ TLVec.push_back(ceilingOfQuotient(UM - TY, TA));
+ LLVM_DEBUG(dbgs() << "\t Possible TL = " << TLVec.back() << "\n");
+ }
+ // New bound check - modification to Banerjee's e3 check
+ TUVec.push_back(floorOfQuotient(-TY, TA));
+ LLVM_DEBUG(dbgs() << "\t Possible TU = " << TUVec.back() << "\n");
+ }
+
+ LLVM_DEBUG(dbgs() << "\t TA = " << TA << "\n");
+ LLVM_DEBUG(dbgs() << "\t TB = " << TB << "\n");
+
+ if (TLVec.empty() || TUVec.empty())
+ return false;
+ TL = APIntOps::smax(TLVec.front(), TLVec.back());
+ TU = APIntOps::smin(TUVec.front(), TUVec.back());
+ LLVM_DEBUG(dbgs() << "\t TL = " << TL << "\n");
+ LLVM_DEBUG(dbgs() << "\t TU = " << TU << "\n");
- // test(BM/G, LM-X) and test(-BM/G, X-UM)
- APInt TMUL = BM.sdiv(G);
- if (TMUL.sgt(0)) {
- TL = APIntOps::smax(TL, ceilingOfQuotient(-X, TMUL));
- LLVM_DEBUG(dbgs() << "\t TL = " << TL << "\n");
- if (UMvalid) {
- TU = APIntOps::smin(TU, floorOfQuotient(UM - X, TMUL));
- LLVM_DEBUG(dbgs() << "\t TU = " << TU << "\n");
- }
- }
- else {
- TU = APIntOps::smin(TU, floorOfQuotient(-X, TMUL));
- LLVM_DEBUG(dbgs() << "\t TU = " << TU << "\n");
- if (UMvalid) {
- TL = APIntOps::smax(TL, ceilingOfQuotient(UM - X, TMUL));
- LLVM_DEBUG(dbgs() << "\t TL = " << TL << "\n");
- }
- }
-
- // test(AM/G, LM-Y) and test(-AM/G, Y-UM)
- TMUL = AM.sdiv(G);
- if (TMUL.sgt(0)) {
- TL = APIntOps::smax(TL, ceilingOfQuotient(-Y, TMUL));
- LLVM_DEBUG(dbgs() << "\t TL = " << TL << "\n");
- if (UMvalid) {
- TU = APIntOps::smin(TU, floorOfQuotient(UM - Y, TMUL));
- LLVM_DEBUG(dbgs() << "\t TU = " << TU << "\n");
- }
- }
- else {
- TU = APIntOps::smin(TU, floorOfQuotient(-Y, TMUL));
- LLVM_DEBUG(dbgs() << "\t TU = " << TU << "\n");
- if (UMvalid) {
- TL = APIntOps::smax(TL, ceilingOfQuotient(UM - Y, TMUL));
- LLVM_DEBUG(dbgs() << "\t TL = " << TL << "\n");
- }
- }
if (TL.sgt(TU)) {
++ExactSIVindependence;
++ExactSIVsuccesses;
// explore directions
unsigned NewDirection = Dependence::DVEntry::NONE;
-
- // less than
- APInt SaveTU(TU); // save these
- APInt SaveTL(TL);
- LLVM_DEBUG(dbgs() << "\t exploring LT direction\n");
- TMUL = AM - BM;
- if (TMUL.sgt(0)) {
- TL = APIntOps::smax(TL, ceilingOfQuotient(X - Y + 1, TMUL));
- LLVM_DEBUG(dbgs() << "\t\t TL = " << TL << "\n");
- }
- else {
- TU = APIntOps::smin(TU, floorOfQuotient(X - Y + 1, TMUL));
- LLVM_DEBUG(dbgs() << "\t\t TU = " << TU << "\n");
- }
- if (TL.sle(TU)) {
- NewDirection |= Dependence::DVEntry::LT;
- ++ExactSIVsuccesses;
+ APInt LowerDistance, UpperDistance;
+ if (TA.sgt(TB)) {
+ LowerDistance = (TY - TX) + (TA - TB) * TL;
+ UpperDistance = (TY - TX) + (TA - TB) * TU;
+ } else {
+ LowerDistance = (TY - TX) + (TA - TB) * TU;
+ UpperDistance = (TY - TX) + (TA - TB) * TL;
}
- // equal
- TU = SaveTU; // restore
- TL = SaveTL;
- LLVM_DEBUG(dbgs() << "\t exploring EQ direction\n");
- if (TMUL.sgt(0)) {
- TL = APIntOps::smax(TL, ceilingOfQuotient(X - Y, TMUL));
- LLVM_DEBUG(dbgs() << "\t\t TL = " << TL << "\n");
- }
- else {
- TU = APIntOps::smin(TU, floorOfQuotient(X - Y, TMUL));
- LLVM_DEBUG(dbgs() << "\t\t TU = " << TU << "\n");
- }
- TMUL = BM - AM;
- if (TMUL.sgt(0)) {
- TL = APIntOps::smax(TL, ceilingOfQuotient(Y - X, TMUL));
- LLVM_DEBUG(dbgs() << "\t\t TL = " << TL << "\n");
- }
- else {
- TU = APIntOps::smin(TU, floorOfQuotient(Y - X, TMUL));
- LLVM_DEBUG(dbgs() << "\t\t TU = " << TU << "\n");
- }
- if (TL.sle(TU)) {
+ LLVM_DEBUG(dbgs() << "\t LowerDistance = " << LowerDistance << "\n");
+ LLVM_DEBUG(dbgs() << "\t UpperDistance = " << UpperDistance << "\n");
+
+ APInt Zero(Bits, 0, true);
+ if (LowerDistance.sle(Zero) && UpperDistance.sge(Zero)) {
NewDirection |= Dependence::DVEntry::EQ;
++ExactSIVsuccesses;
}
-
- // greater than
- TU = SaveTU; // restore
- TL = SaveTL;
- LLVM_DEBUG(dbgs() << "\t exploring GT direction\n");
- if (TMUL.sgt(0)) {
- TL = APIntOps::smax(TL, ceilingOfQuotient(Y - X + 1, TMUL));
- LLVM_DEBUG(dbgs() << "\t\t TL = " << TL << "\n");
- }
- else {
- TU = APIntOps::smin(TU, floorOfQuotient(Y - X + 1, TMUL));
- LLVM_DEBUG(dbgs() << "\t\t TU = " << TU << "\n");
- }
- if (TL.sle(TU)) {
+ if (LowerDistance.slt(0)) {
NewDirection |= Dependence::DVEntry::GT;
++ExactSIVsuccesses;
}
+ if (UpperDistance.sgt(0)) {
+ NewDirection |= Dependence::DVEntry::LT;
+ ++ExactSIVsuccesses;
+ }
// finished
Result.DV[Level].Direction &= NewDirection;
if (Result.DV[Level].Direction == Dependence::DVEntry::NONE)
++ExactSIVindependence;
+ LLVM_DEBUG(dbgs() << "\t Result = ");
+ LLVM_DEBUG(Result.dump(dbgs()));
return Result.DV[Level].Direction == Dependence::DVEntry::NONE;
}
-
// Return true if the divisor evenly divides the dividend.
static
bool isRemainderZero(const SCEVConstant *Dividend,
APInt G, X, Y;
APInt AM = ConstSrcCoeff->getAPInt();
APInt BM = ConstDstCoeff->getAPInt();
+ APInt CM = ConstDelta->getAPInt();
unsigned Bits = AM.getBitWidth();
- if (findGCD(Bits, AM, BM, ConstDelta->getAPInt(), G, X, Y)) {
+ if (findGCD(Bits, AM, BM, CM, G, X, Y)) {
// gcd doesn't divide Delta, no dependence
++ExactRDIVindependence;
return true;
bool SrcUMvalid = false;
// SrcUM is perhaps unavailable, let's check
if (const SCEVConstant *UpperBound =
- collectConstantUpperBound(SrcLoop, Delta->getType())) {
+ collectConstantUpperBound(SrcLoop, Delta->getType())) {
SrcUM = UpperBound->getAPInt();
LLVM_DEBUG(dbgs() << "\t SrcUM = " << SrcUM << "\n");
SrcUMvalid = true;
bool DstUMvalid = false;
// UM is perhaps unavailable, let's check
if (const SCEVConstant *UpperBound =
- collectConstantUpperBound(DstLoop, Delta->getType())) {
+ collectConstantUpperBound(DstLoop, Delta->getType())) {
DstUM = UpperBound->getAPInt();
LLVM_DEBUG(dbgs() << "\t DstUM = " << DstUM << "\n");
DstUMvalid = true;
APInt TU(APInt::getSignedMaxValue(Bits));
APInt TL(APInt::getSignedMinValue(Bits));
-
- // test(BM/G, LM-X) and test(-BM/G, X-UM)
- APInt TMUL = BM.sdiv(G);
- if (TMUL.sgt(0)) {
- TL = APIntOps::smax(TL, ceilingOfQuotient(-X, TMUL));
- LLVM_DEBUG(dbgs() << "\t TL = " << TL << "\n");
+ APInt TC = CM.sdiv(G);
+ APInt TX = X * TC;
+ APInt TY = Y * TC;
+ LLVM_DEBUG(dbgs() << "\t TC = " << TC << "\n");
+ LLVM_DEBUG(dbgs() << "\t TX = " << TX << "\n");
+ LLVM_DEBUG(dbgs() << "\t TY = " << TY << "\n");
+
+ SmallVector<APInt, 2> TLVec, TUVec;
+ APInt TB = BM.sdiv(G);
+ if (TB.sgt(0)) {
+ TLVec.push_back(ceilingOfQuotient(-TX, TB));
+ LLVM_DEBUG(dbgs() << "\t Possible TL = " << TLVec.back() << "\n");
if (SrcUMvalid) {
- TU = APIntOps::smin(TU, floorOfQuotient(SrcUM - X, TMUL));
- LLVM_DEBUG(dbgs() << "\t TU = " << TU << "\n");
+ TUVec.push_back(floorOfQuotient(SrcUM - TX, TB));
+ LLVM_DEBUG(dbgs() << "\t Possible TU = " << TUVec.back() << "\n");
}
- }
- else {
- TU = APIntOps::smin(TU, floorOfQuotient(-X, TMUL));
- LLVM_DEBUG(dbgs() << "\t TU = " << TU << "\n");
+ } else {
+ TUVec.push_back(floorOfQuotient(-TX, TB));
+ LLVM_DEBUG(dbgs() << "\t Possible TU = " << TUVec.back() << "\n");
if (SrcUMvalid) {
- TL = APIntOps::smax(TL, ceilingOfQuotient(SrcUM - X, TMUL));
- LLVM_DEBUG(dbgs() << "\t TL = " << TL << "\n");
+ TLVec.push_back(ceilingOfQuotient(SrcUM - TX, TB));
+ LLVM_DEBUG(dbgs() << "\t Possible TL = " << TLVec.back() << "\n");
}
}
- // test(AM/G, LM-Y) and test(-AM/G, Y-UM)
- TMUL = AM.sdiv(G);
- if (TMUL.sgt(0)) {
- TL = APIntOps::smax(TL, ceilingOfQuotient(-Y, TMUL));
- LLVM_DEBUG(dbgs() << "\t TL = " << TL << "\n");
+ APInt TA = AM.sdiv(G);
+ if (TA.sgt(0)) {
+ TLVec.push_back(ceilingOfQuotient(-TY, TA));
+ LLVM_DEBUG(dbgs() << "\t Possible TL = " << TLVec.back() << "\n");
if (DstUMvalid) {
- TU = APIntOps::smin(TU, floorOfQuotient(DstUM - Y, TMUL));
- LLVM_DEBUG(dbgs() << "\t TU = " << TU << "\n");
+ TUVec.push_back(floorOfQuotient(DstUM - TY, TA));
+ LLVM_DEBUG(dbgs() << "\t Possible TU = " << TUVec.back() << "\n");
}
- }
- else {
- TU = APIntOps::smin(TU, floorOfQuotient(-Y, TMUL));
- LLVM_DEBUG(dbgs() << "\t TU = " << TU << "\n");
+ } else {
+ TUVec.push_back(floorOfQuotient(-TY, TA));
+ LLVM_DEBUG(dbgs() << "\t Possible TU = " << TUVec.back() << "\n");
if (DstUMvalid) {
- TL = APIntOps::smax(TL, ceilingOfQuotient(DstUM - Y, TMUL));
- LLVM_DEBUG(dbgs() << "\t TL = " << TL << "\n");
+ TLVec.push_back(ceilingOfQuotient(DstUM - TY, TA));
+ LLVM_DEBUG(dbgs() << "\t Possible TL = " << TLVec.back() << "\n");
}
}
+
+ if (TLVec.empty() || TUVec.empty())
+ return false;
+
+ LLVM_DEBUG(dbgs() << "\t TA = " << TA << "\n");
+ LLVM_DEBUG(dbgs() << "\t TB = " << TB << "\n");
+
+ TL = APIntOps::smax(TLVec.front(), TLVec.back());
+ TU = APIntOps::smin(TUVec.front(), TUVec.back());
+ LLVM_DEBUG(dbgs() << "\t TL = " << TL << "\n");
+ LLVM_DEBUG(dbgs() << "\t TU = " << TU << "\n");
+
if (TL.sgt(TU))
++ExactRDIVindependence;
return TL.sgt(TU);
; CHECK-LABEL: exact0
; CHECK: da analyze - none!
-; CHECK: da analyze - flow [>]!
+; CHECK: da analyze - flow [<=|<]!
; CHECK: da analyze - confused!
; CHECK: da analyze - none!
; CHECK: da analyze - confused!
; CHECK-LABEL: exact3
; CHECK: da analyze - none!
-; CHECK: da analyze - flow [<]!
+; CHECK: da analyze - flow [>]!
; CHECK: da analyze - confused!
; CHECK: da analyze - none!
; CHECK: da analyze - confused!
; CHECK-LABEL: exact4
; CHECK: da analyze - none!
-; CHECK: da analyze - flow [<]!
+; CHECK: da analyze - flow [>]!
; CHECK: da analyze - confused!
; CHECK: da analyze - none!
; CHECK: da analyze - confused!
; CHECK-LABEL: exact5
; CHECK: da analyze - none!
-; CHECK: da analyze - flow [<]!
+; CHECK: da analyze - flow [=>|<]!
; CHECK: da analyze - confused!
; CHECK: da analyze - none!
; CHECK: da analyze - confused!
; CHECK-LABEL: exact6
; CHECK: da analyze - none!
-; CHECK: da analyze - flow [<]!
+; CHECK: da analyze - flow [=>|<]!
; CHECK: da analyze - confused!
; CHECK: da analyze - none!
; CHECK: da analyze - confused!
; CHECK-LABEL: exact7
; CHECK: da analyze - none!
-; CHECK: da analyze - flow [<]!
+; CHECK: da analyze - flow [*|<]!
; CHECK: da analyze - confused!
; CHECK: da analyze - none!
; CHECK: da analyze - confused!
; CHECK-LABEL: exact9
; CHECK: da analyze - none!
-; CHECK: da analyze - flow [<]!
+; CHECK: da analyze - flow [>]!
; CHECK: da analyze - confused!
; CHECK: da analyze - none!
; CHECK: da analyze - confused!
; CHECK-LABEL: exact10
; CHECK: da analyze - none!
-; CHECK: da analyze - flow [<]!
+; CHECK: da analyze - flow [>]!
; CHECK: da analyze - confused!
; CHECK: da analyze - none!
; CHECK: da analyze - confused!
; CHECK-LABEL: exact11
; CHECK: da analyze - none!
-; CHECK: da analyze - flow [<]!
+; CHECK: da analyze - flow [=>|<]!
; CHECK: da analyze - confused!
; CHECK: da analyze - none!
; CHECK: da analyze - confused!
; CHECK-LABEL: exact12
; CHECK: da analyze - none!
-; CHECK: da analyze - flow [<]!
+; CHECK: da analyze - flow [=>|<]!
; CHECK: da analyze - confused!
; CHECK: da analyze - none!
; CHECK: da analyze - confused!
; CHECK-LABEL: exact13
; CHECK: da analyze - none!
-; CHECK: da analyze - flow [<]!
+; CHECK: da analyze - flow [*|<]!
; CHECK: da analyze - confused!
; CHECK: da analyze - none!
; CHECK: da analyze - confused!