From 59b61b9e2c549956b1094417a72c3943c20c9234 Mon Sep 17 00:00:00 2001 From: Sebastian Pop Date: Thu, 11 Oct 2012 07:32:34 +0000 Subject: [PATCH] dependence analysis Patch from Preston Briggs . This is an updated version of the dependence-analysis patch, including an MIV test based on Banerjee's inequalities. It's a fairly complete implementation of the paper Practical Dependence Testing Gina Goff, Ken Kennedy, and Chau-Wen Tseng PLDI 1991 It cannot yet propagate constraints between coupled RDIV subscripts (discussed in Section 5.3.2 of the paper). It's organized as a FunctionPass with a single entry point that supports testing for dependence between two instructions in a function. If there's no dependence, it returns null. If there's a dependence, it returns a pointer to a Dependence which can be queried about details (what kind of dependence, is it loop independent, direction and distance vector entries, etc). I haven't included every imaginable feature, but there's a good selection that should be adequate for supporting many loop transformations. Of course, it can be extended as necessary. Included in the patch file are many test cases, commented with C code showing the loops and array references. llvm-svn: 165708 --- llvm/include/llvm/Analysis/DependenceAnalysis.h | 891 +++++ llvm/include/llvm/Analysis/Passes.h | 9 + llvm/include/llvm/InitializePasses.h | 1 + llvm/include/llvm/LinkAllPasses.h | 1 + llvm/lib/Analysis/Analysis.cpp | 1 + llvm/lib/Analysis/CMakeLists.txt | 1 + llvm/lib/Analysis/DependenceAnalysis.cpp | 3781 ++++++++++++++++++++ llvm/test/Analysis/DependenceAnalysis/Banerjee.ll | 595 +++ llvm/test/Analysis/DependenceAnalysis/Coupled.ll | 509 +++ llvm/test/Analysis/DependenceAnalysis/ExactRDIV.ll | 508 +++ llvm/test/Analysis/DependenceAnalysis/ExactSIV.ll | 428 +++ llvm/test/Analysis/DependenceAnalysis/GCD.ll | 597 ++++ .../Analysis/DependenceAnalysis/Preliminary.ll | 469 +++ .../Analysis/DependenceAnalysis/Propagating.ll | 467 +++ .../Analysis/DependenceAnalysis/Separability.ll | 267 ++ llvm/test/Analysis/DependenceAnalysis/StrongSIV.ll | 342 ++ .../Analysis/DependenceAnalysis/SymbolicRDIV.ll | 312 ++ .../Analysis/DependenceAnalysis/SymbolicSIV.ll | 330 ++ .../Analysis/DependenceAnalysis/WeakCrossingSIV.ll | 220 ++ .../Analysis/DependenceAnalysis/WeakZeroDstSIV.ll | 212 ++ .../Analysis/DependenceAnalysis/WeakZeroSrcSIV.ll | 212 ++ llvm/test/Analysis/DependenceAnalysis/ZIV.ll | 53 + .../test/Analysis/DependenceAnalysis/lit.local.cfg | 1 + 23 files changed, 10207 insertions(+) create mode 100644 llvm/include/llvm/Analysis/DependenceAnalysis.h create mode 100644 llvm/lib/Analysis/DependenceAnalysis.cpp create mode 100644 llvm/test/Analysis/DependenceAnalysis/Banerjee.ll create mode 100644 llvm/test/Analysis/DependenceAnalysis/Coupled.ll create mode 100644 llvm/test/Analysis/DependenceAnalysis/ExactRDIV.ll create mode 100644 llvm/test/Analysis/DependenceAnalysis/ExactSIV.ll create mode 100644 llvm/test/Analysis/DependenceAnalysis/GCD.ll create mode 100644 llvm/test/Analysis/DependenceAnalysis/Preliminary.ll create mode 100644 llvm/test/Analysis/DependenceAnalysis/Propagating.ll create mode 100644 llvm/test/Analysis/DependenceAnalysis/Separability.ll create mode 100644 llvm/test/Analysis/DependenceAnalysis/StrongSIV.ll create mode 100644 llvm/test/Analysis/DependenceAnalysis/SymbolicRDIV.ll create mode 100644 llvm/test/Analysis/DependenceAnalysis/SymbolicSIV.ll create mode 100644 llvm/test/Analysis/DependenceAnalysis/WeakCrossingSIV.ll create mode 100644 
llvm/test/Analysis/DependenceAnalysis/WeakZeroDstSIV.ll create mode 100644 llvm/test/Analysis/DependenceAnalysis/WeakZeroSrcSIV.ll create mode 100644 llvm/test/Analysis/DependenceAnalysis/ZIV.ll create mode 100644 llvm/test/Analysis/DependenceAnalysis/lit.local.cfg diff --git a/llvm/include/llvm/Analysis/DependenceAnalysis.h b/llvm/include/llvm/Analysis/DependenceAnalysis.h new file mode 100644 index 0000000..0abf6d8 --- /dev/null +++ b/llvm/include/llvm/Analysis/DependenceAnalysis.h @@ -0,0 +1,891 @@ +//===-- llvm/Analysis/DependenceAnalysis.h -------------------- -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// DependenceAnalysis is an LLVM pass that analyses dependences between memory +// accesses. Currently, it is an implementation of the approach described in +// +// Practical Dependence Testing +// Goff, Kennedy, Tseng +// PLDI 1991 +// +// There's a single entry point that analyzes the dependence between a pair +// of memory references in a function, returning either NULL, for no dependence, +// or a more-or-less detailed description of the dependence between them. +// +// Please note that this is work in progress and the interface is subject to +// change. +// +// Plausible changes: +// Return a set of more precise dependences instead of just one dependence +// summarizing all. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_DEPENDENCEANALYSIS_H +#define LLVM_ANALYSIS_DEPENDENCEANALYSIS_H + +#include "llvm/BasicBlock.h" +#include "llvm/Function.h" +#include "llvm/Instruction.h" +#include "llvm/Pass.h" +#include "llvm/ADT/SmallBitVector.h" +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/ScalarEvolutionExpressions.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Support/raw_ostream.h" + + +namespace llvm { + class AliasAnalysis; + class ScalarEvolution; + class SCEV; + class Value; + class raw_ostream; + + /// Dependence - This class represents a dependence between two memory + /// memory references in a function. It contains minimal information and + /// is used in the very common situation where the compiler is unable to + /// determine anything beyond the existence of a dependence; that is, it + /// represents a confused dependence (see also FullDependence). In most + /// cases (for output, flow, and anti dependences), the dependence implies + /// an ordering, where the source must preceed the destination; in contrast, + /// input dependences are unordered. + class Dependence { + public: + Dependence(const Instruction *Source, + const Instruction *Destination) : + Src(Source), Dst(Destination) {} + virtual ~Dependence() {} + + /// Dependence::DVEntry - Each level in the distance/direction vector + /// has a direction (or perhaps a union of several directions), and + /// perhaps a distance. + struct DVEntry { + enum { NONE = 0, + LT = 1, + EQ = 2, + LE = 3, + GT = 4, + NE = 5, + GE = 6, + ALL = 7 }; + unsigned char Direction : 3; // Init to ALL, then refine. + bool Scalar : 1; // Init to true. + bool PeelFirst : 1; // Peeling the first iteration will break dependence. + bool PeelLast : 1; // Peeling the last iteration will break the dependence. + bool Splitable : 1; // Splitting the loop will break dependence. 
+ const SCEV *Distance; // NULL implies no distance available. + DVEntry() : Direction(ALL), Scalar(true), PeelFirst(false), + PeelLast(false), Splitable(false), Distance(NULL) { } + }; + + /// getSrc - Returns the source instruction for this dependence. + /// + const Instruction *getSrc() const { return Src; } + + /// getDst - Returns the destination instruction for this dependence. + /// + const Instruction *getDst() const { return Dst; } + + /// isInput - Returns true if this is an input dependence. + /// + bool isInput() const; + + /// isOutput - Returns true if this is an output dependence. + /// + bool isOutput() const; + + /// isFlow - Returns true if this is a flow (aka true) dependence. + /// + bool isFlow() const; + + /// isAnti - Returns true if this is an anti dependence. + /// + bool isAnti() const; + + /// isOrdered - Returns true if dependence is Output, Flow, or Anti + /// + bool isOrdered() const { return isOutput() || isFlow() || isAnti(); } + + /// isUnordered - Returns true if dependence is Input + /// + bool isUnordered() const { return isInput(); } + + /// isLoopIndependent - Returns true if this is a loop-independent + /// dependence. + virtual bool isLoopIndependent() const { return true; } + + /// isConfused - Returns true if this dependence is confused + /// (the compiler understands nothing and makes worst-case + /// assumptions). + virtual bool isConfused() const { return true; } + + /// isConsistent - Returns true if this dependence is consistent + /// (occurs every time the source and destination are executed). + virtual bool isConsistent() const { return false; } + + /// getLevels - Returns the number of common loops surrounding the + /// souce and destination of the dependence. + virtual unsigned getLevels() const { return 0; } + + /// getDirection - Returns the direction associated with a particular + /// level. + virtual unsigned getDirection(unsigned Level) const { return DVEntry::ALL; } + + /// getDistance - Returns the distance (or NULL) associated with a + /// particular level. + virtual const SCEV *getDistance(unsigned Level) const { return NULL; } + + /// isPeelFirst - Returns true if peeling the first iteration from + /// this loop will break this dependence. + virtual bool isPeelFirst(unsigned Level) const { return false; } + + /// isPeelLast - Returns true if peeling the last iteration from + /// this loop will break this dependence. + virtual bool isPeelLast(unsigned Level) const { return false; } + + /// isSplitable - Returns true if splitting this loop will break + /// the dependence. + virtual bool isSplitable(unsigned Level) const { return false; } + + /// isScalar - Returns true if a particular level is scalar; that is, + /// if no subscript in the source or destination mention the induction + /// variable associated with the loop at this level. + virtual bool isScalar(unsigned Level) const; + + /// dump - For debugging purposes, dumps a dependence to OS. + /// + void dump(raw_ostream &OS) const; + private: + const Instruction *Src, *Dst; + friend class DependenceAnalysis; + }; + + + /// FullDependence - This class represents a dependence between two memory + /// references in a function. It contains detailed information about the + /// dependence (direction vectors, etc) and is used when the compiler is + /// able to accurately analyze the interaction of the references; that is, + /// it is not a confused dependence (see Dependence). 
In most cases + /// (for output, flow, and anti dependences), the dependence implies an + /// ordering, where the source must preceed the destination; in contrast, + /// input dependences are unordered. + class FullDependence : public Dependence { + public: + FullDependence(const Instruction *Src, + const Instruction *Dst, + bool LoopIndependent, + unsigned Levels); + ~FullDependence() { + delete DV; + } + + /// isLoopIndependent - Returns true if this is a loop-independent + /// dependence. + bool isLoopIndependent() const { return LoopIndependent; } + + /// isConfused - Returns true if this dependence is confused + /// (the compiler understands nothing and makes worst-case + /// assumptions). + bool isConfused() const { return false; } + + /// isConsistent - Returns true if this dependence is consistent + /// (occurs every time the source and destination are executed). + bool isConsistent() const { return Consistent; } + + /// getLevels - Returns the number of common loops surrounding the + /// souce and destination of the dependence. + unsigned getLevels() const { return Levels; } + + /// getDirection - Returns the direction associated with a particular + /// level. + unsigned getDirection(unsigned Level) const; + + /// getDistance - Returns the distance (or NULL) associated with a + /// particular level. + const SCEV *getDistance(unsigned Level) const; + + /// isPeelFirst - Returns true if peeling the first iteration from + /// this loop will break this dependence. + bool isPeelFirst(unsigned Level) const; + + /// isPeelLast - Returns true if peeling the last iteration from + /// this loop will break this dependence. + bool isPeelLast(unsigned Level) const; + + /// isSplitable - Returns true if splitting the loop will break + /// the dependence. + bool isSplitable(unsigned Level) const; + + /// isScalar - Returns true if a particular level is scalar; that is, + /// if no subscript in the source or destination mention the induction + /// variable associated with the loop at this level. + bool isScalar(unsigned Level) const; + private: + unsigned short Levels; + bool LoopIndependent; + bool Consistent; // Init to true, then refine. + DVEntry *DV; + friend class DependenceAnalysis; + }; + + + /// DependenceAnalysis - This class is the main dependence-analysis driver. + /// + class DependenceAnalysis : public FunctionPass { + void operator=(const DependenceAnalysis &); // do not implement + DependenceAnalysis(const DependenceAnalysis &); // do not implement + public: + /// depends - Tests for a dependence between the Src and Dst instructions. + /// Returns NULL if no dependence; otherwise, returns a Dependence (or a + /// FullDependence) with as much information as can be gleaned. + /// The flag PossiblyLoopIndependent should be set by the caller + /// if it appears that control flow can reach from Src to Dst + /// without traversing a loop back edge. + Dependence *depends(const Instruction *Src, + const Instruction *Dst, + bool PossiblyLoopIndependent); + + /// getSplitIteration - Give a dependence that's splitable at some + /// particular level, return the iteration that should be used to split + /// the loop. + /// + /// Generally, the dependence analyzer will be used to build + /// a dependence graph for a function (basically a map from instructions + /// to dependences). Looking for cycles in the graph shows us loops + /// that cannot be trivially vectorized/parallelized. 
+ /// + /// We can try to improve the situation by examining all the dependences + /// that make up the cycle, looking for ones we can break. + /// Sometimes, peeling the first or last iteration of a loop will break + /// dependences, and there are flags for those possibilities. + /// Sometimes, splitting a loop at some other iteration will do the trick, + /// and we've got a flag for that case. Rather than waste the space to + /// record the exact iteration (since we rarely know), we provide + /// a method that calculates the iteration. It's a drag that it must work + /// from scratch, but wonderful in that it's possible. + /// + /// Here's an example: + /// + /// for (i = 0; i < 10; i++) + /// A[i] = ... + /// ... = A[11 - i] + /// + /// There's a loop-carried flow dependence from the store to the load, + /// found by the weak-crossing SIV test. The dependence will have a flag, + /// indicating that the dependence can be broken by splitting the loop. + /// Calling getSplitIteration will return 5. + /// Splitting the loop breaks the dependence, like so: + /// + /// for (i = 0; i <= 5; i++) + /// A[i] = ... + /// ... = A[11 - i] + /// for (i = 6; i < 10; i++) + /// A[i] = ... + /// ... = A[11 - i] + /// + /// breaks the dependence and allows us to vectorize/parallelize + /// both loops. + const SCEV *getSplitIteration(const Dependence *Dep, unsigned Level); + + private: + AliasAnalysis *AA; + ScalarEvolution *SE; + LoopInfo *LI; + Function *F; + + /// Subscript - This private struct represents a pair of subscripts from + /// a pair of potentially multi-dimensional array references. We use a + /// vector of them to guide subscript partitioning. + struct Subscript { + const SCEV *Src; + const SCEV *Dst; + enum ClassificationKind { ZIV, SIV, RDIV, MIV, NonLinear } Classification; + SmallBitVector Loops; + SmallBitVector GroupLoops; + SmallBitVector Group; + }; + + struct CoefficientInfo { + const SCEV *Coeff; + const SCEV *PosPart; + const SCEV *NegPart; + const SCEV *Iterations; + }; + + struct BoundInfo { + const SCEV *Iterations; + const SCEV *Upper[8]; + const SCEV *Lower[8]; + unsigned char Direction; + unsigned char DirSet; + }; + + /// Constraint - This private class represents a constraint, as defined + /// in the paper + /// + /// Practical Dependence Testing + /// Goff, Kennedy, Tseng + /// PLDI 1991 + /// + /// There are 5 kinds of constraint, in a hierarchy. + /// 1) Any - indicates no constraint, any dependence is possible. + /// 2) Line - A line ax + by = c, where a, b, and c are parameters, + /// representing the dependence equation. + /// 3) Distance - The value d of the dependence distance; + /// 4) Point - A point representing the dependence from + /// iteration x to iteration y. + /// 5) Empty - No dependence is possible. + class Constraint { + private: + enum ConstraintKind { Empty, Point, Distance, Line, Any } Kind; + ScalarEvolution *SE; + const SCEV *A; + const SCEV *B; + const SCEV *C; + const Loop *AssociatedLoop; + public: + /// isEmpty - Return true if the constraint is of kind Empty. + bool isEmpty() const { return Kind == Empty; } + + /// isPoint - Return true if the constraint is of kind Point. + bool isPoint() const { return Kind == Point; } + + /// isDistance - Return true if the constraint is of kind Distance. + bool isDistance() const { return Kind == Distance; } + + /// isLine - Return true if the constraint is of kind Line. + /// Since Distance's can also be represented as Lines, we also return + /// true if the constraint is of kind Distance. 
+ bool isLine() const { return Kind == Line || Kind == Distance; } + + /// isAny - Return true if the constraint is of kind Any; + bool isAny() const { return Kind == Any; } + + /// getX - If constraint is a point , returns X. + /// Otherwise assert. + const SCEV *getX() const; + + /// getY - If constraint is a point , returns Y. + /// Otherwise assert. + const SCEV *getY() const; + + /// getA - If constraint is a line AX + BY = C, returns A. + /// Otherwise assert. + const SCEV *getA() const; + + /// getB - If constraint is a line AX + BY = C, returns B. + /// Otherwise assert. + const SCEV *getB() const; + + /// getC - If constraint is a line AX + BY = C, returns C. + /// Otherwise assert. + const SCEV *getC() const; + + /// getD - If constraint is a distance, returns D. + /// Otherwise assert. + const SCEV *getD() const; + + /// getAssociatedLoop - Returns the loop associated with this constraint. + const Loop *getAssociatedLoop() const; + + /// setPoint - Change a constraint to Point. + void setPoint(const SCEV *X, const SCEV *Y, const Loop *CurrentLoop); + + /// setLine - Change a constraint to Line. + void setLine(const SCEV *A, const SCEV *B, + const SCEV *C, const Loop *CurrentLoop); + + /// setDistance - Change a constraint to Distance. + void setDistance(const SCEV *D, const Loop *CurrentLoop); + + /// setEmpty - Change a constraint to Empty. + void setEmpty(); + + /// setAny - Change a constraint to Any. + void setAny(ScalarEvolution *SE); + + /// dump - For debugging purposes. Dumps the constraint + /// out to OS. + void dump(raw_ostream &OS) const; + }; + + + /// establishNestingLevels - Examines the loop nesting of the Src and Dst + /// instructions and establishes their shared loops. Sets the variables + /// CommonLevels, SrcLevels, and MaxLevels. + /// The source and destination instructions needn't be contained in the same + /// loop. The routine establishNestingLevels finds the level of most deeply + /// nested loop that contains them both, CommonLevels. An instruction that's + /// not contained in a loop is at level = 0. MaxLevels is equal to the level + /// of the source plus the level of the destination, minus CommonLevels. + /// This lets us allocate vectors MaxLevels in length, with room for every + /// distinct loop referenced in both the source and destination subscripts. + /// The variable SrcLevels is the nesting depth of the source instruction. + /// It's used to help calculate distinct loops referenced by the destination. + /// Here's the map from loops to levels: + /// 0 - unused + /// 1 - outermost common loop + /// ... - other common loops + /// CommonLevels - innermost common loop + /// ... - loops containing Src but not Dst + /// SrcLevels - innermost loop containing Src but not Dst + /// ... - loops containing Dst but not Src + /// MaxLevels - innermost loop containing Dst but not Src + /// Consider the follow code fragment: + /// for (a = ...) { + /// for (b = ...) { + /// for (c = ...) { + /// for (d = ...) { + /// A[] = ...; + /// } + /// } + /// for (e = ...) { + /// for (f = ...) { + /// for (g = ...) { + /// ... = A[]; + /// } + /// } + /// } + /// } + /// } + /// If we're looking at the possibility of a dependence between the store + /// to A (the Src) and the load from A (the Dst), we'll note that they + /// have 2 loops in common, so CommonLevels will equal 2 and the direction + /// vector for Result will have 2 entries. SrcLevels = 4 and MaxLevels = 7. 
+ /// A map from loop names to level indices would look like + /// a - 1 + /// b - 2 = CommonLevels + /// c - 3 + /// d - 4 = SrcLevels + /// e - 5 + /// f - 6 + /// g - 7 = MaxLevels + void establishNestingLevels(const Instruction *Src, + const Instruction *Dst); + + unsigned CommonLevels, SrcLevels, MaxLevels; + + /// mapSrcLoop - Given one of the loops containing the source, return + /// its level index in our numbering scheme. + unsigned mapSrcLoop(const Loop *SrcLoop) const; + + /// mapDstLoop - Given one of the loops containing the destination, + /// return its level index in our numbering scheme. + unsigned mapDstLoop(const Loop *DstLoop) const; + + /// isLoopInvariant - Returns true if Expression is loop invariant + /// in LoopNest. + bool isLoopInvariant(const SCEV *Expression, const Loop *LoopNest) const; + + /// removeMatchingExtensions - Examines a subscript pair. + /// If the source and destination are identically sign (or zero) + /// extended, it strips off the extension in an effort to + /// simplify the actual analysis. + void removeMatchingExtensions(Subscript *Pair); + + /// collectCommonLoops - Finds the set of loops from the LoopNest that + /// have a level <= CommonLevels and are referred to by the SCEV Expression. + void collectCommonLoops(const SCEV *Expression, + const Loop *LoopNest, + SmallBitVector &Loops) const; + + /// checkSrcSubscript - Examines the SCEV Src, returning true iff it's + /// linear. Collect the set of loops mentioned by Src. + bool checkSrcSubscript(const SCEV *Src, + const Loop *LoopNest, + SmallBitVector &Loops); + + /// checkDstSubscript - Examines the SCEV Dst, returning true iff it's + /// linear. Collect the set of loops mentioned by Dst. + bool checkDstSubscript(const SCEV *Dst, + const Loop *LoopNest, + SmallBitVector &Loops); + + /// isKnownPredicate - Compare X and Y using the predicate Pred. + /// Basically a wrapper for SCEV::isKnownPredicate, + /// but tries harder, especially in the presense of sign and zero + /// extensions and symbolics. + bool isKnownPredicate(ICmpInst::Predicate Pred, + const SCEV *X, + const SCEV *Y) const; + + /// collectUpperBound - All subscripts are the same type (on my machine, + /// an i64). The loop bound may be a smaller type. collectUpperBound + /// find the bound, if available, and zero extends it to the Type T. + /// (I zero extend since the bound should always be >= 0.) + /// If no upper bound is available, return NULL. + const SCEV *collectUpperBound(const Loop *l, Type *T) const; + + /// collectConstantUpperBound - Calls collectUpperBound(), then + /// attempts to cast it to SCEVConstant. If the cast fails, + /// returns NULL. + const SCEVConstant *collectConstantUpperBound(const Loop *l, Type *T) const; + + /// classifyPair - Examines the subscript pair (the Src and Dst SCEVs) + /// and classifies it as either ZIV, SIV, RDIV, MIV, or Nonlinear. + /// Collects the associated loops in a set. + Subscript::ClassificationKind classifyPair(const SCEV *Src, + const Loop *SrcLoopNest, + const SCEV *Dst, + const Loop *DstLoopNest, + SmallBitVector &Loops); + + /// testZIV - Tests the ZIV subscript pair (Src and Dst) for dependence. + /// Returns true if any possible dependence is disproved. + /// If there might be a dependence, returns false. + /// If the dependence isn't proven to exist, + /// marks the Result as inconsistent. + bool testZIV(const SCEV *Src, + const SCEV *Dst, + FullDependence &Result) const; + + /// testSIV - Tests the SIV subscript pair (Src and Dst) for dependence. 
+ /// Things of the form [c1 + a1*i] and [c2 + a2*j], where + /// i and j are induction variables, c1 and c2 are loop invariant, + /// and a1 and a2 are constant. + /// Returns true if any possible dependence is disproved. + /// If there might be a dependence, returns false. + /// Sets appropriate direction vector entry and, when possible, + /// the distance vector entry. + /// If the dependence isn't proven to exist, + /// marks the Result as inconsistent. + bool testSIV(const SCEV *Src, + const SCEV *Dst, + unsigned &Level, + FullDependence &Result, + Constraint &NewConstraint, + const SCEV *&SplitIter) const; + + /// testRDIV - Tests the RDIV subscript pair (Src and Dst) for dependence. + /// Things of the form [c1 + a1*i] and [c2 + a2*j] + /// where i and j are induction variables, c1 and c2 are loop invariant, + /// and a1 and a2 are constant. + /// With minor algebra, this test can also be used for things like + /// [c1 + a1*i + a2*j][c2]. + /// Returns true if any possible dependence is disproved. + /// If there might be a dependence, returns false. + /// Marks the Result as inconsistent. + bool testRDIV(const SCEV *Src, + const SCEV *Dst, + FullDependence &Result) const; + + /// testMIV - Tests the MIV subscript pair (Src and Dst) for dependence. + /// Returns true if dependence disproved. + /// Can sometimes refine direction vectors. + bool testMIV(const SCEV *Src, + const SCEV *Dst, + const SmallBitVector &Loops, + FullDependence &Result) const; + + /// strongSIVtest - Tests the strong SIV subscript pair (Src and Dst) + /// for dependence. + /// Things of the form [c1 + a*i] and [c2 + a*i], + /// where i is an induction variable, c1 and c2 are loop invariant, + /// and a is a constant + /// Returns true if any possible dependence is disproved. + /// If there might be a dependence, returns false. + /// Sets appropriate direction and distance. + bool strongSIVtest(const SCEV *Coeff, + const SCEV *SrcConst, + const SCEV *DstConst, + const Loop *CurrentLoop, + unsigned Level, + FullDependence &Result, + Constraint &NewConstraint) const; + + /// weakCrossingSIVtest - Tests the weak-crossing SIV subscript pair + /// (Src and Dst) for dependence. + /// Things of the form [c1 + a*i] and [c2 - a*i], + /// where i is an induction variable, c1 and c2 are loop invariant, + /// and a is a constant. + /// Returns true if any possible dependence is disproved. + /// If there might be a dependence, returns false. + /// Sets appropriate direction entry. + /// Set consistent to false. + /// Marks the dependence as splitable. + bool weakCrossingSIVtest(const SCEV *SrcCoeff, + const SCEV *SrcConst, + const SCEV *DstConst, + const Loop *CurrentLoop, + unsigned Level, + FullDependence &Result, + Constraint &NewConstraint, + const SCEV *&SplitIter) const; + + /// ExactSIVtest - Tests the SIV subscript pair + /// (Src and Dst) for dependence. + /// Things of the form [c1 + a1*i] and [c2 + a2*i], + /// where i is an induction variable, c1 and c2 are loop invariant, + /// and a1 and a2 are constant. + /// Returns true if any possible dependence is disproved. + /// If there might be a dependence, returns false. + /// Sets appropriate direction entry. + /// Set consistent to false. + bool exactSIVtest(const SCEV *SrcCoeff, + const SCEV *DstCoeff, + const SCEV *SrcConst, + const SCEV *DstConst, + const Loop *CurrentLoop, + unsigned Level, + FullDependence &Result, + Constraint &NewConstraint) const; + + /// weakZeroSrcSIVtest - Tests the weak-zero SIV subscript pair + /// (Src and Dst) for dependence. 
+ /// Things of the form [c1] and [c2 + a*i], + /// where i is an induction variable, c1 and c2 are loop invariant, + /// and a is a constant. See also weakZeroDstSIVtest. + /// Returns true if any possible dependence is disproved. + /// If there might be a dependence, returns false. + /// Sets appropriate direction entry. + /// Set consistent to false. + /// If loop peeling will break the dependence, mark appropriately. + bool weakZeroSrcSIVtest(const SCEV *DstCoeff, + const SCEV *SrcConst, + const SCEV *DstConst, + const Loop *CurrentLoop, + unsigned Level, + FullDependence &Result, + Constraint &NewConstraint) const; + + /// weakZeroDstSIVtest - Tests the weak-zero SIV subscript pair + /// (Src and Dst) for dependence. + /// Things of the form [c1 + a*i] and [c2], + /// where i is an induction variable, c1 and c2 are loop invariant, + /// and a is a constant. See also weakZeroSrcSIVtest. + /// Returns true if any possible dependence is disproved. + /// If there might be a dependence, returns false. + /// Sets appropriate direction entry. + /// Set consistent to false. + /// If loop peeling will break the dependence, mark appropriately. + bool weakZeroDstSIVtest(const SCEV *SrcCoeff, + const SCEV *SrcConst, + const SCEV *DstConst, + const Loop *CurrentLoop, + unsigned Level, + FullDependence &Result, + Constraint &NewConstraint) const; + + /// exactRDIVtest - Tests the RDIV subscript pair for dependence. + /// Things of the form [c1 + a*i] and [c2 + b*j], + /// where i and j are induction variable, c1 and c2 are loop invariant, + /// and a and b are constants. + /// Returns true if any possible dependence is disproved. + /// Marks the result as inconsistant. + /// Works in some cases that symbolicRDIVtest doesn't, + /// and vice versa. + bool exactRDIVtest(const SCEV *SrcCoeff, + const SCEV *DstCoeff, + const SCEV *SrcConst, + const SCEV *DstConst, + const Loop *SrcLoop, + const Loop *DstLoop, + FullDependence &Result) const; + + /// symbolicRDIVtest - Tests the RDIV subscript pair for dependence. + /// Things of the form [c1 + a*i] and [c2 + b*j], + /// where i and j are induction variable, c1 and c2 are loop invariant, + /// and a and b are constants. + /// Returns true if any possible dependence is disproved. + /// Marks the result as inconsistant. + /// Works in some cases that exactRDIVtest doesn't, + /// and vice versa. Can also be used as a backup for + /// ordinary SIV tests. + bool symbolicRDIVtest(const SCEV *SrcCoeff, + const SCEV *DstCoeff, + const SCEV *SrcConst, + const SCEV *DstConst, + const Loop *SrcLoop, + const Loop *DstLoop) const; + + /// gcdMIVtest - Tests an MIV subscript pair for dependence. + /// Returns true if any possible dependence is disproved. + /// Marks the result as inconsistant. + /// Can sometimes disprove the equal direction for 1 or more loops. + // Can handle some symbolics that even the SIV tests don't get, + /// so we use it as a backup for everything. + bool gcdMIVtest(const SCEV *Src, + const SCEV *Dst, + FullDependence &Result) const; + + /// banerjeeMIVtest - Tests an MIV subscript pair for dependence. + /// Returns true if any possible dependence is disproved. + /// Marks the result as inconsistant. + /// Computes directions. + bool banerjeeMIVtest(const SCEV *Src, + const SCEV *Dst, + const SmallBitVector &Loops, + FullDependence &Result) const; + + /// collectCoefficientInfo - Walks through the subscript, + /// collecting each coefficient, the associated loop bounds, + /// and recording its positive and negative parts for later use. 
+ CoefficientInfo *collectCoeffInfo(const SCEV *Subscript, + bool SrcFlag, + const SCEV *&Constant) const; + + /// getPositivePart - X^+ = max(X, 0). + /// + const SCEV *getPositivePart(const SCEV *X) const; + + /// getNegativePart - X^- = min(X, 0). + /// + const SCEV *getNegativePart(const SCEV *X) const; + + /// getLowerBound - Looks through all the bounds info and + /// computes the lower bound given the current direction settings + /// at each level. + const SCEV *getLowerBound(BoundInfo *Bound) const; + + /// getUpperBound - Looks through all the bounds info and + /// computes the upper bound given the current direction settings + /// at each level. + const SCEV *getUpperBound(BoundInfo *Bound) const; + + /// exploreDirections - Hierarchically expands the direction vector + /// search space, combining the directions of discovered dependences + /// in the DirSet field of Bound. Returns the number of distinct + /// dependences discovered. If the dependence is disproved, + /// it will return 0. + unsigned exploreDirections(unsigned Level, + CoefficientInfo *A, + CoefficientInfo *B, + BoundInfo *Bound, + const SmallBitVector &Loops, + unsigned &DepthExpanded, + const SCEV *Delta) const; + + /// testBounds - Returns true iff the current bounds are plausible. + /// + bool testBounds(unsigned char DirKind, + unsigned Level, + BoundInfo *Bound, + const SCEV *Delta) const; + + /// findBoundsALL - Computes the upper and lower bounds for level K + /// using the * direction. Records them in Bound. + void findBoundsALL(CoefficientInfo *A, + CoefficientInfo *B, + BoundInfo *Bound, + unsigned K) const; + + /// findBoundsLT - Computes the upper and lower bounds for level K + /// using the < direction. Records them in Bound. + void findBoundsLT(CoefficientInfo *A, + CoefficientInfo *B, + BoundInfo *Bound, + unsigned K) const; + + /// findBoundsGT - Computes the upper and lower bounds for level K + /// using the > direction. Records them in Bound. + void findBoundsGT(CoefficientInfo *A, + CoefficientInfo *B, + BoundInfo *Bound, + unsigned K) const; + + /// findBoundsEQ - Computes the upper and lower bounds for level K + /// using the = direction. Records them in Bound. + void findBoundsEQ(CoefficientInfo *A, + CoefficientInfo *B, + BoundInfo *Bound, + unsigned K) const; + + /// intersectConstraints - Updates X with the intersection + /// of the Constraints X and Y. Returns true if X has changed. + bool intersectConstraints(Constraint *X, + const Constraint *Y); + + /// propagate - Review the constraints, looking for opportunities + /// to simplify a subscript pair (Src and Dst). + /// Return true if some simplification occurs. + /// If the simplification isn't exact (that is, if it is conservative + /// in terms of dependence), set consistent to false. + bool propagate(const SCEV *&Src, + const SCEV *&Dst, + SmallBitVector &Loops, + SmallVector &Constraints, + bool &Consistent); + + /// propagateDistance - Attempt to propagate a distance + /// constraint into a subscript pair (Src and Dst). + /// Return true if some simplification occurs. + /// If the simplification isn't exact (that is, if it is conservative + /// in terms of dependence), set consistent to false. + bool propagateDistance(const SCEV *&Src, + const SCEV *&Dst, + Constraint &CurConstraint, + bool &Consistent); + + /// propagatePoint - Attempt to propagate a point + /// constraint into a subscript pair (Src and Dst). + /// Return true if some simplification occurs. 
+ bool propagatePoint(const SCEV *&Src, + const SCEV *&Dst, + Constraint &CurConstraint); + + /// propagateLine - Attempt to propagate a line + /// constraint into a subscript pair (Src and Dst). + /// Return true if some simplification occurs. + /// If the simplification isn't exact (that is, if it is conservative + /// in terms of dependence), set consistent to false. + bool propagateLine(const SCEV *&Src, + const SCEV *&Dst, + Constraint &CurConstraint, + bool &Consistent); + + /// findCoefficient - Given a linear SCEV, + /// return the coefficient corresponding to specified loop. + /// If there isn't one, return the SCEV constant 0. + /// For example, given a*i + b*j + c*k, returning the coefficient + /// corresponding to the j loop would yield b. + const SCEV *findCoefficient(const SCEV *Expr, + const Loop *TargetLoop) const; + + /// zeroCoefficient - Given a linear SCEV, + /// return the SCEV given by zeroing out the coefficient + /// corresponding to the specified loop. + /// For example, given a*i + b*j + c*k, zeroing the coefficient + /// corresponding to the j loop would yield a*i + c*k. + const SCEV *zeroCoefficient(const SCEV *Expr, + const Loop *TargetLoop) const; + + /// addToCoefficient - Given a linear SCEV Expr, + /// return the SCEV given by adding some Value to the + /// coefficient corresponding to the specified TargetLoop. + /// For example, given a*i + b*j + c*k, adding 1 to the coefficient + /// corresponding to the j loop would yield a*i + (b+1)*j + c*k. + const SCEV *addToCoefficient(const SCEV *Expr, + const Loop *TargetLoop, + const SCEV *Value) const; + + /// updateDirection - Update direction vector entry + /// based on the current constraint. + void updateDirection(Dependence::DVEntry &Level, + const Constraint &CurConstraint) const; + public: + static char ID; // Class identification, replacement for typeinfo + DependenceAnalysis() : FunctionPass(ID) { + initializeDependenceAnalysisPass(*PassRegistry::getPassRegistry()); + }; + + bool runOnFunction(Function &F); + void releaseMemory(); + void getAnalysisUsage(AnalysisUsage &) const; + void print(raw_ostream &, const Module * = 0) const; + }; // class DependenceAnalysis + + /// createDependenceAnalysisPass - This creates an instance of the + /// DependenceAnalysis pass. + FunctionPass *createDependenceAnalysisPass(); + +} // namespace llvm + +#endif diff --git a/llvm/include/llvm/Analysis/Passes.h b/llvm/include/llvm/Analysis/Passes.h index c52f846..eddc7c4 100644 --- a/llvm/include/llvm/Analysis/Passes.h +++ b/llvm/include/llvm/Analysis/Passes.h @@ -183,8 +183,17 @@ namespace llvm { // createLoopDependenceAnalysisPass - This creates an instance of the // LoopDependenceAnalysis pass. // + LoopPass *createDependenceAnalysisPass(); + + //===--------------------------------------------------------------------===// + // + // createLoopDependenceAnalysisPass - This creates an instance of the + // LoopDependenceAnalysis pass. + // LoopPass *createLoopDependenceAnalysisPass(); + //===--------------------------------------------------------------------===// + // // Minor pass prototypes, allowing us to expose them through bugpoint and // analyze. 
FunctionPass *createInstCountPass(); diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h index 067d8da..ee9b1c5 100644 --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -94,6 +94,7 @@ void initializeDCEPass(PassRegistry&); void initializeDSEPass(PassRegistry&); void initializeDeadInstEliminationPass(PassRegistry&); void initializeDeadMachineInstructionElimPass(PassRegistry&); +void initializeDependenceAnalysisPass(PassRegistry&); void initializeDomOnlyPrinterPass(PassRegistry&); void initializeDomOnlyViewerPass(PassRegistry&); void initializeDomPrinterPass(PassRegistry&); diff --git a/llvm/include/llvm/LinkAllPasses.h b/llvm/include/llvm/LinkAllPasses.h index c01e471..4b10d0e 100644 --- a/llvm/include/llvm/LinkAllPasses.h +++ b/llvm/include/llvm/LinkAllPasses.h @@ -64,6 +64,7 @@ namespace { (void) llvm::createDeadCodeEliminationPass(); (void) llvm::createDeadInstEliminationPass(); (void) llvm::createDeadStoreEliminationPass(); + (void) llvm::createDependenceAnalysisPass(); (void) llvm::createDomOnlyPrinterPass(); (void) llvm::createDomPrinterPass(); (void) llvm::createDomOnlyViewerPass(); diff --git a/llvm/lib/Analysis/Analysis.cpp b/llvm/lib/Analysis/Analysis.cpp index 87a75fd..588206e 100644 --- a/llvm/lib/Analysis/Analysis.cpp +++ b/llvm/lib/Analysis/Analysis.cpp @@ -31,6 +31,7 @@ void llvm::initializeAnalysis(PassRegistry &Registry) { initializeCFGOnlyViewerPass(Registry); initializeCFGOnlyPrinterPass(Registry); initializePrintDbgInfoPass(Registry); + initializeDependenceAnalysisPass(Registry); initializeDominanceFrontierPass(Registry); initializeDomViewerPass(Registry); initializeDomPrinterPass(Registry); diff --git a/llvm/lib/Analysis/CMakeLists.txt b/llvm/lib/Analysis/CMakeLists.txt index e461848..3ce888f 100644 --- a/llvm/lib/Analysis/CMakeLists.txt +++ b/llvm/lib/Analysis/CMakeLists.txt @@ -13,6 +13,7 @@ add_llvm_library(LLVMAnalysis CodeMetrics.cpp ConstantFolding.cpp DbgInfoPrinter.cpp + DependenceAnalysis.cpp DomPrinter.cpp DominanceFrontier.cpp IVUsers.cpp diff --git a/llvm/lib/Analysis/DependenceAnalysis.cpp b/llvm/lib/Analysis/DependenceAnalysis.cpp new file mode 100644 index 0000000..c7bec43 --- /dev/null +++ b/llvm/lib/Analysis/DependenceAnalysis.cpp @@ -0,0 +1,3781 @@ +//===-- DependenceAnalysis.cpp - DA Implementation --------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// DependenceAnalysis is an LLVM pass that analyses dependences between memory +// accesses. Currently, it is an (incomplete) implementation of the approach +// described in +// +// Practical Dependence Testing +// Goff, Kennedy, Tseng +// PLDI 1991 +// +// There's a single entry point that analyzes the dependence between a pair +// of memory references in a function, returning either NULL, for no dependence, +// or a more-or-less detailed description of the dependence between them. +// +// Currently, the implementation cannot propagate constraints between +// coupled RDIV subscripts and lacks a multi-subscript MIV test. +// Both of these are conservative weaknesses; +// that is, not a source of correctness problems. +// +// The implementation depends on the GEP instruction to +// differentiate subscripts. 
Since Clang linearizes subscripts +// for most arrays, we give up some precision (though the existing MIV tests +// will help). We trust that the GEP instruction will eventually be extended. +// In the meantime, we should explore Maslov's ideas about delinearization. +// +// We should pay some careful attention to the possibility of integer overflow +// in the implementation of the various tests. This could happen with Add, +// Subtract, or Multiply, with both APInt's and SCEV's. +// +// Some non-linear subscript pairs can be handled by the GCD test +// (and perhaps other tests). +// Should explore how often these things occur. +// +// Finally, it seems like certain test cases expose weaknesses in the SCEV +// simplification, especially in the handling of sign and zero extensions. +// It could be useful to spend time exploring these. +// +// Please note that this is work in progress and the interface is subject to +// change. +// +//===----------------------------------------------------------------------===// +// // +// In memory of Ken Kennedy, 1945 - 2007 // +// // +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "da" + +#include "llvm/Analysis/DependenceAnalysis.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Instructions.h" +#include "llvm/Operator.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/InstIterator.h" + +using namespace llvm; + +//===----------------------------------------------------------------------===// +// statistics + +STATISTIC(TotalArrayPairs, "Array pairs tested"); +STATISTIC(SeparableSubscriptPairs, "Separable subscript pairs"); +STATISTIC(CoupledSubscriptPairs, "Coupled subscript pairs"); +STATISTIC(NonlinearSubscriptPairs, "Nonlinear subscript pairs"); +STATISTIC(ZIVapplications, "ZIV applications"); +STATISTIC(ZIVindependence, "ZIV independence"); +STATISTIC(StrongSIVapplications, "Strong SIV applications"); +STATISTIC(StrongSIVsuccesses, "Strong SIV successes"); +STATISTIC(StrongSIVindependence, "Strong SIV independence"); +STATISTIC(WeakCrossingSIVapplications, "Weak-Crossing SIV applications"); +STATISTIC(WeakCrossingSIVsuccesses, "Weak-Crossing SIV successes"); +STATISTIC(WeakCrossingSIVindependence, "Weak-Crossing SIV independence"); +STATISTIC(ExactSIVapplications, "Exact SIV applications"); +STATISTIC(ExactSIVsuccesses, "Exact SIV successes"); +STATISTIC(ExactSIVindependence, "Exact SIV independence"); +STATISTIC(WeakZeroSIVapplications, "Weak-Zero SIV applications"); +STATISTIC(WeakZeroSIVsuccesses, "Weak-Zero SIV successes"); +STATISTIC(WeakZeroSIVindependence, "Weak-Zero SIV independence"); +STATISTIC(ExactRDIVapplications, "Exact RDIV applications"); +STATISTIC(ExactRDIVindependence, "Exact RDIV independence"); +STATISTIC(SymbolicRDIVapplications, "Symbolic RDIV applications"); +STATISTIC(SymbolicRDIVindependence, "Symbolic RDIV independence"); +STATISTIC(DeltaApplications, "Delta applications"); +STATISTIC(DeltaSuccesses, "Delta successes"); +STATISTIC(DeltaIndependence, "Delta independence"); +STATISTIC(DeltaPropagations, "Delta propagations"); +STATISTIC(GCDapplications, "GCD applications"); +STATISTIC(GCDsuccesses, "GCD successes"); +STATISTIC(GCDindependence, "GCD independence"); +STATISTIC(BanerjeeApplications, "Banerjee applications"); +STATISTIC(BanerjeeIndependence, "Banerjee independence"); +STATISTIC(BanerjeeSuccesses, "Banerjee successes"); + 
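As background for readers of this patch, here is a minimal sketch (editorial, not part of the change) of how a hypothetical client transform might consume the analysis under the legacy pass manager. The pass name LoopClientPass and its boilerplate are invented for illustration; depends(), getLevels(), getDirection(), and getDistance() are the interfaces declared in DependenceAnalysis.h above, and the usual headers (Pass.h, InstIterator.h, DependenceAnalysis.h) are assumed.

// Hypothetical client pass -- illustrative sketch only, not part of this patch.
namespace {
  struct LoopClientPass : public FunctionPass {
    static char ID;
    LoopClientPass() : FunctionPass(ID) {}

    void getAnalysisUsage(AnalysisUsage &AU) const {
      AU.addRequired<DependenceAnalysis>();  // make the analysis available
      AU.setPreservesAll();
    }

    bool runOnFunction(Function &F) {
      DependenceAnalysis *DA = &getAnalysis<DependenceAnalysis>();
      for (inst_iterator SrcIt = inst_begin(F), E = inst_end(F); SrcIt != E; ++SrcIt) {
        Instruction *Src = &*SrcIt;
        if (!Src->mayReadFromMemory() && !Src->mayWriteToMemory())
          continue;
        for (inst_iterator DstIt = SrcIt, DE = inst_end(F); DstIt != DE; ++DstIt) {
          Instruction *Dst = &*DstIt;
          if (!Dst->mayReadFromMemory() && !Dst->mayWriteToMemory())
            continue;
          // depends() returns NULL when no dependence exists; the caller
          // owns (and must delete) any Dependence it returns.
          if (Dependence *D = DA->depends(Src, Dst, true)) {
            for (unsigned Level = 1; Level <= D->getLevels(); ++Level) {
              unsigned Dir = D->getDirection(Level);     // union of DVEntry::{LT,EQ,GT}
              const SCEV *Dist = D->getDistance(Level);  // NULL if no distance known
              (void) Dir; (void) Dist;                   // a real client would act on these
            }
            delete D;
          }
        }
      }
      return false;
    }
  };
}
char LoopClientPass::ID = 0;

The test harness that follows (dumpExampleDependence) does essentially this for the first store/load pair in a function.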
+//===----------------------------------------------------------------------===//
+// basics
+
+INITIALIZE_PASS_BEGIN(DependenceAnalysis, "da",
+                      "Dependence Analysis", true, true)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(DependenceAnalysis, "da",
+                    "Dependence Analysis", true, true)
+
+char DependenceAnalysis::ID = 0;
+
+
+FunctionPass *llvm::createDependenceAnalysisPass() {
+  return new DependenceAnalysis();
+}
+
+
+bool DependenceAnalysis::runOnFunction(Function &F) {
+  this->F = &F;
+  AA = &getAnalysis<AliasAnalysis>();
+  SE = &getAnalysis<ScalarEvolution>();
+  LI = &getAnalysis<LoopInfo>();
+  return false;
+}
+
+
+void DependenceAnalysis::releaseMemory() {
+}
+
+
+void DependenceAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.setPreservesAll();
+  AU.addRequiredTransitive<AliasAnalysis>();
+  AU.addRequiredTransitive<ScalarEvolution>();
+  AU.addRequiredTransitive<LoopInfo>();
+}
+
+
+// Used to test the dependence analyzer.
+// Looks through the function, noting the first store instruction
+// and the first load instruction
+// (which always follows the first store in our tests).
+// Calls depends() and prints out the result.
+// Ignores all other instructions.
+static
+void dumpExampleDependence(raw_ostream &OS, Function *F,
+                           DependenceAnalysis *DA) {
+  for (inst_iterator SrcI = inst_begin(F), SrcE = inst_end(F);
+       SrcI != SrcE; ++SrcI) {
+    if (const StoreInst *Src = dyn_cast<StoreInst>(&*SrcI)) {
+      for (inst_iterator DstI = SrcI, DstE = inst_end(F);
+           DstI != DstE; ++DstI) {
+        if (const LoadInst *Dst = dyn_cast<LoadInst>(&*DstI)) {
+          OS << "da analyze - ";
+          if (Dependence *D = DA->depends(Src, Dst, true)) {
+            D->dump(OS);
+            for (unsigned Level = 1; Level <= D->getLevels(); Level++) {
+              if (D->isSplitable(Level)) {
+                OS << "da analyze - split level = " << Level;
+                OS << ", iteration = " << *DA->getSplitIteration(D, Level);
+                OS << "!\n";
+              }
+            }
+            delete D;
+          }
+          else
+            OS << "none!\n";
+          return;
+        }
+      }
+    }
+  }
+}
+
+
+void DependenceAnalysis::print(raw_ostream &OS, const Module*) const {
+  dumpExampleDependence(OS, F, const_cast<DependenceAnalysis *>(this));
+}
+
+//===----------------------------------------------------------------------===//
+// Dependence methods
+
+// Returns true if this is an input dependence.
+bool Dependence::isInput() const {
+  return Src->mayReadFromMemory() && Dst->mayReadFromMemory();
+}
+
+
+// Returns true if this is an output dependence.
+bool Dependence::isOutput() const {
+  return Src->mayWriteToMemory() && Dst->mayWriteToMemory();
+}
+
+
+// Returns true if this is a flow (aka true) dependence.
+bool Dependence::isFlow() const {
+  return Src->mayWriteToMemory() && Dst->mayReadFromMemory();
+}
+
+
+// Returns true if this is an anti dependence.
+bool Dependence::isAnti() const {
+  return Src->mayReadFromMemory() && Dst->mayWriteToMemory();
+}
+
+
+// Returns true if a particular level is scalar; that is,
+// if no subscript in the source or destination mentions the induction
+// variable associated with the loop at this level.
+// Leave this out of line, so it will serve as a virtual method anchor +bool Dependence::isScalar(unsigned level) const { + return false; +} + + +//===----------------------------------------------------------------------===// +// FullDependence methods + +FullDependence::FullDependence(const Instruction *Source, + const Instruction *Destination, + bool PossiblyLoopIndependent, + unsigned CommonLevels) : + Dependence(Source, Destination), + Levels(CommonLevels), + LoopIndependent(PossiblyLoopIndependent) { + Consistent = true; + DV = CommonLevels ? new DVEntry[CommonLevels] : NULL; +} + +// The rest are simple getters that hide the implementation. + +// getDirection - Returns the direction associated with a particular level. +unsigned FullDependence::getDirection(unsigned Level) const { + assert(0 < Level && Level <= Levels && "Level out of range"); + return DV[Level - 1].Direction; +} + + +// Returns the distance (or NULL) associated with a particular level. +const SCEV *FullDependence::getDistance(unsigned Level) const { + assert(0 < Level && Level <= Levels && "Level out of range"); + return DV[Level - 1].Distance; +} + + +// Returns true if a particular level is scalar; that is, +// if no subscript in the source or destination mention the induction +// variable associated with the loop at this level. +bool FullDependence::isScalar(unsigned Level) const { + assert(0 < Level && Level <= Levels && "Level out of range"); + return DV[Level - 1].Scalar; +} + + +// Returns true if peeling the first iteration from this loop +// will break this dependence. +bool FullDependence::isPeelFirst(unsigned Level) const { + assert(0 < Level && Level <= Levels && "Level out of range"); + return DV[Level - 1].PeelFirst; +} + + +// Returns true if peeling the last iteration from this loop +// will break this dependence. +bool FullDependence::isPeelLast(unsigned Level) const { + assert(0 < Level && Level <= Levels && "Level out of range"); + return DV[Level - 1].PeelLast; +} + + +// Returns true if splitting this loop will break the dependence. +bool FullDependence::isSplitable(unsigned Level) const { + assert(0 < Level && Level <= Levels && "Level out of range"); + return DV[Level - 1].Splitable; +} + + +//===----------------------------------------------------------------------===// +// DependenceAnalysis::Constraint methods + +// If constraint is a point , returns X. +// Otherwise assert. +const SCEV *DependenceAnalysis::Constraint::getX() const { + assert(Kind == Point && "Kind should be Point"); + return A; +} + + +// If constraint is a point , returns Y. +// Otherwise assert. +const SCEV *DependenceAnalysis::Constraint::getY() const { + assert(Kind == Point && "Kind should be Point"); + return B; +} + + +// If constraint is a line AX + BY = C, returns A. +// Otherwise assert. +const SCEV *DependenceAnalysis::Constraint::getA() const { + assert((Kind == Line || Kind == Distance) && + "Kind should be Line (or Distance)"); + return A; +} + + +// If constraint is a line AX + BY = C, returns B. +// Otherwise assert. +const SCEV *DependenceAnalysis::Constraint::getB() const { + assert((Kind == Line || Kind == Distance) && + "Kind should be Line (or Distance)"); + return B; +} + + +// If constraint is a line AX + BY = C, returns C. +// Otherwise assert. +const SCEV *DependenceAnalysis::Constraint::getC() const { + assert((Kind == Line || Kind == Distance) && + "Kind should be Line (or Distance)"); + return C; +} + + +// If constraint is a distance, returns D. +// Otherwise assert. 
+const SCEV *DependenceAnalysis::Constraint::getD() const { + assert(Kind == Distance && "Kind should be Distance"); + return SE->getNegativeSCEV(C); +} + + +// Returns the loop associated with this constraint. +const Loop *DependenceAnalysis::Constraint::getAssociatedLoop() const { + assert((Kind == Distance || Kind == Line || Kind == Point) && + "Kind should be Distance, Line, or Point"); + return AssociatedLoop; +} + + +void DependenceAnalysis::Constraint::setPoint(const SCEV *X, + const SCEV *Y, + const Loop *CurLoop) { + Kind = Point; + A = X; + B = Y; + AssociatedLoop = CurLoop; +} + + +void DependenceAnalysis::Constraint::setLine(const SCEV *AA, + const SCEV *BB, + const SCEV *CC, + const Loop *CurLoop) { + Kind = Line; + A = AA; + B = BB; + C = CC; + AssociatedLoop = CurLoop; +} + + +void DependenceAnalysis::Constraint::setDistance(const SCEV *D, + const Loop *CurLoop) { + Kind = Distance; + A = SE->getConstant(D->getType(), 1); + B = SE->getNegativeSCEV(A); + C = SE->getNegativeSCEV(D); + AssociatedLoop = CurLoop; +} + + +void DependenceAnalysis::Constraint::setEmpty() { + Kind = Empty; +} + + +void DependenceAnalysis::Constraint::setAny(ScalarEvolution *NewSE) { + SE = NewSE; + Kind = Any; +} + + +// For debugging purposes. Dumps the constraint out to OS. +void DependenceAnalysis::Constraint::dump(raw_ostream &OS) const { + if (isEmpty()) + OS << " Empty\n"; + else if (isAny()) + OS << " Any\n"; + else if (isPoint()) + OS << " Point is <" << *getX() << ", " << *getY() << ">\n"; + else if (isDistance()) + OS << " Distance is " << *getD() << + " (" << *getA() << "*X + " << *getB() << "*Y = " << *getC() << ")\n"; + else if (isLine()) + OS << " Line is " << *getA() << "*X + " << + *getB() << "*Y = " << *getC() << "\n"; + else + llvm_unreachable("unknown constraint type in Constraint::dump"); +} + + +// Updates X with the intersection +// of the Constraints X and Y. Returns true if X has changed. +// Corresponds to Figure 4 from the paper +// +// Practical Dependence Testing +// Goff, Kennedy, Tseng +// PLDI 1991 +bool DependenceAnalysis::intersectConstraints(Constraint *X, + const Constraint *Y) { + ++DeltaApplications; + DEBUG(dbgs() << "\tintersect constraints\n"); + DEBUG(dbgs() << "\t X ="; X->dump(dbgs())); + DEBUG(dbgs() << "\t Y ="; Y->dump(dbgs())); + assert(!Y->isPoint() && "Y must not be a Point"); + if (X->isAny()) { + if (Y->isAny()) + return false; + *X = *Y; + return true; + } + if (X->isEmpty()) + return false; + if (Y->isEmpty()) { + X->setEmpty(); + return true; + } + + if (X->isDistance() && Y->isDistance()) { + DEBUG(dbgs() << "\t intersect 2 distances\n"); + if (isKnownPredicate(CmpInst::ICMP_EQ, X->getD(), Y->getD())) + return false; + if (isKnownPredicate(CmpInst::ICMP_NE, X->getD(), Y->getD())) { + X->setEmpty(); + ++DeltaSuccesses; + return true; + } + // Hmmm, interesting situation. + // I guess if either is constant, keep it and ignore the other. + if (isa(Y->getD())) { + *X = *Y; + return true; + } + return false; + } + + // At this point, the pseudo-code in Figure 4 of the paper + // checks if (X->isPoint() && Y->isPoint()). + // This case can't occur in our implementation, + // since a Point can only arise as the result of intersecting + // two Line constraints, and the right-hand value, Y, is never + // the result of an intersection. 
+ assert(!(X->isPoint() && Y->isPoint()) && + "We shouldn't ever see X->isPoint() && Y->isPoint()"); + + if (X->isLine() && Y->isLine()) { + DEBUG(dbgs() << "\t intersect 2 lines\n"); + const SCEV *Prod1 = SE->getMulExpr(X->getA(), Y->getB()); + const SCEV *Prod2 = SE->getMulExpr(X->getB(), Y->getA()); + if (isKnownPredicate(CmpInst::ICMP_EQ, Prod1, Prod2)) { + // slopes are equal, so lines are parallel + DEBUG(dbgs() << "\t\tsame slope\n"); + Prod1 = SE->getMulExpr(X->getC(), Y->getB()); + Prod2 = SE->getMulExpr(X->getB(), Y->getC()); + if (isKnownPredicate(CmpInst::ICMP_EQ, Prod1, Prod2)) + return false; + if (isKnownPredicate(CmpInst::ICMP_NE, Prod1, Prod2)) { + X->setEmpty(); + ++DeltaSuccesses; + return true; + } + return false; + } + if (isKnownPredicate(CmpInst::ICMP_NE, Prod1, Prod2)) { + // slopes differ, so lines intersect + DEBUG(dbgs() << "\t\tdifferent slopes\n"); + const SCEV *C1B2 = SE->getMulExpr(X->getC(), Y->getB()); + const SCEV *C1A2 = SE->getMulExpr(X->getC(), Y->getA()); + const SCEV *C2B1 = SE->getMulExpr(Y->getC(), X->getB()); + const SCEV *C2A1 = SE->getMulExpr(Y->getC(), X->getA()); + const SCEV *A1B2 = SE->getMulExpr(X->getA(), Y->getB()); + const SCEV *A2B1 = SE->getMulExpr(Y->getA(), X->getB()); + const SCEVConstant *C1A2_C2A1 = + dyn_cast(SE->getMinusSCEV(C1A2, C2A1)); + const SCEVConstant *C1B2_C2B1 = + dyn_cast(SE->getMinusSCEV(C1B2, C2B1)); + const SCEVConstant *A1B2_A2B1 = + dyn_cast(SE->getMinusSCEV(A1B2, A2B1)); + const SCEVConstant *A2B1_A1B2 = + dyn_cast(SE->getMinusSCEV(A2B1, A1B2)); + if (!C1B2_C2B1 || !C1A2_C2A1 || + !A1B2_A2B1 || !A2B1_A1B2) + return false; + APInt Xtop = C1B2_C2B1->getValue()->getValue(); + APInt Xbot = A1B2_A2B1->getValue()->getValue(); + APInt Ytop = C1A2_C2A1->getValue()->getValue(); + APInt Ybot = A2B1_A1B2->getValue()->getValue(); + DEBUG(dbgs() << "\t\tXtop = " << Xtop << "\n"); + DEBUG(dbgs() << "\t\tXbot = " << Xbot << "\n"); + DEBUG(dbgs() << "\t\tYtop = " << Ytop << "\n"); + DEBUG(dbgs() << "\t\tYbot = " << Ybot << "\n"); + APInt Xq = Xtop; // these need to be initialized, even + APInt Xr = Xtop; // though they're just going to be overwritten + APInt::sdivrem(Xtop, Xbot, Xq, Xr); + APInt Yq = Ytop; + APInt Yr = Ytop;; + APInt::sdivrem(Ytop, Ybot, Yq, Yr); + if (Xr != 0 || Yr != 0) { + X->setEmpty(); + ++DeltaSuccesses; + return true; + } + DEBUG(dbgs() << "\t\tX = " << Xq << ", Y = " << Yq << "\n"); + if (Xq.slt(0) || Yq.slt(0)) { + X->setEmpty(); + ++DeltaSuccesses; + return true; + } + if (const SCEVConstant *CUB = + collectConstantUpperBound(X->getAssociatedLoop(), Prod1->getType())) { + APInt UpperBound = CUB->getValue()->getValue(); + DEBUG(dbgs() << "\t\tupper bound = " << UpperBound << "\n"); + if (Xq.sgt(UpperBound) || Yq.sgt(UpperBound)) { + X->setEmpty(); + ++DeltaSuccesses; + return true; + } + } + X->setPoint(SE->getConstant(Xq), + SE->getConstant(Yq), + X->getAssociatedLoop()); + ++DeltaSuccesses; + return true; + } + return false; + } + + // if (X->isLine() && Y->isPoint()) This case can't occur. 
+ assert(!(X->isLine() && Y->isPoint()) && "This case should never occur"); + + if (X->isPoint() && Y->isLine()) { + DEBUG(dbgs() << "\t intersect Point and Line\n"); + const SCEV *A1X1 = SE->getMulExpr(Y->getA(), X->getX()); + const SCEV *B1Y1 = SE->getMulExpr(Y->getB(), X->getY()); + const SCEV *Sum = SE->getAddExpr(A1X1, B1Y1); + if (isKnownPredicate(CmpInst::ICMP_EQ, Sum, Y->getC())) + return false; + if (isKnownPredicate(CmpInst::ICMP_NE, Sum, Y->getC())) { + X->setEmpty(); + ++DeltaSuccesses; + return true; + } + return false; + } + + llvm_unreachable("shouldn't reach the end of Constraint intersection"); + return false; +} + + +//===----------------------------------------------------------------------===// +// DependenceAnalysis methods + +// For debugging purposes. Dumps a dependence to OS. +void Dependence::dump(raw_ostream &OS) const { + bool Splitable = false; + if (isConfused()) + OS << "confused"; + else { + if (isConsistent()) + OS << "consistent "; + if (isFlow()) + OS << "flow"; + else if (isOutput()) + OS << "output"; + else if (isAnti()) + OS << "anti"; + else if (isInput()) + OS << "input"; + unsigned Levels = getLevels(); + if (Levels) { + OS << " ["; + for (unsigned II = 1; II <= Levels; ++II) { + if (isSplitable(II)) + Splitable = true; + if (isPeelFirst(II)) + OS << 'p'; + const SCEV *Distance = getDistance(II); + if (Distance) + OS << *Distance; + else if (isScalar(II)) + OS << "S"; + else { + unsigned Direction = getDirection(II); + if (Direction == DVEntry::ALL) + OS << "*"; + else { + if (Direction & DVEntry::LT) + OS << "<"; + if (Direction & DVEntry::EQ) + OS << "="; + if (Direction & DVEntry::GT) + OS << ">"; + } + } + if (isPeelLast(II)) + OS << 'p'; + if (II < Levels) + OS << " "; + } + if (isLoopIndependent()) + OS << "|<"; + OS << "]"; + if (Splitable) + OS << " splitable"; + } + } + OS << "!\n"; +} + + + +static +AliasAnalysis::AliasResult underlyingObjectsAlias(AliasAnalysis *AA, + const Value *A, + const Value *B) { + const Value *AObj = GetUnderlyingObject(A); + const Value *BObj = GetUnderlyingObject(B); + return AA->alias(AObj, AA->getTypeStoreSize(AObj->getType()), + BObj, AA->getTypeStoreSize(BObj->getType())); +} + + +// Returns true if the load or store can be analyzed. Atomic and volatile +// operations have properties which this analysis does not understand. +static +bool isLoadOrStore(const Instruction *I) { + if (const LoadInst *LI = dyn_cast(I)) + return LI->isUnordered(); + else if (const StoreInst *SI = dyn_cast(I)) + return SI->isUnordered(); + return false; +} + + +static +const Value *getPointerOperand(const Instruction *I) { + if (const LoadInst *LI = dyn_cast(I)) + return LI->getPointerOperand(); + if (const StoreInst *SI = dyn_cast(I)) + return SI->getPointerOperand(); + llvm_unreachable("Value is not load or store instruction"); + return 0; +} + + +// Examines the loop nesting of the Src and Dst +// instructions and establishes their shared loops. Sets the variables +// CommonLevels, SrcLevels, and MaxLevels. +// The source and destination instructions needn't be contained in the same +// loop. The routine establishNestingLevels finds the level of most deeply +// nested loop that contains them both, CommonLevels. An instruction that's +// not contained in a loop is at level = 0. MaxLevels is equal to the level +// of the source plus the level of the destination, minus CommonLevels. +// This lets us allocate vectors MaxLevels in length, with room for every +// distinct loop referenced in both the source and destination subscripts. 
+// The variable SrcLevels is the nesting depth of the source instruction. +// It's used to help calculate distinct loops referenced by the destination. +// Here's the map from loops to levels: +// 0 - unused +// 1 - outermost common loop +// ... - other common loops +// CommonLevels - innermost common loop +// ... - loops containing Src but not Dst +// SrcLevels - innermost loop containing Src but not Dst +// ... - loops containing Dst but not Src +// MaxLevels - innermost loops containing Dst but not Src +// Consider the follow code fragment: +// for (a = ...) { +// for (b = ...) { +// for (c = ...) { +// for (d = ...) { +// A[] = ...; +// } +// } +// for (e = ...) { +// for (f = ...) { +// for (g = ...) { +// ... = A[]; +// } +// } +// } +// } +// } +// If we're looking at the possibility of a dependence between the store +// to A (the Src) and the load from A (the Dst), we'll note that they +// have 2 loops in common, so CommonLevels will equal 2 and the direction +// vector for Result will have 2 entries. SrcLevels = 4 and MaxLevels = 7. +// A map from loop names to loop numbers would look like +// a - 1 +// b - 2 = CommonLevels +// c - 3 +// d - 4 = SrcLevels +// e - 5 +// f - 6 +// g - 7 = MaxLevels +void DependenceAnalysis::establishNestingLevels(const Instruction *Src, + const Instruction *Dst) { + const BasicBlock *SrcBlock = Src->getParent(); + const BasicBlock *DstBlock = Dst->getParent(); + unsigned SrcLevel = LI->getLoopDepth(SrcBlock); + unsigned DstLevel = LI->getLoopDepth(DstBlock); + const Loop *SrcLoop = LI->getLoopFor(SrcBlock); + const Loop *DstLoop = LI->getLoopFor(DstBlock); + SrcLevels = SrcLevel; + MaxLevels = SrcLevel + DstLevel; + while (SrcLevel > DstLevel) { + SrcLoop = SrcLoop->getParentLoop(); + SrcLevel--; + } + while (DstLevel > SrcLevel) { + DstLoop = DstLoop->getParentLoop(); + DstLevel--; + } + while (SrcLoop != DstLoop) { + SrcLoop = SrcLoop->getParentLoop(); + DstLoop = DstLoop->getParentLoop(); + SrcLevel--; + } + CommonLevels = SrcLevel; + MaxLevels -= CommonLevels; +} + + +// Given one of the loops containing the source, return +// its level index in our numbering scheme. +unsigned DependenceAnalysis::mapSrcLoop(const Loop *SrcLoop) const { + return SrcLoop->getLoopDepth(); +} + + +// Given one of the loops containing the destination, +// return its level index in our numbering scheme. +unsigned DependenceAnalysis::mapDstLoop(const Loop *DstLoop) const { + unsigned D = DstLoop->getLoopDepth(); + if (D > CommonLevels) + return D - CommonLevels + SrcLevels; + else + return D; +} + + +// Returns true if Expression is loop invariant in LoopNest. +bool DependenceAnalysis::isLoopInvariant(const SCEV *Expression, + const Loop *LoopNest) const { + if (!LoopNest) + return true; + return SE->isLoopInvariant(Expression, LoopNest) && + isLoopInvariant(Expression, LoopNest->getParentLoop()); +} + + + +// Finds the set of loops from the LoopNest that +// have a level <= CommonLevels and are referred to by the SCEV Expression. +void DependenceAnalysis::collectCommonLoops(const SCEV *Expression, + const Loop *LoopNest, + SmallBitVector &Loops) const { + while (LoopNest) { + unsigned Level = LoopNest->getLoopDepth(); + if (Level <= CommonLevels && !SE->isLoopInvariant(Expression, LoopNest)) + Loops.set(Level); + LoopNest = LoopNest->getParentLoop(); + } +} + + +// removeMatchingExtensions - Examines a subscript pair. 
+// If the source and destination are identically sign (or zero) +// extended, it strips off the extension in an effect to simplify +// the actual analysis. +void DependenceAnalysis::removeMatchingExtensions(Subscript *Pair) { + const SCEV *Src = Pair->Src; + const SCEV *Dst = Pair->Dst; + if ((isa(Src) && isa(Dst)) || + (isa(Src) && isa(Dst))) { + const SCEVCastExpr *SrcCast = cast(Src); + const SCEVCastExpr *DstCast = cast(Dst); + if (SrcCast->getType() == DstCast->getType()) { + Pair->Src = SrcCast->getOperand(); + Pair->Dst = DstCast->getOperand(); + } + } +} + + +// Examine the scev and return true iff it's linear. +// Collect any loops mentioned in the set of "Loops". +bool DependenceAnalysis::checkSrcSubscript(const SCEV *Src, + const Loop *LoopNest, + SmallBitVector &Loops) { + const SCEVAddRecExpr *AddRec = dyn_cast(Src); + if (!AddRec) + return isLoopInvariant(Src, LoopNest); + const SCEV *Start = AddRec->getStart(); + const SCEV *Step = AddRec->getStepRecurrence(*SE); + if (!isLoopInvariant(Step, LoopNest)) + return false; + Loops.set(mapSrcLoop(AddRec->getLoop())); + return checkSrcSubscript(Start, LoopNest, Loops); +} + + + +// Examine the scev and return true iff it's linear. +// Collect any loops mentioned in the set of "Loops". +bool DependenceAnalysis::checkDstSubscript(const SCEV *Dst, + const Loop *LoopNest, + SmallBitVector &Loops) { + const SCEVAddRecExpr *AddRec = dyn_cast(Dst); + if (!AddRec) + return isLoopInvariant(Dst, LoopNest); + const SCEV *Start = AddRec->getStart(); + const SCEV *Step = AddRec->getStepRecurrence(*SE); + if (!isLoopInvariant(Step, LoopNest)) + return false; + Loops.set(mapDstLoop(AddRec->getLoop())); + return checkDstSubscript(Start, LoopNest, Loops); +} + + +// Examines the subscript pair (the Src and Dst SCEVs) +// and classifies it as either ZIV, SIV, RDIV, MIV, or Nonlinear. +// Collects the associated loops in a set. +DependenceAnalysis::Subscript::ClassificationKind +DependenceAnalysis::classifyPair(const SCEV *Src, const Loop *SrcLoopNest, + const SCEV *Dst, const Loop *DstLoopNest, + SmallBitVector &Loops) { + SmallBitVector SrcLoops(MaxLevels + 1); + SmallBitVector DstLoops(MaxLevels + 1); + if (!checkSrcSubscript(Src, SrcLoopNest, SrcLoops)) + return Subscript::NonLinear; + if (!checkDstSubscript(Dst, DstLoopNest, DstLoops)) + return Subscript::NonLinear; + Loops = SrcLoops; + Loops |= DstLoops; + unsigned N = Loops.count(); + if (N == 0) + return Subscript::ZIV; + if (N == 1) + return Subscript::SIV; + if (N == 2 && (SrcLoops.count() == 0 || + DstLoops.count() == 0 || + (SrcLoops.count() == 1 && DstLoops.count() == 1))) + return Subscript::RDIV; + return Subscript::MIV; +} + + +// A wrapper around SCEV::isKnownPredicate. +// Looks for cases where we're interested in comparing for equality. +// If both X and Y have been identically sign or zero extended, +// it strips off the (confusing) extensions before invoking +// SCEV::isKnownPredicate. Perhaps, someday, the ScalarEvolution package +// will be similarly updated. +// +// If SCEV::isKnownPredicate can't prove the predicate, +// we try simple subtraction, which seems to help in some cases +// involving symbolics. 
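+// For example, with a loop-invariant unknown %n, comparing X = (2 + %n)
+// against Y = %n for inequality may not be provable directly, but the
+// difference X - Y folds to the constant 2, which is trivially nonzero.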
+bool DependenceAnalysis::isKnownPredicate(ICmpInst::Predicate Pred, + const SCEV *X, + const SCEV *Y) const { + if (Pred == CmpInst::ICMP_EQ || + Pred == CmpInst::ICMP_NE) { + if ((isa(X) && + isa(Y)) || + (isa(X) && + isa(Y))) { + const SCEVCastExpr *CX = cast(X); + const SCEVCastExpr *CY = cast(Y); + const SCEV *Xop = CX->getOperand(); + const SCEV *Yop = CY->getOperand(); + if (Xop->getType() == Yop->getType()) { + X = Xop; + Y = Yop; + } + } + } + if (SE->isKnownPredicate(Pred, X, Y)) + return true; + // If SE->isKnownPredicate can't prove the condition, + // we try the brute-force approach of subtracting + // and testing the difference. + // By testing with SE->isKnownPredicate first, we avoid + // the possibility of overflow when the arguments are constants. + const SCEV *Delta = SE->getMinusSCEV(X, Y); + switch (Pred) { + case CmpInst::ICMP_EQ: + return Delta->isZero(); + case CmpInst::ICMP_NE: + return SE->isKnownNonZero(Delta); + case CmpInst::ICMP_SGE: + return SE->isKnownNonNegative(Delta); + case CmpInst::ICMP_SLE: + return SE->isKnownNonPositive(Delta); + case CmpInst::ICMP_SGT: + return SE->isKnownPositive(Delta); + case CmpInst::ICMP_SLT: + return SE->isKnownNegative(Delta); + default: + llvm_unreachable("unexpected predicate in isKnownPredicate"); + } +} + + +// All subscripts are all the same type. +// Loop bound may be smaller (e.g., a char). +// Should zero extend loop bound, since it's always >= 0. +// This routine collects upper bound and extends if needed. +// Return null if no bound available. +const SCEV *DependenceAnalysis::collectUpperBound(const Loop *L, + Type *T) const { + if (SE->hasLoopInvariantBackedgeTakenCount(L)) { + const SCEV *UB = SE->getBackedgeTakenCount(L); + return SE->getNoopOrZeroExtend(UB, T); + } + return NULL; +} + + +// Calls collectUpperBound(), then attempts to cast it to SCEVConstant. +// If the cast fails, returns NULL. +const SCEVConstant *DependenceAnalysis::collectConstantUpperBound(const Loop *L, + Type *T + ) const { + if (const SCEV *UB = collectUpperBound(L, T)) + return dyn_cast(UB); + return NULL; +} + + +// testZIV - +// When we have a pair of subscripts of the form [c1] and [c2], +// where c1 and c2 are both loop invariant, we attack it using +// the ZIV test. Basically, we test by comparing the two values, +// but there are actually three possible results: +// 1) the values are equal, so there's a dependence +// 2) the values are different, so there's no dependence +// 3) the values might be equal, so we have to assume a dependence. +// +// Return true if dependence disproved. +bool DependenceAnalysis::testZIV(const SCEV *Src, + const SCEV *Dst, + FullDependence &Result) const { + DEBUG(dbgs() << " src = " << *Src << "\n"); + DEBUG(dbgs() << " dst = " << *Dst << "\n"); + ++ZIVapplications; + if (isKnownPredicate(CmpInst::ICMP_EQ, Src, Dst)) { + DEBUG(dbgs() << " provably dependent\n"); + return false; // provably dependent + } + if (isKnownPredicate(CmpInst::ICMP_NE, Src, Dst)) { + DEBUG(dbgs() << " provably independent\n"); + ++ZIVindependence; + return true; // provably independent + } + DEBUG(dbgs() << " possibly dependent\n"); + Result.Consistent = false; + return false; // possibly dependent +} + + +// strongSIVtest - +// From the paper, Practical Dependence Testing, Section 4.2.1 +// +// When we have a pair of subscripts of the form [c1 + a*i] and [c2 + a*i], +// where i is an induction variable, c1 and c2 are loop invariant, +// and a is a constant, we can solve it exactly using the Strong SIV test. 
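+//
+// For example, a loop like
+//
+//    for (i = 0; i < N; i++) {
+//      A[2*i + 4] = ...;
+//      ... = A[2*i];
+//    }
+//
+// has a = 2, c1 = 4, and c2 = 0, giving a flow dependence from the store
+// to the load with distance (4 - 0)/2 = 2 and direction <.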
+// +// Can prove independence. Failing that, can compute distance (and direction). +// In the presence of symbolic terms, we can sometimes make progress. +// +// If there's a dependence, +// +// c1 + a*i = c2 + a*i' +// +// The dependence distance is +// +// d = i' - i = (c1 - c2)/a +// +// A dependence only exists if d is an integer and abs(d) <= U, where U is the +// loop's upper bound. If a dependence exists, the dependence direction is +// defined as +// +// { < if d > 0 +// direction = { = if d = 0 +// { > if d < 0 +// +// Return true if dependence disproved. +bool DependenceAnalysis::strongSIVtest(const SCEV *Coeff, + const SCEV *SrcConst, + const SCEV *DstConst, + const Loop *CurLoop, + unsigned Level, + FullDependence &Result, + Constraint &NewConstraint) const { + DEBUG(dbgs() << "\tStrong SIV test\n"); + DEBUG(dbgs() << "\t Coeff = " << *Coeff); + DEBUG(dbgs() << ", " << *Coeff->getType() << "\n"); + DEBUG(dbgs() << "\t SrcConst = " << *SrcConst); + DEBUG(dbgs() << ", " << *SrcConst->getType() << "\n"); + DEBUG(dbgs() << "\t DstConst = " << *DstConst); + DEBUG(dbgs() << ", " << *DstConst->getType() << "\n"); + ++StrongSIVapplications; + assert(0 < Level && Level <= CommonLevels && "level out of range"); + Level--; + + const SCEV *Delta = SE->getMinusSCEV(SrcConst, DstConst); + DEBUG(dbgs() << "\t Delta = " << *Delta); + DEBUG(dbgs() << ", " << *Delta->getType() << "\n"); + + // check that |Delta| < iteration count + if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) { + DEBUG(dbgs() << "\t UpperBound = " << *UpperBound); + DEBUG(dbgs() << ", " << *UpperBound->getType() << "\n"); + const SCEV *AbsDelta = + SE->isKnownNonNegative(Delta) ? Delta : SE->getNegativeSCEV(Delta); + const SCEV *AbsCoeff = + SE->isKnownNonNegative(Coeff) ? Coeff : SE->getNegativeSCEV(Coeff); + const SCEV *Product = SE->getMulExpr(UpperBound, AbsCoeff); + if (isKnownPredicate(CmpInst::ICMP_SGT, AbsDelta, Product)) { + // Distance greater than trip count - no dependence + ++StrongSIVindependence; + ++StrongSIVsuccesses; + return true; + } + } + + // Can we compute distance? 
+ if (isa(Delta) && isa(Coeff)) { + APInt ConstDelta = cast(Delta)->getValue()->getValue(); + APInt ConstCoeff = cast(Coeff)->getValue()->getValue(); + APInt Distance = ConstDelta; // these need to be initialized + APInt Remainder = ConstDelta; + APInt::sdivrem(ConstDelta, ConstCoeff, Distance, Remainder); + DEBUG(dbgs() << "\t Distance = " << Distance << "\n"); + DEBUG(dbgs() << "\t Remainder = " << Remainder << "\n"); + // Make sure Coeff divides Delta exactly + if (Remainder != 0) { + // Coeff doesn't divide Distance, no dependence + ++StrongSIVindependence; + ++StrongSIVsuccesses; + return true; + } + Result.DV[Level].Distance = SE->getConstant(Distance); + NewConstraint.setDistance(SE->getConstant(Distance), CurLoop); + if (Distance.sgt(0)) + Result.DV[Level].Direction &= Dependence::DVEntry::LT; + else if (Distance.slt(0)) + Result.DV[Level].Direction &= Dependence::DVEntry::GT; + else + Result.DV[Level].Direction &= Dependence::DVEntry::EQ; + ++StrongSIVsuccesses; + } + else if (Delta->isZero()) { + // since 0/X == 0 + Result.DV[Level].Distance = Delta; + NewConstraint.setDistance(Delta, CurLoop); + Result.DV[Level].Direction &= Dependence::DVEntry::EQ; + ++StrongSIVsuccesses; + } + else { + if (Coeff->isOne()) { + DEBUG(dbgs() << "\t Distance = " << *Delta << "\n"); + Result.DV[Level].Distance = Delta; // since X/1 == X + NewConstraint.setDistance(Delta, CurLoop); + } + else { + Result.Consistent = false; + NewConstraint.setLine(Coeff, + SE->getNegativeSCEV(Coeff), + SE->getNegativeSCEV(Delta), CurLoop); + } + + // maybe we can get a useful direction + bool DeltaMaybeZero = !SE->isKnownNonZero(Delta); + bool DeltaMaybePositive = !SE->isKnownNonPositive(Delta); + bool DeltaMaybeNegative = !SE->isKnownNonNegative(Delta); + bool CoeffMaybePositive = !SE->isKnownNonPositive(Coeff); + bool CoeffMaybeNegative = !SE->isKnownNonNegative(Coeff); + // The double negatives above are confusing. + // It helps to read !SE->isKnownNonZero(Delta) + // as "Delta might be Zero" + unsigned NewDirection = Dependence::DVEntry::NONE; + if ((DeltaMaybePositive && CoeffMaybePositive) || + (DeltaMaybeNegative && CoeffMaybeNegative)) + NewDirection = Dependence::DVEntry::LT; + if (DeltaMaybeZero) + NewDirection |= Dependence::DVEntry::EQ; + if ((DeltaMaybeNegative && CoeffMaybePositive) || + (DeltaMaybePositive && CoeffMaybeNegative)) + NewDirection |= Dependence::DVEntry::GT; + if (NewDirection < Result.DV[Level].Direction) + ++StrongSIVsuccesses; + Result.DV[Level].Direction &= NewDirection; + } + return false; +} + + +// weakCrossingSIVtest - +// From the paper, Practical Dependence Testing, Section 4.2.2 +// +// When we have a pair of subscripts of the form [c1 + a*i] and [c2 - a*i], +// where i is an induction variable, c1 and c2 are loop invariant, +// and a is a constant, we can solve it exactly using the +// Weak-Crossing SIV test. +// +// Given c1 + a*i = c2 - a*i', we can look for the intersection of +// the two lines, where i = i', yielding +// +// c1 + a*i = c2 - a*i +// 2a*i = c2 - c1 +// i = (c2 - c1)/2a +// +// If i < 0, there is no dependence. +// If i > upperbound, there is no dependence. +// If i = 0 (i.e., if c1 = c2), there's a dependence with distance = 0. +// If i = upperbound, there's a dependence with distance = 0. +// If i is integral, there's a dependence (all directions). +// If the non-integer part = 1/2, there's a dependence (<> directions). +// Otherwise, there's no dependence. +// +// Can prove independence. Failing that, +// can sometimes refine the directions. 
+// Can determine iteration for splitting. +// +// Return true if dependence disproved. +bool DependenceAnalysis::weakCrossingSIVtest(const SCEV *Coeff, + const SCEV *SrcConst, + const SCEV *DstConst, + const Loop *CurLoop, + unsigned Level, + FullDependence &Result, + Constraint &NewConstraint, + const SCEV *&SplitIter) const { + DEBUG(dbgs() << "\tWeak-Crossing SIV test\n"); + DEBUG(dbgs() << "\t Coeff = " << *Coeff << "\n"); + DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); + DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); + ++WeakCrossingSIVapplications; + assert(0 < Level && Level <= CommonLevels && "Level out of range"); + Level--; + Result.Consistent = false; + const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); + DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); + NewConstraint.setLine(Coeff, Coeff, Delta, CurLoop); + if (Delta->isZero()) { + Result.DV[Level].Direction &= ~Dependence::DVEntry::LT; + Result.DV[Level].Direction &= ~Dependence::DVEntry::GT; + ++WeakCrossingSIVsuccesses; + if (!Result.DV[Level].Direction) { + ++WeakCrossingSIVindependence; + return true; + } + Result.DV[Level].Distance = Delta; // = 0 + return false; + } + const SCEVConstant *ConstCoeff = dyn_cast(Coeff); + if (!ConstCoeff) + return false; + + Result.DV[Level].Splitable = true; + if (SE->isKnownNegative(ConstCoeff)) { + ConstCoeff = dyn_cast(SE->getNegativeSCEV(ConstCoeff)); + assert(ConstCoeff && + "dynamic cast of negative of ConstCoeff should yield constant"); + Delta = SE->getNegativeSCEV(Delta); + } + assert(SE->isKnownPositive(ConstCoeff) && "ConstCoeff should be positive"); + + // compute SplitIter for use by DependenceAnalysis::getSplitIteration() + SplitIter = + SE->getUDivExpr(SE->getSMaxExpr(SE->getConstant(Delta->getType(), 0), + Delta), + SE->getMulExpr(SE->getConstant(Delta->getType(), 2), + ConstCoeff)); + DEBUG(dbgs() << "\t Split iter = " << *SplitIter << "\n"); + + const SCEVConstant *ConstDelta = dyn_cast(Delta); + if (!ConstDelta) + return false; + + // We're certain that ConstCoeff > 0; therefore, + // if Delta < 0, then no dependence. + DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); + DEBUG(dbgs() << "\t ConstCoeff = " << *ConstCoeff << "\n"); + if (SE->isKnownNegative(Delta)) { + // No dependence, Delta < 0 + ++WeakCrossingSIVindependence; + ++WeakCrossingSIVsuccesses; + return true; + } + + // We're certain that Delta > 0 and ConstCoeff > 0. 
+ // Check Delta/(2*ConstCoeff) against upper loop bound + if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) { + DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n"); + const SCEV *ConstantTwo = SE->getConstant(UpperBound->getType(), 2); + const SCEV *ML = SE->getMulExpr(SE->getMulExpr(ConstCoeff, UpperBound), + ConstantTwo); + DEBUG(dbgs() << "\t ML = " << *ML << "\n"); + if (isKnownPredicate(CmpInst::ICMP_SGT, Delta, ML)) { + // Delta too big, no dependence + ++WeakCrossingSIVindependence; + ++WeakCrossingSIVsuccesses; + return true; + } + if (isKnownPredicate(CmpInst::ICMP_EQ, Delta, ML)) { + // i = i' = UB + Result.DV[Level].Direction &= ~Dependence::DVEntry::LT; + Result.DV[Level].Direction &= ~Dependence::DVEntry::GT; + ++WeakCrossingSIVsuccesses; + if (!Result.DV[Level].Direction) { + ++WeakCrossingSIVindependence; + return true; + } + Result.DV[Level].Splitable = false; + Result.DV[Level].Distance = SE->getConstant(Delta->getType(), 0); + return false; + } + } + + // check that Coeff divides Delta + APInt APDelta = ConstDelta->getValue()->getValue(); + APInt APCoeff = ConstCoeff->getValue()->getValue(); + APInt Distance = APDelta; // these need to be initialzed + APInt Remainder = APDelta; + APInt::sdivrem(APDelta, APCoeff, Distance, Remainder); + DEBUG(dbgs() << "\t Remainder = " << Remainder << "\n"); + if (Remainder != 0) { + // Coeff doesn't divide Delta, no dependence + ++WeakCrossingSIVindependence; + ++WeakCrossingSIVsuccesses; + return true; + } + DEBUG(dbgs() << "\t Distance = " << Distance << "\n"); + + // if 2*Coeff doesn't divide Delta, then the equal direction isn't possible + APInt Two = APInt(Distance.getBitWidth(), 2, true); + Remainder = Distance.srem(Two); + DEBUG(dbgs() << "\t Remainder = " << Remainder << "\n"); + if (Remainder != 0) { + // Equal direction isn't possible + Result.DV[Level].Direction &= ~Dependence::DVEntry::EQ; + ++WeakCrossingSIVsuccesses; + } + return false; +} + + +// Kirch's algorithm, from +// +// Optimizing Supercompilers for Supercomputers +// Michael Wolfe +// MIT Press, 1989 +// +// Program 2.1, page 29. +// Computes the GCD of AM and BM. +// Also finds a solution to the equation ax - by = gdc(a, b). +// Returns true iff the gcd divides Delta. +static +bool findGCD(unsigned Bits, APInt AM, APInt BM, APInt Delta, + APInt &G, APInt &X, APInt &Y) { + APInt A0(Bits, 1, true), A1(Bits, 0, true); + APInt B0(Bits, 0, true), B1(Bits, 1, true); + APInt G0 = AM.abs(); + APInt G1 = BM.abs(); + APInt Q = G0; // these need to be initialized + APInt R = G0; + APInt::sdivrem(G0, G1, Q, R); + while (R != 0) { + APInt A2 = A0 - Q*A1; A0 = A1; A1 = A2; + APInt B2 = B0 - Q*B1; B0 = B1; B1 = B2; + G0 = G1; G1 = R; + APInt::sdivrem(G0, G1, Q, R); + } + G = G1; + DEBUG(dbgs() << "\t GCD = " << G << "\n"); + X = AM.slt(0) ? -A1 : A1; + Y = BM.slt(0) ? 
B1 : -B1; + + // make sure gcd divides Delta + R = Delta.srem(G); + if (R != 0) + return true; // gcd doesn't divide Delta, no dependence + Q = Delta.sdiv(G); + X *= Q; + Y *= Q; + return false; +} + + +static +APInt floorOfQuotient(APInt A, APInt B) { + APInt Q = A; // these need to be initialized + APInt R = A; + APInt::sdivrem(A, B, Q, R); + if (R == 0) + return Q; + if ((A.sgt(0) && B.sgt(0)) || + (A.slt(0) && B.slt(0))) + return Q; + else + return Q - 1; +} + + +static +APInt ceilingOfQuotient(APInt A, APInt B) { + APInt Q = A; // these need to be initialized + APInt R = A; + APInt::sdivrem(A, B, Q, R); + if (R == 0) + return Q; + if ((A.sgt(0) && B.sgt(0)) || + (A.slt(0) && B.slt(0))) + return Q + 1; + else + return Q; +} + + +static +APInt maxAPInt(APInt A, APInt B) { + return A.sgt(B) ? A : B; +} + + +static +APInt minAPInt(APInt A, APInt B) { + return A.slt(B) ? A : B; +} + + +// exactSIVtest - +// When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*i], +// where i is an induction variable, c1 and c2 are loop invariant, and a1 +// and a2 are constant, we can solve it exactly using an algorithm developed +// by Banerjee and Wolfe. See Section 2.5.3 in +// +// Optimizing Supercompilers for Supercomputers +// Michael Wolfe +// MIT Press, 1989 +// +// It's slower than the specialized tests (strong SIV, weak-zero SIV, etc), +// so use them if possible. They're also a bit better with symbolics and, +// in the case of the strong SIV test, can compute Distances. +// +// Return true if dependence disproved. +bool DependenceAnalysis::exactSIVtest(const SCEV *SrcCoeff, + const SCEV *DstCoeff, + const SCEV *SrcConst, + const SCEV *DstConst, + const Loop *CurLoop, + unsigned Level, + FullDependence &Result, + Constraint &NewConstraint) const { + DEBUG(dbgs() << "\tExact SIV test\n"); + DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff << " = AM\n"); + DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff << " = BM\n"); + DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); + DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); + ++ExactSIVapplications; + assert(0 < Level && Level <= CommonLevels && "Level out of range"); + Level--; + Result.Consistent = false; + const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); + DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); + NewConstraint.setLine(SrcCoeff, SE->getNegativeSCEV(DstCoeff), + Delta, CurLoop); + const SCEVConstant *ConstDelta = dyn_cast(Delta); + const SCEVConstant *ConstSrcCoeff = dyn_cast(SrcCoeff); + const SCEVConstant *ConstDstCoeff = dyn_cast(DstCoeff); + if (!ConstDelta || !ConstSrcCoeff || !ConstDstCoeff) + return false; + + // find gcd + APInt G, X, Y; + APInt AM = ConstSrcCoeff->getValue()->getValue(); + APInt BM = ConstDstCoeff->getValue()->getValue(); + unsigned Bits = AM.getBitWidth(); + if (findGCD(Bits, AM, BM, ConstDelta->getValue()->getValue(), G, X, Y)) { + // gcd doesn't divide Delta, no dependence + ++ExactSIVindependence; + ++ExactSIVsuccesses; + return true; + } + + DEBUG(dbgs() << "\t X = " << X << ", Y = " << Y << "\n"); + + // since SCEV construction normalizes, LM = 0 + APInt UM(Bits, 1, true); + bool UMvalid = false; + // UM is perhaps unavailable, let's check + if (const SCEVConstant *CUB = + collectConstantUpperBound(CurLoop, Delta->getType())) { + UM = CUB->getValue()->getValue(); + DEBUG(dbgs() << "\t UM = " << UM << "\n"); + UMvalid = true; + } + + APInt TU(APInt::getSignedMaxValue(Bits)); + APInt TL(APInt::getSignedMinValue(Bits)); + + // test(BM/G, LM-X) and test(-BM/G, X-UM) + APInt TMUL 
= BM.sdiv(G); + if (TMUL.sgt(0)) { + TL = maxAPInt(TL, ceilingOfQuotient(-X, TMUL)); + DEBUG(dbgs() << "\t TL = " << TL << "\n"); + if (UMvalid) { + TU = minAPInt(TU, floorOfQuotient(UM - X, TMUL)); + DEBUG(dbgs() << "\t TU = " << TU << "\n"); + } + } + else { + TU = minAPInt(TU, floorOfQuotient(-X, TMUL)); + DEBUG(dbgs() << "\t TU = " << TU << "\n"); + if (UMvalid) { + TL = maxAPInt(TL, ceilingOfQuotient(UM - X, TMUL)); + DEBUG(dbgs() << "\t TL = " << TL << "\n"); + } + } + + // test(AM/G, LM-Y) and test(-AM/G, Y-UM) + TMUL = AM.sdiv(G); + if (TMUL.sgt(0)) { + TL = maxAPInt(TL, ceilingOfQuotient(-Y, TMUL)); + DEBUG(dbgs() << "\t TL = " << TL << "\n"); + if (UMvalid) { + TU = minAPInt(TU, floorOfQuotient(UM - Y, TMUL)); + DEBUG(dbgs() << "\t TU = " << TU << "\n"); + } + } + else { + TU = minAPInt(TU, floorOfQuotient(-Y, TMUL)); + DEBUG(dbgs() << "\t TU = " << TU << "\n"); + if (UMvalid) { + TL = maxAPInt(TL, ceilingOfQuotient(UM - Y, TMUL)); + DEBUG(dbgs() << "\t TL = " << TL << "\n"); + } + } + if (TL.sgt(TU)) { + ++ExactSIVindependence; + ++ExactSIVsuccesses; + return true; + } + + // explore directions + unsigned NewDirection = Dependence::DVEntry::NONE; + + // less than + APInt SaveTU(TU); // save these + APInt SaveTL(TL); + DEBUG(dbgs() << "\t exploring LT direction\n"); + TMUL = AM - BM; + if (TMUL.sgt(0)) { + TL = maxAPInt(TL, ceilingOfQuotient(X - Y + 1, TMUL)); + DEBUG(dbgs() << "\t\t TL = " << TL << "\n"); + } + else { + TU = minAPInt(TU, floorOfQuotient(X - Y + 1, TMUL)); + DEBUG(dbgs() << "\t\t TU = " << TU << "\n"); + } + if (TL.sle(TU)) { + NewDirection |= Dependence::DVEntry::LT; + ++ExactSIVsuccesses; + } + + // equal + TU = SaveTU; // restore + TL = SaveTL; + DEBUG(dbgs() << "\t exploring EQ direction\n"); + if (TMUL.sgt(0)) { + TL = maxAPInt(TL, ceilingOfQuotient(X - Y, TMUL)); + DEBUG(dbgs() << "\t\t TL = " << TL << "\n"); + } + else { + TU = minAPInt(TU, floorOfQuotient(X - Y, TMUL)); + DEBUG(dbgs() << "\t\t TU = " << TU << "\n"); + } + TMUL = BM - AM; + if (TMUL.sgt(0)) { + TL = maxAPInt(TL, ceilingOfQuotient(Y - X, TMUL)); + DEBUG(dbgs() << "\t\t TL = " << TL << "\n"); + } + else { + TU = minAPInt(TU, floorOfQuotient(Y - X, TMUL)); + DEBUG(dbgs() << "\t\t TU = " << TU << "\n"); + } + if (TL.sle(TU)) { + NewDirection |= Dependence::DVEntry::EQ; + ++ExactSIVsuccesses; + } + + // greater than + TU = SaveTU; // restore + TL = SaveTL; + DEBUG(dbgs() << "\t exploring GT direction\n"); + if (TMUL.sgt(0)) { + TL = maxAPInt(TL, ceilingOfQuotient(Y - X + 1, TMUL)); + DEBUG(dbgs() << "\t\t TL = " << TL << "\n"); + } + else { + TU = minAPInt(TU, floorOfQuotient(Y - X + 1, TMUL)); + DEBUG(dbgs() << "\t\t TU = " << TU << "\n"); + } + if (TL.sle(TU)) { + NewDirection |= Dependence::DVEntry::GT; + ++ExactSIVsuccesses; + } + + // finished + Result.DV[Level].Direction &= NewDirection; + if (Result.DV[Level].Direction == Dependence::DVEntry::NONE) + ++ExactSIVindependence; + return Result.DV[Level].Direction == Dependence::DVEntry::NONE; +} + + + +// Return true if the divisor evenly divides the dividend. 
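+// For example, a dividend of 6 with a divisor of 3 returns true, while a
+// dividend of 7 with a divisor of 3 returns false (7 srem 3 == 1).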
+static +bool isRemainderZero(const SCEVConstant *Dividend, + const SCEVConstant *Divisor) { + APInt ConstDividend = Dividend->getValue()->getValue(); + APInt ConstDivisor = Divisor->getValue()->getValue(); + return ConstDividend.srem(ConstDivisor) == 0; +} + + +// weakZeroSrcSIVtest - +// From the paper, Practical Dependence Testing, Section 4.2.2 +// +// When we have a pair of subscripts of the form [c1] and [c2 + a*i], +// where i is an induction variable, c1 and c2 are loop invariant, +// and a is a constant, we can solve it exactly using the +// Weak-Zero SIV test. +// +// Given +// +// c1 = c2 + a*i +// +// we get +// +// (c1 - c2)/a = i +// +// If i is not an integer, there's no dependence. +// If i < 0 or > UB, there's no dependence. +// If i = 0, the direction is <= and peeling the +// 1st iteration will break the dependence. +// If i = UB, the direction is >= and peeling the +// last iteration will break the dependence. +// Otherwise, the direction is *. +// +// Can prove independence. Failing that, we can sometimes refine +// the directions. Can sometimes show that first or last +// iteration carries all the dependences (so worth peeling). +// +// (see also weakZeroDstSIVtest) +// +// Return true if dependence disproved. +bool DependenceAnalysis::weakZeroSrcSIVtest(const SCEV *DstCoeff, + const SCEV *SrcConst, + const SCEV *DstConst, + const Loop *CurLoop, + unsigned Level, + FullDependence &Result, + Constraint &NewConstraint) const { + // For the WeakSIV test, it's possible the loop isn't common to + // the Src and Dst loops. If it isn't, then there's no need to + // record a direction. + DEBUG(dbgs() << "\tWeak-Zero (src) SIV test\n"); + DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff << "\n"); + DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); + DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); + ++WeakZeroSIVapplications; + assert(0 < Level && Level <= MaxLevels && "Level out of range"); + Level--; + Result.Consistent = false; + const SCEV *Delta = SE->getMinusSCEV(SrcConst, DstConst); + NewConstraint.setLine(SE->getConstant(Delta->getType(), 0), + DstCoeff, Delta, CurLoop); + DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); + if (isKnownPredicate(CmpInst::ICMP_EQ, SrcConst, DstConst)) { + if (Level < CommonLevels) { + Result.DV[Level].Direction &= Dependence::DVEntry::LE; + Result.DV[Level].PeelFirst = true; + ++WeakZeroSIVsuccesses; + } + return false; // dependences caused by first iteration + } + const SCEVConstant *ConstCoeff = dyn_cast(DstCoeff); + if (!ConstCoeff) + return false; + const SCEV *AbsCoeff = + SE->isKnownNegative(ConstCoeff) ? + SE->getNegativeSCEV(ConstCoeff) : ConstCoeff; + const SCEV *NewDelta = + SE->isKnownNegative(ConstCoeff) ? 
SE->getNegativeSCEV(Delta) : Delta; + + // check that Delta/SrcCoeff < iteration count + // really check NewDelta < count*AbsCoeff + if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) { + DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n"); + const SCEV *Product = SE->getMulExpr(AbsCoeff, UpperBound); + if (isKnownPredicate(CmpInst::ICMP_SGT, NewDelta, Product)) { + ++WeakZeroSIVindependence; + ++WeakZeroSIVsuccesses; + return true; + } + if (isKnownPredicate(CmpInst::ICMP_EQ, NewDelta, Product)) { + // dependences caused by last iteration + if (Level < CommonLevels) { + Result.DV[Level].Direction &= Dependence::DVEntry::GE; + Result.DV[Level].PeelLast = true; + ++WeakZeroSIVsuccesses; + } + return false; + } + } + + // check that Delta/SrcCoeff >= 0 + // really check that NewDelta >= 0 + if (SE->isKnownNegative(NewDelta)) { + // No dependence, newDelta < 0 + ++WeakZeroSIVindependence; + ++WeakZeroSIVsuccesses; + return true; + } + + // if SrcCoeff doesn't divide Delta, then no dependence + if (isa(Delta) && + !isRemainderZero(cast(Delta), ConstCoeff)) { + ++WeakZeroSIVindependence; + ++WeakZeroSIVsuccesses; + return true; + } + return false; +} + + +// weakZeroDstSIVtest - +// From the paper, Practical Dependence Testing, Section 4.2.2 +// +// When we have a pair of subscripts of the form [c1 + a*i] and [c2], +// where i is an induction variable, c1 and c2 are loop invariant, +// and a is a constant, we can solve it exactly using the +// Weak-Zero SIV test. +// +// Given +// +// c1 + a*i = c2 +// +// we get +// +// i = (c2 - c1)/a +// +// If i is not an integer, there's no dependence. +// If i < 0 or > UB, there's no dependence. +// If i = 0, the direction is <= and peeling the +// 1st iteration will break the dependence. +// If i = UB, the direction is >= and peeling the +// last iteration will break the dependence. +// Otherwise, the direction is *. +// +// Can prove independence. Failing that, we can sometimes refine +// the directions. Can sometimes show that first or last +// iteration carries all the dependences (so worth peeling). +// +// (see also weakZeroSrcSIVtest) +// +// Return true if dependence disproved. +bool DependenceAnalysis::weakZeroDstSIVtest(const SCEV *SrcCoeff, + const SCEV *SrcConst, + const SCEV *DstConst, + const Loop *CurLoop, + unsigned Level, + FullDependence &Result, + Constraint &NewConstraint) const { + // For the WeakSIV test, it's possible the loop isn't common to the + // Src and Dst loops. If it isn't, then there's no need to record a direction. + DEBUG(dbgs() << "\tWeak-Zero (dst) SIV test\n"); + DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff << "\n"); + DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); + DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); + ++WeakZeroSIVapplications; + assert(0 < Level && Level <= SrcLevels && "Level out of range"); + Level--; + Result.Consistent = false; + const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); + NewConstraint.setLine(SrcCoeff, SE->getConstant(Delta->getType(), 0), + Delta, CurLoop); + DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); + if (isKnownPredicate(CmpInst::ICMP_EQ, DstConst, SrcConst)) { + if (Level < CommonLevels) { + Result.DV[Level].Direction &= Dependence::DVEntry::LE; + Result.DV[Level].PeelFirst = true; + ++WeakZeroSIVsuccesses; + } + return false; // dependences caused by first iteration + } + const SCEVConstant *ConstCoeff = dyn_cast(SrcCoeff); + if (!ConstCoeff) + return false; + const SCEV *AbsCoeff = + SE->isKnownNegative(ConstCoeff) ? 
+ SE->getNegativeSCEV(ConstCoeff) : ConstCoeff; + const SCEV *NewDelta = + SE->isKnownNegative(ConstCoeff) ? SE->getNegativeSCEV(Delta) : Delta; + + // check that Delta/SrcCoeff < iteration count + // really check NewDelta < count*AbsCoeff + if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) { + DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n"); + const SCEV *Product = SE->getMulExpr(AbsCoeff, UpperBound); + if (isKnownPredicate(CmpInst::ICMP_SGT, NewDelta, Product)) { + ++WeakZeroSIVindependence; + ++WeakZeroSIVsuccesses; + return true; + } + if (isKnownPredicate(CmpInst::ICMP_EQ, NewDelta, Product)) { + // dependences caused by last iteration + if (Level < CommonLevels) { + Result.DV[Level].Direction &= Dependence::DVEntry::GE; + Result.DV[Level].PeelLast = true; + ++WeakZeroSIVsuccesses; + } + return false; + } + } + + // check that Delta/SrcCoeff >= 0 + // really check that NewDelta >= 0 + if (SE->isKnownNegative(NewDelta)) { + // No dependence, newDelta < 0 + ++WeakZeroSIVindependence; + ++WeakZeroSIVsuccesses; + return true; + } + + // if SrcCoeff doesn't divide Delta, then no dependence + if (isa(Delta) && + !isRemainderZero(cast(Delta), ConstCoeff)) { + ++WeakZeroSIVindependence; + ++WeakZeroSIVsuccesses; + return true; + } + return false; +} + + +// exactRDIVtest - Tests the RDIV subscript pair for dependence. +// Things of the form [c1 + a*i] and [c2 + b*j], +// where i and j are induction variable, c1 and c2 are loop invariant, +// and a and b are constants. +// Returns true if any possible dependence is disproved. +// Marks the result as inconsistant. +// Works in some cases that symbolicRDIVtest doesn't, and vice versa. +bool DependenceAnalysis::exactRDIVtest(const SCEV *SrcCoeff, + const SCEV *DstCoeff, + const SCEV *SrcConst, + const SCEV *DstConst, + const Loop *SrcLoop, + const Loop *DstLoop, + FullDependence &Result) const { + DEBUG(dbgs() << "\tExact RDIV test\n"); + DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff << " = AM\n"); + DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff << " = BM\n"); + DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); + DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); + ++ExactRDIVapplications; + Result.Consistent = false; + const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); + DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); + const SCEVConstant *ConstDelta = dyn_cast(Delta); + const SCEVConstant *ConstSrcCoeff = dyn_cast(SrcCoeff); + const SCEVConstant *ConstDstCoeff = dyn_cast(DstCoeff); + if (!ConstDelta || !ConstSrcCoeff || !ConstDstCoeff) + return false; + + // find gcd + APInt G, X, Y; + APInt AM = ConstSrcCoeff->getValue()->getValue(); + APInt BM = ConstDstCoeff->getValue()->getValue(); + unsigned Bits = AM.getBitWidth(); + if (findGCD(Bits, AM, BM, ConstDelta->getValue()->getValue(), G, X, Y)) { + // gcd doesn't divide Delta, no dependence + ++ExactRDIVindependence; + return true; + } + + DEBUG(dbgs() << "\t X = " << X << ", Y = " << Y << "\n"); + + // since SCEV construction seems to normalize, LM = 0 + APInt SrcUM(Bits, 1, true); + bool SrcUMvalid = false; + // SrcUM is perhaps unavailable, let's check + if (const SCEVConstant *UpperBound = + collectConstantUpperBound(SrcLoop, Delta->getType())) { + SrcUM = UpperBound->getValue()->getValue(); + DEBUG(dbgs() << "\t SrcUM = " << SrcUM << "\n"); + SrcUMvalid = true; + } + + APInt DstUM(Bits, 1, true); + bool DstUMvalid = false; + // UM is perhaps unavailable, let's check + if (const SCEVConstant *UpperBound = + 
collectConstantUpperBound(DstLoop, Delta->getType())) { + DstUM = UpperBound->getValue()->getValue(); + DEBUG(dbgs() << "\t DstUM = " << DstUM << "\n"); + DstUMvalid = true; + } + + APInt TU(APInt::getSignedMaxValue(Bits)); + APInt TL(APInt::getSignedMinValue(Bits)); + + // test(BM/G, LM-X) and test(-BM/G, X-UM) + APInt TMUL = BM.sdiv(G); + if (TMUL.sgt(0)) { + TL = maxAPInt(TL, ceilingOfQuotient(-X, TMUL)); + DEBUG(dbgs() << "\t TL = " << TL << "\n"); + if (SrcUMvalid) { + TU = minAPInt(TU, floorOfQuotient(SrcUM - X, TMUL)); + DEBUG(dbgs() << "\t TU = " << TU << "\n"); + } + } + else { + TU = minAPInt(TU, floorOfQuotient(-X, TMUL)); + DEBUG(dbgs() << "\t TU = " << TU << "\n"); + if (SrcUMvalid) { + TL = maxAPInt(TL, ceilingOfQuotient(SrcUM - X, TMUL)); + DEBUG(dbgs() << "\t TL = " << TL << "\n"); + } + } + + // test(AM/G, LM-Y) and test(-AM/G, Y-UM) + TMUL = AM.sdiv(G); + if (TMUL.sgt(0)) { + TL = maxAPInt(TL, ceilingOfQuotient(-Y, TMUL)); + DEBUG(dbgs() << "\t TL = " << TL << "\n"); + if (DstUMvalid) { + TU = minAPInt(TU, floorOfQuotient(DstUM - Y, TMUL)); + DEBUG(dbgs() << "\t TU = " << TU << "\n"); + } + } + else { + TU = minAPInt(TU, floorOfQuotient(-Y, TMUL)); + DEBUG(dbgs() << "\t TU = " << TU << "\n"); + if (DstUMvalid) { + TL = maxAPInt(TL, ceilingOfQuotient(DstUM - Y, TMUL)); + DEBUG(dbgs() << "\t TL = " << TL << "\n"); + } + } + if (TL.sgt(TU)) + ++ExactRDIVindependence; + return TL.sgt(TU); +} + + +// symbolicRDIVtest - +// In Section 4.5 of the Practical Dependence Testing paper,the authors +// introduce a special case of Banerjee's Inequalities (also called the +// Extreme-Value Test) that can handle some of the SIV and RDIV cases, +// particularly cases with symbolics. Since it's only able to disprove +// dependence (not compute distances or directions), we'll use it as a +// fall back for the other tests. +// +// When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*j] +// where i and j are induction variables and c1 and c2 are loop invariants, +// we can use the symbolic tests to disprove some dependences, serving as a +// backup for the RDIV test. Note that i and j can be the same variable, +// letting this test serve as a backup for the various SIV tests. +// +// For a dependence to exist, c1 + a1*i must equal c2 + a2*j for some +// 0 <= i <= N1 and some 0 <= j <= N2, where N1 and N2 are the (normalized) +// loop bounds for the i and j loops, respectively. So, ... +// +// c1 + a1*i = c2 + a2*j +// a1*i - a2*j = c2 - c1 +// +// To test for a dependence, we compute c2 - c1 and make sure it's in the +// range of the maximum and minimum possible values of a1*i - a2*j. 
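+//
+// For example, given loops like
+//
+//    for (i = 0; i < 10; i++)
+//      A[2*i] = ...;
+//    for (j = 0; j < 10; j++)
+//      ... = A[3*j + 50];
+//
+// c2 - c1 = 50, but 2*i - 3*j can never exceed 2*9 = 18, so no dependence
+// is possible.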
+// Considering the signs of a1 and a2, we have 4 possible cases: +// +// 1) If a1 >= 0 and a2 >= 0, then +// a1*0 - a2*N2 <= c2 - c1 <= a1*N1 - a2*0 +// -a2*N2 <= c2 - c1 <= a1*N1 +// +// 2) If a1 >= 0 and a2 <= 0, then +// a1*0 - a2*0 <= c2 - c1 <= a1*N1 - a2*N2 +// 0 <= c2 - c1 <= a1*N1 - a2*N2 +// +// 3) If a1 <= 0 and a2 >= 0, then +// a1*N1 - a2*N2 <= c2 - c1 <= a1*0 - a2*0 +// a1*N1 - a2*N2 <= c2 - c1 <= 0 +// +// 4) If a1 <= 0 and a2 <= 0, then +// a1*N1 - a2*0 <= c2 - c1 <= a1*0 - a2*N2 +// a1*N1 <= c2 - c1 <= -a2*N2 +// +// return true if dependence disproved +bool DependenceAnalysis::symbolicRDIVtest(const SCEV *A1, + const SCEV *A2, + const SCEV *C1, + const SCEV *C2, + const Loop *Loop1, + const Loop *Loop2) const { + ++SymbolicRDIVapplications; + DEBUG(dbgs() << "\ttry symbolic RDIV test\n"); + DEBUG(dbgs() << "\t A1 = " << *A1); + DEBUG(dbgs() << ", type = " << *A1->getType() << "\n"); + DEBUG(dbgs() << "\t A2 = " << *A2 << "\n"); + DEBUG(dbgs() << "\t C1 = " << *C1 << "\n"); + DEBUG(dbgs() << "\t C2 = " << *C2 << "\n"); + const SCEV *N1 = collectUpperBound(Loop1, A1->getType()); + const SCEV *N2 = collectUpperBound(Loop2, A1->getType()); + DEBUG(if (N1) dbgs() << "\t N1 = " << *N1 << "\n"); + DEBUG(if (N2) dbgs() << "\t N2 = " << *N2 << "\n"); + const SCEV *C2_C1 = SE->getMinusSCEV(C2, C1); + const SCEV *C1_C2 = SE->getMinusSCEV(C1, C2); + DEBUG(dbgs() << "\t C2 - C1 = " << *C2_C1 << "\n"); + DEBUG(dbgs() << "\t C1 - C2 = " << *C1_C2 << "\n"); + if (SE->isKnownNonNegative(A1)) { + if (SE->isKnownNonNegative(A2)) { + // A1 >= 0 && A2 >= 0 + if (N1) { + // make sure that c2 - c1 <= a1*N1 + const SCEV *A1N1 = SE->getMulExpr(A1, N1); + DEBUG(dbgs() << "\t A1*N1 = " << *A1N1 << "\n"); + if (isKnownPredicate(CmpInst::ICMP_SGT, C2_C1, A1N1)) { + ++SymbolicRDIVindependence; + return true; + } + } + if (N2) { + // make sure that -a2*N2 <= c2 - c1, or a2*N2 >= c1 - c2 + const SCEV *A2N2 = SE->getMulExpr(A2, N2); + DEBUG(dbgs() << "\t A2*N2 = " << *A2N2 << "\n"); + if (isKnownPredicate(CmpInst::ICMP_SLT, A2N2, C1_C2)) { + ++SymbolicRDIVindependence; + return true; + } + } + } + else if (SE->isKnownNonPositive(A2)) { + // a1 >= 0 && a2 <= 0 + if (N1 && N2) { + // make sure that c2 - c1 <= a1*N1 - a2*N2 + const SCEV *A1N1 = SE->getMulExpr(A1, N1); + const SCEV *A2N2 = SE->getMulExpr(A2, N2); + const SCEV *A1N1_A2N2 = SE->getMinusSCEV(A1N1, A2N2); + DEBUG(dbgs() << "\t A1*N1 - A2*N2 = " << *A1N1_A2N2 << "\n"); + if (isKnownPredicate(CmpInst::ICMP_SGT, C2_C1, A1N1_A2N2)) { + ++SymbolicRDIVindependence; + return true; + } + } + // make sure that 0 <= c2 - c1 + if (SE->isKnownNegative(C2_C1)) { + ++SymbolicRDIVindependence; + return true; + } + } + } + else if (SE->isKnownNonPositive(A1)) { + if (SE->isKnownNonNegative(A2)) { + // a1 <= 0 && a2 >= 0 + if (N1 && N2) { + // make sure that a1*N1 - a2*N2 <= c2 - c1 + const SCEV *A1N1 = SE->getMulExpr(A1, N1); + const SCEV *A2N2 = SE->getMulExpr(A2, N2); + const SCEV *A1N1_A2N2 = SE->getMinusSCEV(A1N1, A2N2); + DEBUG(dbgs() << "\t A1*N1 - A2*N2 = " << *A1N1_A2N2 << "\n"); + if (isKnownPredicate(CmpInst::ICMP_SGT, A1N1_A2N2, C2_C1)) { + ++SymbolicRDIVindependence; + return true; + } + } + // make sure that c2 - c1 <= 0 + if (SE->isKnownPositive(C2_C1)) { + ++SymbolicRDIVindependence; + return true; + } + } + else if (SE->isKnownNonPositive(A2)) { + // a1 <= 0 && a2 <= 0 + if (N1) { + // make sure that a1*N1 <= c2 - c1 + const SCEV *A1N1 = SE->getMulExpr(A1, N1); + DEBUG(dbgs() << "\t A1*N1 = " << *A1N1 << "\n"); + if 
(isKnownPredicate(CmpInst::ICMP_SGT, A1N1, C2_C1)) { + ++SymbolicRDIVindependence; + return true; + } + } + if (N2) { + // make sure that c2 - c1 <= -a2*N2, or c1 - c2 >= a2*N2 + const SCEV *A2N2 = SE->getMulExpr(A2, N2); + DEBUG(dbgs() << "\t A2*N2 = " << *A2N2 << "\n"); + if (isKnownPredicate(CmpInst::ICMP_SLT, C1_C2, A2N2)) { + ++SymbolicRDIVindependence; + return true; + } + } + } + } + return false; +} + + +// testSIV - +// When we have a pair of subscripts of the form [c1 + a1*i] and [c2 - a2*i] +// where i is an induction variable, c1 and c2 are loop invariant, and a1 and +// a2 are constant, we attack it with an SIV test. While they can all be +// solved with the Exact SIV test, it's worthwhile to use simpler tests when +// they apply; they're cheaper and sometimes more precise. +// +// Return true if dependence disproved. +bool DependenceAnalysis::testSIV(const SCEV *Src, + const SCEV *Dst, + unsigned &Level, + FullDependence &Result, + Constraint &NewConstraint, + const SCEV *&SplitIter) const { + DEBUG(dbgs() << " src = " << *Src << "\n"); + DEBUG(dbgs() << " dst = " << *Dst << "\n"); + const SCEVAddRecExpr *SrcAddRec = dyn_cast(Src); + const SCEVAddRecExpr *DstAddRec = dyn_cast(Dst); + if (SrcAddRec && DstAddRec) { + const SCEV *SrcConst = SrcAddRec->getStart(); + const SCEV *DstConst = DstAddRec->getStart(); + const SCEV *SrcCoeff = SrcAddRec->getStepRecurrence(*SE); + const SCEV *DstCoeff = DstAddRec->getStepRecurrence(*SE); + const Loop *CurLoop = SrcAddRec->getLoop(); + assert(CurLoop == DstAddRec->getLoop() && + "both loops in SIV should be same"); + Level = mapSrcLoop(CurLoop); + bool disproven; + if (SrcCoeff == DstCoeff) + disproven = strongSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop, + Level, Result, NewConstraint); + else if (SrcCoeff == SE->getNegativeSCEV(DstCoeff)) + disproven = weakCrossingSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop, + Level, Result, NewConstraint, SplitIter); + else + disproven = exactSIVtest(SrcCoeff, DstCoeff, SrcConst, DstConst, CurLoop, + Level, Result, NewConstraint); + return disproven || + gcdMIVtest(Src, Dst, Result) || + symbolicRDIVtest(SrcCoeff, DstCoeff, SrcConst, DstConst, CurLoop, CurLoop); + } + if (SrcAddRec) { + const SCEV *SrcConst = SrcAddRec->getStart(); + const SCEV *SrcCoeff = SrcAddRec->getStepRecurrence(*SE); + const SCEV *DstConst = Dst; + const Loop *CurLoop = SrcAddRec->getLoop(); + Level = mapSrcLoop(CurLoop); + return weakZeroDstSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop, + Level, Result, NewConstraint) || + gcdMIVtest(Src, Dst, Result); + } + if (DstAddRec) { + const SCEV *DstConst = DstAddRec->getStart(); + const SCEV *DstCoeff = DstAddRec->getStepRecurrence(*SE); + const SCEV *SrcConst = Src; + const Loop *CurLoop = DstAddRec->getLoop(); + Level = mapDstLoop(CurLoop); + return weakZeroSrcSIVtest(DstCoeff, SrcConst, DstConst, + CurLoop, Level, Result, NewConstraint) || + gcdMIVtest(Src, Dst, Result); + } + llvm_unreachable("SIV test expected at least one AddRec"); + return false; +} + + +// testRDIV - +// When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*j] +// where i and j are induction variables, c1 and c2 are loop invariant, +// and a1 and a2 are constant, we can solve it exactly with an easy adaptation +// of the Exact SIV test, the Restricted Double Index Variable (RDIV) test. +// It doesn't make sense to talk about distance or direction in this case, +// so there's no point in making special versions of the Strong SIV test or +// the Weak-crossing SIV test. 
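+//
+// A typical RDIV pair arises from references in two different loops, e.g.,
+//
+//    for (i = 0; i < N; i++)
+//      A[i + 10] = ...;
+//    for (j = 0; j < M; j++)
+//      ... = A[2*j];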
+// +// With minor algebra, this test can also be used for things like +// [c1 + a1*i + a2*j][c2]. +// +// Return true if dependence disproved. +bool DependenceAnalysis::testRDIV(const SCEV *Src, + const SCEV *Dst, + FullDependence &Result) const { + // we have 3 possible situations here: + // 1) [a*i + b] and [c*j + d] + // 2) [a*i + c*j + b] and [d] + // 3) [b] and [a*i + c*j + d] + // We need to find what we've got and get organized + + const SCEV *SrcConst, *DstConst; + const SCEV *SrcCoeff, *DstCoeff; + const Loop *SrcLoop, *DstLoop; + + DEBUG(dbgs() << " src = " << *Src << "\n"); + DEBUG(dbgs() << " dst = " << *Dst << "\n"); + const SCEVAddRecExpr *SrcAddRec = dyn_cast(Src); + const SCEVAddRecExpr *DstAddRec = dyn_cast(Dst); + if (SrcAddRec && DstAddRec) { + SrcConst = SrcAddRec->getStart(); + SrcCoeff = SrcAddRec->getStepRecurrence(*SE); + SrcLoop = SrcAddRec->getLoop(); + DstConst = DstAddRec->getStart(); + DstCoeff = DstAddRec->getStepRecurrence(*SE); + DstLoop = DstAddRec->getLoop(); + } + else if (SrcAddRec) { + if (const SCEVAddRecExpr *tmpAddRec = + dyn_cast(SrcAddRec->getStart())) { + SrcConst = tmpAddRec->getStart(); + SrcCoeff = tmpAddRec->getStepRecurrence(*SE); + SrcLoop = tmpAddRec->getLoop(); + DstConst = Dst; + DstCoeff = SE->getNegativeSCEV(SrcAddRec->getStepRecurrence(*SE)); + DstLoop = SrcAddRec->getLoop(); + } + else + llvm_unreachable("RDIV reached by surprising SCEVs"); + } + else if (DstAddRec) { + if (const SCEVAddRecExpr *tmpAddRec = + dyn_cast(DstAddRec->getStart())) { + DstConst = tmpAddRec->getStart(); + DstCoeff = tmpAddRec->getStepRecurrence(*SE); + DstLoop = tmpAddRec->getLoop(); + SrcConst = Src; + SrcCoeff = SE->getNegativeSCEV(DstAddRec->getStepRecurrence(*SE)); + SrcLoop = DstAddRec->getLoop(); + } + else + llvm_unreachable("RDIV reached by surprising SCEVs"); + } + else + llvm_unreachable("RDIV expected at least one AddRec"); + return exactRDIVtest(SrcCoeff, DstCoeff, + SrcConst, DstConst, + SrcLoop, DstLoop, + Result) || + gcdMIVtest(Src, Dst, Result) || + symbolicRDIVtest(SrcCoeff, DstCoeff, + SrcConst, DstConst, + SrcLoop, DstLoop); +} + + +// Tests the single-subscript MIV pair (Src and Dst) for dependence. +// Return true if dependence disproved. +// Can sometimes refine direction vectors. +bool DependenceAnalysis::testMIV(const SCEV *Src, + const SCEV *Dst, + const SmallBitVector &Loops, + FullDependence &Result) const { + DEBUG(dbgs() << " src = " << *Src << "\n"); + DEBUG(dbgs() << " dst = " << *Dst << "\n"); + Result.Consistent = false; + return gcdMIVtest(Src, Dst, Result) || + banerjeeMIVtest(Src, Dst, Loops, Result); +} + + +// Given a product, e.g., 10*X*Y, returns the first constant operand, +// in this case 10. If there is no constant part, returns NULL. +static +const SCEVConstant *getConstantPart(const SCEVMulExpr *Product) { + for (unsigned Op = 0, Ops = Product->getNumOperands(); Op < Ops; Op++) { + if (const SCEVConstant *Constant = dyn_cast(Product->getOperand(Op))) + return Constant; + } + return NULL; +} + + +//===----------------------------------------------------------------------===// +// gcdMIVtest - +// Tests an MIV subscript pair for dependence. +// Returns true if any possible dependence is disproved. +// Marks the result as inconsistant. +// Can sometimes disprove the equal direction for 1 or more loops, +// as discussed in Michael Wolfe's book, +// High Performance Compilers for Parallel Computing, page 235. +// +// We spend some effort (code!) 
to handle cases like +// [10*i + 5*N*j + 15*M + 6], where i and j are induction variables, +// but M and N are just loop-invariant variables. +// This should help us handle linearized subscripts; +// also makes this test a useful backup to the various SIV tests. +// +// It occurs to me that the presence of loop-invariant variables +// changes the nature of the test from "greatest common divisor" +// to "a common divisor!" +bool DependenceAnalysis::gcdMIVtest(const SCEV *Src, + const SCEV *Dst, + FullDependence &Result) const { + DEBUG(dbgs() << "starting gcd\n"); + ++GCDapplications; + unsigned BitWidth = Src->getType()->getIntegerBitWidth(); + APInt RunningGCD = APInt::getNullValue(BitWidth); + + // Examine Src coefficients. + // Compute running GCD and record source constant. + // Because we're looking for the constant at the end of the chain, + // we can't quit the loop just because the GCD == 1. + const SCEV *Coefficients = Src; + while (const SCEVAddRecExpr *AddRec = + dyn_cast(Coefficients)) { + const SCEV *Coeff = AddRec->getStepRecurrence(*SE); + const SCEVConstant *Constant = dyn_cast(Coeff); + if (const SCEVMulExpr *Product = dyn_cast(Coeff)) + // If the coefficient is the product of a constant and other stuff, + // we can use the constant in the GCD computation. + Constant = getConstantPart(Product); + if (!Constant) + return false; + APInt ConstCoeff = Constant->getValue()->getValue(); + RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); + Coefficients = AddRec->getStart(); + } + const SCEV *SrcConst = Coefficients; + + // Examine Dst coefficients. + // Compute running GCD and record destination constant. + // Because we're looking for the constant at the end of the chain, + // we can't quit the loop just because the GCD == 1. + Coefficients = Dst; + while (const SCEVAddRecExpr *AddRec = + dyn_cast(Coefficients)) { + const SCEV *Coeff = AddRec->getStepRecurrence(*SE); + const SCEVConstant *Constant = dyn_cast(Coeff); + if (const SCEVMulExpr *Product = dyn_cast(Coeff)) + // If the coefficient is the product of a constant and other stuff, + // we can use the constant in the GCD computation. + Constant = getConstantPart(Product); + if (!Constant) + return false; + APInt ConstCoeff = Constant->getValue()->getValue(); + RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); + Coefficients = AddRec->getStart(); + } + const SCEV *DstConst = Coefficients; + + APInt ExtraGCD = APInt::getNullValue(BitWidth); + const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); + DEBUG(dbgs() << " Delta = " << *Delta << "\n"); + const SCEVConstant *Constant = dyn_cast(Delta); + if (const SCEVAddExpr *Sum = dyn_cast(Delta)) { + // If Delta is a sum of products, we may be able to make further progress. + for (unsigned Op = 0, Ops = Sum->getNumOperands(); Op < Ops; Op++) { + const SCEV *Operand = Sum->getOperand(Op); + if (isa(Operand)) { + assert(!Constant && "Surprised to find multiple constants"); + Constant = cast(Operand); + } + else if (isa(Operand)) { + // Search for constant operand to participate in GCD; + // If none found; return false. 
+ const SCEVConstant *ConstOp = + getConstantPart(cast(Operand)); + APInt ConstOpValue = ConstOp->getValue()->getValue(); + ExtraGCD = APIntOps::GreatestCommonDivisor(ExtraGCD, + ConstOpValue.abs()); + } + else + return false; + } + } + if (!Constant) + return false; + APInt ConstDelta = cast(Constant)->getValue()->getValue(); + DEBUG(dbgs() << " ConstDelta = " << ConstDelta << "\n"); + if (ConstDelta == 0) + return false; + RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ExtraGCD); + DEBUG(dbgs() << " RunningGCD = " << RunningGCD << "\n"); + APInt Remainder = ConstDelta.srem(RunningGCD); + if (Remainder != 0) { + ++GCDindependence; + return true; + } + + // Try to disprove equal directions. + // For example, given a subscript pair [3*i + 2*j] and [i' + 2*j' - 1], + // the code above can't disprove the dependence because the GCD = 1. + // So we consider what happen if i = i' and what happens if j = j'. + // If i = i', we can simplify the subscript to [2*i + 2*j] and [2*j' - 1], + // which is infeasible, so we can disallow the = direction for the i level. + // Setting j = j' doesn't help matters, so we end up with a direction vector + // of [<>, *] + // + // Given A[5*i + 10*j*M + 9*M*N] and A[15*i + 20*j*M - 21*N*M + 5], + // we need to remember that the constant part is 5 and the RunningGCD should + // be initialized to ExtraGCD = 30. + DEBUG(dbgs() << " ExtraGCD = " << ExtraGCD << '\n'); + + bool Improved = false; + Coefficients = Src; + while (const SCEVAddRecExpr *AddRec = + dyn_cast(Coefficients)) { + Coefficients = AddRec->getStart(); + const Loop *CurLoop = AddRec->getLoop(); + RunningGCD = ExtraGCD; + const SCEV *SrcCoeff = AddRec->getStepRecurrence(*SE); + const SCEV *DstCoeff = SE->getMinusSCEV(SrcCoeff, SrcCoeff); + const SCEV *Inner = Src; + while (RunningGCD != 1 && isa(Inner)) { + AddRec = cast(Inner); + const SCEV *Coeff = AddRec->getStepRecurrence(*SE); + if (CurLoop == AddRec->getLoop()) + ; // SrcCoeff == Coeff + else { + if (const SCEVMulExpr *Product = dyn_cast(Coeff)) + // If the coefficient is the product of a constant and other stuff, + // we can use the constant in the GCD computation. + Constant = getConstantPart(Product); + else + Constant = cast(Coeff); + APInt ConstCoeff = Constant->getValue()->getValue(); + RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); + } + Inner = AddRec->getStart(); + } + Inner = Dst; + while (RunningGCD != 1 && isa(Inner)) { + AddRec = cast(Inner); + const SCEV *Coeff = AddRec->getStepRecurrence(*SE); + if (CurLoop == AddRec->getLoop()) + DstCoeff = Coeff; + else { + if (const SCEVMulExpr *Product = dyn_cast(Coeff)) + // If the coefficient is the product of a constant and other stuff, + // we can use the constant in the GCD computation. + Constant = getConstantPart(Product); + else + Constant = cast(Coeff); + APInt ConstCoeff = Constant->getValue()->getValue(); + RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); + } + Inner = AddRec->getStart(); + } + Delta = SE->getMinusSCEV(SrcCoeff, DstCoeff); + if (const SCEVMulExpr *Product = dyn_cast(Delta)) + // If the coefficient is the product of a constant and other stuff, + // we can use the constant in the GCD computation. + Constant = getConstantPart(Product); + else if (isa(Delta)) + Constant = cast(Delta); + else { + // The difference of the two coefficients might not be a product + // or constant, in which case we give up on this direction. 
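+ // For example, a source coefficient 2*N and a destination coefficient 3*M
+ // give Delta = 2*N - 3*M, an add expression with no usable constant part,
+ // so this level cannot be refined.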
+ continue; + } + APInt ConstCoeff = Constant->getValue()->getValue(); + RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); + DEBUG(dbgs() << "\tRunningGCD = " << RunningGCD << "\n"); + if (RunningGCD != 0) { + Remainder = ConstDelta.srem(RunningGCD); + DEBUG(dbgs() << "\tRemainder = " << Remainder << "\n"); + if (Remainder != 0) { + unsigned Level = mapSrcLoop(CurLoop); + Result.DV[Level - 1].Direction &= ~Dependence::DVEntry::EQ; + Improved = true; + } + } + } + if (Improved) + ++GCDsuccesses; + DEBUG(dbgs() << "all done\n"); + return false; +} + + +//===----------------------------------------------------------------------===// +// banerjeeMIVtest - +// Use Banerjee's Inequalities to test an MIV subscript pair. +// (Wolfe, in the race-car book, calls this the Extreme Value Test.) +// Generally follows the discussion in Section 2.5.2 of +// +// Optimizing Supercompilers for Supercomputers +// Michael Wolfe +// +// The inequalities given on page 25 are simplified in that loops are +// normalized so that the lower bound is always 0 and the stride is always 1. +// For example, Wolfe gives +// +// LB^<_k = (A^-_k - B_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k +// +// where A_k is the coefficient of the kth index in the source subscript, +// B_k is the coefficient of the kth index in the destination subscript, +// U_k is the upper bound of the kth index, L_k is the lower bound of the Kth +// index, and N_k is the stride of the kth index. Since all loops are normalized +// by the SCEV package, N_k = 1 and L_k = 0, allowing us to simplify the +// equation to +// +// LB^<_k = (A^-_k - B_k)^- (U_k - 0 - 1) + (A_k - B_k)0 - B_k 1 +// = (A^-_k - B_k)^- (U_k - 1) - B_k +// +// Similar simplifications are possible for the other equations. +// +// When we can't determine the number of iterations for a loop, +// we use NULL as an indicator for the worst case, infinity. +// When computing the upper bound, NULL denotes +inf; +// for the lower bound, NULL denotes -inf. +// +// Return true if dependence disproved. +bool DependenceAnalysis::banerjeeMIVtest(const SCEV *Src, + const SCEV *Dst, + const SmallBitVector &Loops, + FullDependence &Result) const { + DEBUG(dbgs() << "starting Banerjee\n"); + ++BanerjeeApplications; + DEBUG(dbgs() << " Src = " << *Src << '\n'); + const SCEV *A0; + CoefficientInfo *A = collectCoeffInfo(Src, true, A0); + DEBUG(dbgs() << " Dst = " << *Dst << '\n'); + const SCEV *B0; + CoefficientInfo *B = collectCoeffInfo(Dst, false, B0); + BoundInfo *Bound = new BoundInfo[MaxLevels + 1]; + const SCEV *Delta = SE->getMinusSCEV(B0, A0); + DEBUG(dbgs() << "\tDelta = " << *Delta << '\n'); + + // Compute bounds for all the * directions. + DEBUG(dbgs() << "\tBounds[*]\n"); + for (unsigned K = 1; K <= MaxLevels; ++K) { + Bound[K].Iterations = A[K].Iterations ? A[K].Iterations : B[K].Iterations; + Bound[K].Direction = Dependence::DVEntry::ALL; + Bound[K].DirSet = Dependence::DVEntry::NONE; + findBoundsALL(A, B, Bound, K); +#ifndef NDEBUG + DEBUG(dbgs() << "\t " << K << '\t'); + if (Bound[K].Lower[Dependence::DVEntry::ALL]) + DEBUG(dbgs() << *Bound[K].Lower[Dependence::DVEntry::ALL] << '\t'); + else + DEBUG(dbgs() << "-inf\t"); + if (Bound[K].Upper[Dependence::DVEntry::ALL]) + DEBUG(dbgs() << *Bound[K].Upper[Dependence::DVEntry::ALL] << '\n'); + else + DEBUG(dbgs() << "+inf\n"); +#endif + } + + // Test the *, *, *, ... case. + bool Disproved = false; + if (testBounds(Dependence::DVEntry::ALL, 0, Bound, Delta)) { + // Explore the direction vector hierarchy. 
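+ // Roughly: with two common levels (both set in Loops), exploreDirections
+ // first tries the prefixes <, =, and > at level 1; each prefix whose bounds
+ // still admit Delta is refined at level 2 into <, =, and >. Every surviving
+ // full direction vector is OR'd into Bound[K].DirSet, which is intersected
+ // with Result.DV below.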
+ unsigned DepthExpanded = 0; + unsigned NewDeps = exploreDirections(1, A, B, Bound, + Loops, DepthExpanded, Delta); + if (NewDeps > 0) { + bool Improved = false; + for (unsigned K = 1; K <= CommonLevels; ++K) { + if (Loops[K]) { + unsigned Old = Result.DV[K - 1].Direction; + Result.DV[K - 1].Direction = Old & Bound[K].DirSet; + Improved |= Old != Result.DV[K - 1].Direction; + if (!Result.DV[K - 1].Direction) { + Improved = false; + Disproved = true; + break; + } + } + } + if (Improved) + ++BanerjeeSuccesses; + } + else { + ++BanerjeeIndependence; + Disproved = true; + } + } + else { + ++BanerjeeIndependence; + Disproved = true; + } + delete [] Bound; + delete [] A; + delete [] B; + return Disproved; +} + + +// Hierarchically expands the direction vector +// search space, combining the directions of discovered dependences +// in the DirSet field of Bound. Returns the number of distinct +// dependences discovered. If the dependence is disproved, +// it will return 0. +unsigned DependenceAnalysis::exploreDirections(unsigned Level, + CoefficientInfo *A, + CoefficientInfo *B, + BoundInfo *Bound, + const SmallBitVector &Loops, + unsigned &DepthExpanded, + const SCEV *Delta) const { + if (Level > CommonLevels) { + // record result + DEBUG(dbgs() << "\t["); + for (unsigned K = 1; K <= CommonLevels; ++K) { + if (Loops[K]) { + Bound[K].DirSet |= Bound[K].Direction; +#ifndef NDEBUG + switch (Bound[K].Direction) { + case Dependence::DVEntry::LT: + DEBUG(dbgs() << " <"); + break; + case Dependence::DVEntry::EQ: + DEBUG(dbgs() << " ="); + break; + case Dependence::DVEntry::GT: + DEBUG(dbgs() << " >"); + break; + case Dependence::DVEntry::ALL: + DEBUG(dbgs() << " *"); + break; + default: + llvm_unreachable("unexpected Bound[K].Direction"); + } +#endif + } + } + DEBUG(dbgs() << " ]\n"); + return 1; + } + if (Loops[Level]) { + if (Level > DepthExpanded) { + DepthExpanded = Level; + // compute bounds for <, =, > at current level + findBoundsLT(A, B, Bound, Level); + findBoundsGT(A, B, Bound, Level); + findBoundsEQ(A, B, Bound, Level); +#ifndef NDEBUG + DEBUG(dbgs() << "\tBound for level = " << Level << '\n'); + DEBUG(dbgs() << "\t <\t"); + if (Bound[Level].Lower[Dependence::DVEntry::LT]) + DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::LT] << '\t'); + else + DEBUG(dbgs() << "-inf\t"); + if (Bound[Level].Upper[Dependence::DVEntry::LT]) + DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::LT] << '\n'); + else + DEBUG(dbgs() << "+inf\n"); + DEBUG(dbgs() << "\t =\t"); + if (Bound[Level].Lower[Dependence::DVEntry::EQ]) + DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::EQ] << '\t'); + else + DEBUG(dbgs() << "-inf\t"); + if (Bound[Level].Upper[Dependence::DVEntry::EQ]) + DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::EQ] << '\n'); + else + DEBUG(dbgs() << "+inf\n"); + DEBUG(dbgs() << "\t >\t"); + if (Bound[Level].Lower[Dependence::DVEntry::GT]) + DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::GT] << '\t'); + else + DEBUG(dbgs() << "-inf\t"); + if (Bound[Level].Upper[Dependence::DVEntry::GT]) + DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::GT] << '\n'); + else + DEBUG(dbgs() << "+inf\n"); +#endif + } + + unsigned NewDeps = 0; + + // test bounds for <, *, *, ... + if (testBounds(Dependence::DVEntry::LT, Level, Bound, Delta)) + NewDeps += exploreDirections(Level + 1, A, B, Bound, + Loops, DepthExpanded, Delta); + + // Test bounds for =, *, *, ... 
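+ // (testBounds sums the per-level bounds under the current direction
+ // settings; for example, if that sum is the range [0, 18] and Delta = 5,
+ // the =, *, ... subtree is explored, while Delta = -2 or 20 would prune it.)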
+ if (testBounds(Dependence::DVEntry::EQ, Level, Bound, Delta)) + NewDeps += exploreDirections(Level + 1, A, B, Bound, + Loops, DepthExpanded, Delta); + + // test bounds for >, *, *, ... + if (testBounds(Dependence::DVEntry::GT, Level, Bound, Delta)) + NewDeps += exploreDirections(Level + 1, A, B, Bound, + Loops, DepthExpanded, Delta); + + Bound[Level].Direction = Dependence::DVEntry::ALL; + return NewDeps; + } + else + return exploreDirections(Level + 1, A, B, Bound, Loops, DepthExpanded, Delta); +} + + +// Returns true iff the current bounds are plausible. +bool DependenceAnalysis::testBounds(unsigned char DirKind, + unsigned Level, + BoundInfo *Bound, + const SCEV *Delta) const { + Bound[Level].Direction = DirKind; + if (const SCEV *LowerBound = getLowerBound(Bound)) + if (isKnownPredicate(CmpInst::ICMP_SGT, LowerBound, Delta)) + return false; + if (const SCEV *UpperBound = getUpperBound(Bound)) + if (isKnownPredicate(CmpInst::ICMP_SGT, Delta, UpperBound)) + return false; + return true; +} + + +// Computes the upper and lower bounds for level K +// using the * direction. Records them in Bound. +// Wolfe gives the equations +// +// LB^*_k = (A^-_k - B^+_k)(U_k - L_k) + (A_k - B_k)L_k +// UB^*_k = (A^+_k - B^-_k)(U_k - L_k) + (A_k - B_k)L_k +// +// Since we normalize loops, we can simplify these equations to +// +// LB^*_k = (A^-_k - B^+_k)U_k +// UB^*_k = (A^+_k - B^-_k)U_k +// +// We must be careful to handle the case where the upper bound is unknown. +// Note that the lower bound is always <= 0 +// and the upper bound is always >= 0. +void DependenceAnalysis::findBoundsALL(CoefficientInfo *A, + CoefficientInfo *B, + BoundInfo *Bound, + unsigned K) const { + Bound[K].Lower[Dependence::DVEntry::ALL] = NULL; // Default value = -infinity. + Bound[K].Upper[Dependence::DVEntry::ALL] = NULL; // Default value = +infinity. + if (Bound[K].Iterations) { + Bound[K].Lower[Dependence::DVEntry::ALL] = + SE->getMulExpr(SE->getMinusSCEV(A[K].NegPart, B[K].PosPart), + Bound[K].Iterations); + Bound[K].Upper[Dependence::DVEntry::ALL] = + SE->getMulExpr(SE->getMinusSCEV(A[K].PosPart, B[K].NegPart), + Bound[K].Iterations); + } + else { + // If the difference is 0, we won't need to know the number of iterations. + if (isKnownPredicate(CmpInst::ICMP_EQ, A[K].NegPart, B[K].PosPart)) + Bound[K].Lower[Dependence::DVEntry::ALL] = + SE->getConstant(A[K].Coeff->getType(), 0); + if (isKnownPredicate(CmpInst::ICMP_EQ, A[K].PosPart, B[K].NegPart)) + Bound[K].Upper[Dependence::DVEntry::ALL] = + SE->getConstant(A[K].Coeff->getType(), 0); + } +} + + +// Computes the upper and lower bounds for level K +// using the = direction. Records them in Bound. +// Wolfe gives the equations +// +// LB^=_k = (A_k - B_k)^- (U_k - L_k) + (A_k - B_k)L_k +// UB^=_k = (A_k - B_k)^+ (U_k - L_k) + (A_k - B_k)L_k +// +// Since we normalize loops, we can simplify these equations to +// +// LB^=_k = (A_k - B_k)^- U_k +// UB^=_k = (A_k - B_k)^+ U_k +// +// We must be careful to handle the case where the upper bound is unknown. +// Note that the lower bound is always <= 0 +// and the upper bound is always >= 0. +void DependenceAnalysis::findBoundsEQ(CoefficientInfo *A, + CoefficientInfo *B, + BoundInfo *Bound, + unsigned K) const { + Bound[K].Lower[Dependence::DVEntry::EQ] = NULL; // Default value = -infinity. + Bound[K].Upper[Dependence::DVEntry::EQ] = NULL; // Default value = +infinity. 
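+ // For example, with A_k = 3, B_k = 1, and U_k = 9 recorded in
+ // Bound[K].Iterations, the difference is 2, so
+ // LB^=_k = min(2, 0)*9 = 0 and UB^=_k = max(2, 0)*9 = 18.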
+ if (Bound[K].Iterations) { + const SCEV *Delta = SE->getMinusSCEV(A[K].Coeff, B[K].Coeff); + const SCEV *NegativePart = getNegativePart(Delta); + Bound[K].Lower[Dependence::DVEntry::EQ] = + SE->getMulExpr(NegativePart, Bound[K].Iterations); + const SCEV *PositivePart = getPositivePart(Delta); + Bound[K].Upper[Dependence::DVEntry::EQ] = + SE->getMulExpr(PositivePart, Bound[K].Iterations); + } + else { + // If the positive/negative part of the difference is 0, + // we won't need to know the number of iterations. + const SCEV *Delta = SE->getMinusSCEV(A[K].Coeff, B[K].Coeff); + const SCEV *NegativePart = getNegativePart(Delta); + if (NegativePart->isZero()) + Bound[K].Lower[Dependence::DVEntry::EQ] = NegativePart; // Zero + const SCEV *PositivePart = getPositivePart(Delta); + if (PositivePart->isZero()) + Bound[K].Upper[Dependence::DVEntry::EQ] = PositivePart; // Zero + } +} + + +// Computes the upper and lower bounds for level K +// using the < direction. Records them in Bound. +// Wolfe gives the equations +// +// LB^<_k = (A^-_k - B_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k +// UB^<_k = (A^+_k - B_k)^+ (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k +// +// Since we normalize loops, we can simplify these equations to +// +// LB^<_k = (A^-_k - B_k)^- (U_k - 1) - B_k +// UB^<_k = (A^+_k - B_k)^+ (U_k - 1) - B_k +// +// We must be careful to handle the case where the upper bound is unknown. +void DependenceAnalysis::findBoundsLT(CoefficientInfo *A, + CoefficientInfo *B, + BoundInfo *Bound, + unsigned K) const { + Bound[K].Lower[Dependence::DVEntry::LT] = NULL; // Default value = -infinity. + Bound[K].Upper[Dependence::DVEntry::LT] = NULL; // Default value = +infinity. + if (Bound[K].Iterations) { + const SCEV *Iter_1 = + SE->getMinusSCEV(Bound[K].Iterations, + SE->getConstant(Bound[K].Iterations->getType(), 1)); + const SCEV *NegPart = + getNegativePart(SE->getMinusSCEV(A[K].NegPart, B[K].Coeff)); + Bound[K].Lower[Dependence::DVEntry::LT] = + SE->getMinusSCEV(SE->getMulExpr(NegPart, Iter_1), B[K].Coeff); + const SCEV *PosPart = + getPositivePart(SE->getMinusSCEV(A[K].PosPart, B[K].Coeff)); + Bound[K].Upper[Dependence::DVEntry::LT] = + SE->getMinusSCEV(SE->getMulExpr(PosPart, Iter_1), B[K].Coeff); + } + else { + // If the positive/negative part of the difference is 0, + // we won't need to know the number of iterations. + const SCEV *NegPart = + getNegativePart(SE->getMinusSCEV(A[K].NegPart, B[K].Coeff)); + if (NegPart->isZero()) + Bound[K].Lower[Dependence::DVEntry::LT] = SE->getNegativeSCEV(B[K].Coeff); + const SCEV *PosPart = + getPositivePart(SE->getMinusSCEV(A[K].PosPart, B[K].Coeff)); + if (PosPart->isZero()) + Bound[K].Upper[Dependence::DVEntry::LT] = SE->getNegativeSCEV(B[K].Coeff); + } +} + + +// Computes the upper and lower bounds for level K +// using the > direction. Records them in Bound. +// Wolfe gives the equations +// +// LB^>_k = (A_k - B^+_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k + A_k N_k +// UB^>_k = (A_k - B^-_k)^+ (U_k - L_k - N_k) + (A_k - B_k)L_k + A_k N_k +// +// Since we normalize loops, we can simplify these equations to +// +// LB^>_k = (A_k - B^+_k)^- (U_k - 1) + A_k +// UB^>_k = (A_k - B^-_k)^+ (U_k - 1) + A_k +// +// We must be careful to handle the case where the upper bound is unknown. +void DependenceAnalysis::findBoundsGT(CoefficientInfo *A, + CoefficientInfo *B, + BoundInfo *Bound, + unsigned K) const { + Bound[K].Lower[Dependence::DVEntry::GT] = NULL; // Default value = -infinity. 
+ Bound[K].Upper[Dependence::DVEntry::GT] = NULL; // Default value = +infinity. + if (Bound[K].Iterations) { + const SCEV *Iter_1 = + SE->getMinusSCEV(Bound[K].Iterations, + SE->getConstant(Bound[K].Iterations->getType(), 1)); + const SCEV *NegPart = + getNegativePart(SE->getMinusSCEV(A[K].Coeff, B[K].PosPart)); + Bound[K].Lower[Dependence::DVEntry::GT] = + SE->getAddExpr(SE->getMulExpr(NegPart, Iter_1), A[K].Coeff); + const SCEV *PosPart = + getPositivePart(SE->getMinusSCEV(A[K].Coeff, B[K].NegPart)); + Bound[K].Upper[Dependence::DVEntry::GT] = + SE->getAddExpr(SE->getMulExpr(PosPart, Iter_1), A[K].Coeff); + } + else { + // If the positive/negative part of the difference is 0, + // we won't need to know the number of iterations. + const SCEV *NegPart = getNegativePart(SE->getMinusSCEV(A[K].Coeff, B[K].PosPart)); + if (NegPart->isZero()) + Bound[K].Lower[Dependence::DVEntry::GT] = A[K].Coeff; + const SCEV *PosPart = getPositivePart(SE->getMinusSCEV(A[K].Coeff, B[K].NegPart)); + if (PosPart->isZero()) + Bound[K].Upper[Dependence::DVEntry::GT] = A[K].Coeff; + } +} + + +// X^+ = max(X, 0) +const SCEV *DependenceAnalysis::getPositivePart(const SCEV *X) const { + return SE->getSMaxExpr(X, SE->getConstant(X->getType(), 0)); +} + + +// X^- = min(X, 0) +const SCEV *DependenceAnalysis::getNegativePart(const SCEV *X) const { + return SE->getSMinExpr(X, SE->getConstant(X->getType(), 0)); +} + + +// Walks through the subscript, +// collecting each coefficient, the associated loop bounds, +// and recording its positive and negative parts for later use. +DependenceAnalysis::CoefficientInfo * +DependenceAnalysis::collectCoeffInfo(const SCEV *Subscript, + bool SrcFlag, + const SCEV *&Constant) const { + const SCEV *Zero = SE->getConstant(Subscript->getType(), 0); + CoefficientInfo *CI = new CoefficientInfo[MaxLevels + 1]; + for (unsigned K = 1; K <= MaxLevels; ++K) { + CI[K].Coeff = Zero; + CI[K].PosPart = Zero; + CI[K].NegPart = Zero; + CI[K].Iterations = NULL; + } + while (const SCEVAddRecExpr *AddRec = dyn_cast(Subscript)) { + const Loop *L = AddRec->getLoop(); + unsigned K = SrcFlag ? mapSrcLoop(L) : mapDstLoop(L); + CI[K].Coeff = AddRec->getStepRecurrence(*SE); + CI[K].PosPart = getPositivePart(CI[K].Coeff); + CI[K].NegPart = getNegativePart(CI[K].Coeff); + CI[K].Iterations = collectUpperBound(L, Subscript->getType()); + Subscript = AddRec->getStart(); + } + Constant = Subscript; +#ifndef NDEBUG + DEBUG(dbgs() << "\tCoefficient Info\n"); + for (unsigned K = 1; K <= MaxLevels; ++K) { + DEBUG(dbgs() << "\t " << K << "\t" << *CI[K].Coeff); + DEBUG(dbgs() << "\tPos Part = "); + DEBUG(dbgs() << *CI[K].PosPart); + DEBUG(dbgs() << "\tNeg Part = "); + DEBUG(dbgs() << *CI[K].NegPart); + DEBUG(dbgs() << "\tUpper Bound = "); + if (CI[K].Iterations) + DEBUG(dbgs() << *CI[K].Iterations); + else + DEBUG(dbgs() << "+inf"); + DEBUG(dbgs() << '\n'); + } + DEBUG(dbgs() << "\t Constant = " << *Subscript << '\n'); +#endif + return CI; +} + + +// Looks through all the bounds info and +// computes the lower bound given the current direction settings +// at each level. If the lower bound for any level is -inf, +// the result is -inf. 
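+// For example, per-level lower bounds of -4 and -6 sum to -10,
+// while -4 and NULL (unknown, i.e., -inf) yield NULL.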
+const SCEV *DependenceAnalysis::getLowerBound(BoundInfo *Bound) const { + const SCEV *Sum = Bound[1].Lower[Bound[1].Direction]; + for (unsigned K = 2; Sum && K <= MaxLevels; ++K) { + if (Bound[K].Lower[Bound[K].Direction]) + Sum = SE->getAddExpr(Sum, Bound[K].Lower[Bound[K].Direction]); + else + Sum = NULL; + } + return Sum; +} + + +// Looks through all the bounds info and +// computes the upper bound given the current direction settings +// at each level. If the upper bound at any level is +inf, +// the result is +inf. +const SCEV *DependenceAnalysis::getUpperBound(BoundInfo *Bound) const { + const SCEV *Sum = Bound[1].Upper[Bound[1].Direction]; + for (unsigned K = 2; Sum && K <= MaxLevels; ++K) { + if (Bound[K].Upper[Bound[K].Direction]) + Sum = SE->getAddExpr(Sum, Bound[K].Upper[Bound[K].Direction]); + else + Sum = NULL; + } + return Sum; +} + + +//===----------------------------------------------------------------------===// +// Constraint manipulation for Delta test. + +// Given a linear SCEV, +// return the coefficient (the step) +// corresponding to the specified loop. +// If there isn't one, return 0. +// For example, given a*i + b*j + c*k, zeroing the coefficient +// corresponding to the j loop would yield b. +const SCEV *DependenceAnalysis::findCoefficient(const SCEV *Expr, + const Loop *TargetLoop) const { + const SCEVAddRecExpr *AddRec = dyn_cast(Expr); + if (!AddRec) + return SE->getConstant(Expr->getType(), 0); + if (AddRec->getLoop() == TargetLoop) + return AddRec->getStepRecurrence(*SE); + return findCoefficient(AddRec->getStart(), TargetLoop); +} + + +// Given a linear SCEV, +// return the SCEV given by zeroing out the coefficient +// corresponding to the specified loop. +// For example, given a*i + b*j + c*k, zeroing the coefficient +// corresponding to the j loop would yield a*i + c*k. +const SCEV *DependenceAnalysis::zeroCoefficient(const SCEV *Expr, + const Loop *TargetLoop) const { + const SCEVAddRecExpr *AddRec = dyn_cast(Expr); + if (!AddRec) + return Expr; // ignore + if (AddRec->getLoop() == TargetLoop) + return AddRec->getStart(); + return SE->getAddRecExpr(zeroCoefficient(AddRec->getStart(), TargetLoop), + AddRec->getStepRecurrence(*SE), + AddRec->getLoop(), + AddRec->getNoWrapFlags()); +} + + +// Given a linear SCEV Expr, +// return the SCEV given by adding some Value to the +// coefficient corresponding to the specified TargetLoop. +// For example, given a*i + b*j + c*k, adding 1 to the coefficient +// corresponding to the j loop would yield a*i + (b+1)*j + c*k. +const SCEV *DependenceAnalysis::addToCoefficient(const SCEV *Expr, + const Loop *TargetLoop, + const SCEV *Value) const { + const SCEVAddRecExpr *AddRec = dyn_cast(Expr); + if (!AddRec) // create a new addRec + return SE->getAddRecExpr(Expr, + Value, + TargetLoop, + SCEV::FlagAnyWrap); // Worst case, with no info. + if (AddRec->getLoop() == TargetLoop) { + const SCEV *Sum = SE->getAddExpr(AddRec->getStepRecurrence(*SE), Value); + if (Sum->isZero()) + return AddRec->getStart(); + return SE->getAddRecExpr(AddRec->getStart(), + Sum, + AddRec->getLoop(), + AddRec->getNoWrapFlags()); + } + return SE->getAddRecExpr(addToCoefficient(AddRec->getStart(), + TargetLoop, Value), + AddRec->getStepRecurrence(*SE), + AddRec->getLoop(), + AddRec->getNoWrapFlags()); +} + + +// Review the constraints, looking for opportunities +// to simplify a subscript pair (Src and Dst). +// Return true if some simplification occurs. 
+// If the simplification isn't exact (that is, if it is conservative +// in terms of dependence), set consistent to false. +// Corresponds to Figure 5 from the paper +// +// Practical Dependence Testing +// Goff, Kennedy, Tseng +// PLDI 1991 +bool DependenceAnalysis::propagate(const SCEV *&Src, + const SCEV *&Dst, + SmallBitVector &Loops, + SmallVector &Constraints, + bool &Consistent) { + bool Result = false; + for (int LI = Loops.find_first(); LI >= 0; LI = Loops.find_next(LI)) { + DEBUG(dbgs() << "\t Constraint[" << LI << "] is"); + DEBUG(Constraints[LI].dump(dbgs())); + if (Constraints[LI].isDistance()) + Result |= propagateDistance(Src, Dst, Constraints[LI], Consistent); + else if (Constraints[LI].isLine()) + Result |= propagateLine(Src, Dst, Constraints[LI], Consistent); + else if (Constraints[LI].isPoint()) + Result |= propagatePoint(Src, Dst, Constraints[LI]); + } + return Result; +} + + +// Attempt to propagate a distance +// constraint into a subscript pair (Src and Dst). +// Return true if some simplification occurs. +// If the simplification isn't exact (that is, if it is conservative +// in terms of dependence), set consistent to false. +bool DependenceAnalysis::propagateDistance(const SCEV *&Src, + const SCEV *&Dst, + Constraint &CurConstraint, + bool &Consistent) { + const Loop *CurLoop = CurConstraint.getAssociatedLoop(); + DEBUG(dbgs() << "\t\tSrc is " << *Src << "\n"); + const SCEV *A_K = findCoefficient(Src, CurLoop); + if (A_K->isZero()) + return false; + const SCEV *DA_K = SE->getMulExpr(A_K, CurConstraint.getD()); + Src = SE->getMinusSCEV(Src, DA_K); + Src = zeroCoefficient(Src, CurLoop); + DEBUG(dbgs() << "\t\tnew Src is " << *Src << "\n"); + DEBUG(dbgs() << "\t\tDst is " << *Dst << "\n"); + Dst = addToCoefficient(Dst, CurLoop, SE->getNegativeSCEV(A_K)); + DEBUG(dbgs() << "\t\tnew Dst is " << *Dst << "\n"); + if (!findCoefficient(Dst, CurLoop)->isZero()) + Consistent = false; + return true; +} + + +// Attempt to propagate a line +// constraint into a subscript pair (Src and Dst). +// Return true if some simplification occurs. +// If the simplification isn't exact (that is, if it is conservative +// in terms of dependence), set consistent to false. 
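+// As a rough sketch of the A == B case: given the line constraint i + i' = 5
+// for this loop (i the source iteration, i' the destination), Src = 2*i + s
+// and Dst = 3*i' + d become Src = s + 10 and Dst = 5*i' + d; that is, the
+// substitution i = 5 - i' has been folded into the pair.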
+bool DependenceAnalysis::propagateLine(const SCEV *&Src, + const SCEV *&Dst, + Constraint &CurConstraint, + bool &Consistent) { + const Loop *CurLoop = CurConstraint.getAssociatedLoop(); + const SCEV *A = CurConstraint.getA(); + const SCEV *B = CurConstraint.getB(); + const SCEV *C = CurConstraint.getC(); + DEBUG(dbgs() << "\t\tA = " << *A << ", B = " << *B << ", C = " << *C << "\n"); + DEBUG(dbgs() << "\t\tSrc = " << *Src << "\n"); + DEBUG(dbgs() << "\t\tDst = " << *Dst << "\n"); + if (A->isZero()) { + const SCEVConstant *Bconst = dyn_cast(B); + const SCEVConstant *Cconst = dyn_cast(C); + if (!Bconst || !Cconst) return false; + APInt Beta = Bconst->getValue()->getValue(); + APInt Charlie = Cconst->getValue()->getValue(); + APInt CdivB = Charlie.sdiv(Beta); + assert(Charlie.srem(Beta) == 0 && "C should be evenly divisible by B"); + const SCEV *AP_K = findCoefficient(Dst, CurLoop); + // Src = SE->getAddExpr(Src, SE->getMulExpr(AP_K, SE->getConstant(CdivB))); + Src = SE->getMinusSCEV(Src, SE->getMulExpr(AP_K, SE->getConstant(CdivB))); + Dst = zeroCoefficient(Dst, CurLoop); + if (!findCoefficient(Src, CurLoop)->isZero()) + Consistent = false; + } + else if (B->isZero()) { + const SCEVConstant *Aconst = dyn_cast(A); + const SCEVConstant *Cconst = dyn_cast(C); + if (!Aconst || !Cconst) return false; + APInt Alpha = Aconst->getValue()->getValue(); + APInt Charlie = Cconst->getValue()->getValue(); + APInt CdivA = Charlie.sdiv(Alpha); + assert(Charlie.srem(Alpha) == 0 && "C should be evenly divisible by A"); + const SCEV *A_K = findCoefficient(Src, CurLoop); + Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, SE->getConstant(CdivA))); + Src = zeroCoefficient(Src, CurLoop); + if (!findCoefficient(Dst, CurLoop)->isZero()) + Consistent = false; + } + else if (isKnownPredicate(CmpInst::ICMP_EQ, A, B)) { + const SCEVConstant *Aconst = dyn_cast(A); + const SCEVConstant *Cconst = dyn_cast(C); + if (!Aconst || !Cconst) return false; + APInt Alpha = Aconst->getValue()->getValue(); + APInt Charlie = Cconst->getValue()->getValue(); + APInt CdivA = Charlie.sdiv(Alpha); + assert(Charlie.srem(Alpha) == 0 && "C should be evenly divisible by A"); + const SCEV *A_K = findCoefficient(Src, CurLoop); + Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, SE->getConstant(CdivA))); + Src = zeroCoefficient(Src, CurLoop); + Dst = addToCoefficient(Dst, CurLoop, A_K); + if (!findCoefficient(Dst, CurLoop)->isZero()) + Consistent = false; + } + else { + // paper is incorrect here, or perhaps just misleading + const SCEV *A_K = findCoefficient(Src, CurLoop); + Src = SE->getMulExpr(Src, A); + Dst = SE->getMulExpr(Dst, A); + Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, C)); + Src = zeroCoefficient(Src, CurLoop); + Dst = addToCoefficient(Dst, CurLoop, SE->getMulExpr(A_K, B)); + if (!findCoefficient(Dst, CurLoop)->isZero()) + Consistent = false; + } + DEBUG(dbgs() << "\t\tnew Src = " << *Src << "\n"); + DEBUG(dbgs() << "\t\tnew Dst = " << *Dst << "\n"); + return true; +} + + +// Attempt to propagate a point +// constraint into a subscript pair (Src and Dst). +// Return true if some simplification occurs. 
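+// For example, given the point constraint (X, Y) = (2, 3) for some loop,
+// Src = 4*i + s and Dst = 5*i' + d become Src = s + 4*2 - 5*3 = s - 7 and
+// Dst = d; both references are pinned to fixed iterations, so the loop's
+// index drops out of the subscript pair.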
+bool DependenceAnalysis::propagatePoint(const SCEV *&Src, + const SCEV *&Dst, + Constraint &CurConstraint) { + const Loop *CurLoop = CurConstraint.getAssociatedLoop(); + const SCEV *A_K = findCoefficient(Src, CurLoop); + const SCEV *AP_K = findCoefficient(Dst, CurLoop); + const SCEV *XA_K = SE->getMulExpr(A_K, CurConstraint.getX()); + const SCEV *YAP_K = SE->getMulExpr(AP_K, CurConstraint.getY()); + DEBUG(dbgs() << "\t\tSrc is " << *Src << "\n"); + Src = SE->getAddExpr(Src, SE->getMinusSCEV(XA_K, YAP_K)); + Src = zeroCoefficient(Src, CurLoop); + DEBUG(dbgs() << "\t\tnew Src is " << *Src << "\n"); + DEBUG(dbgs() << "\t\tDst is " << *Dst << "\n"); + Dst = zeroCoefficient(Dst, CurLoop); + DEBUG(dbgs() << "\t\tnew Dst is " << *Dst << "\n"); + return true; +} + + +// Update direction vector entry based on the current constraint. +void DependenceAnalysis::updateDirection(Dependence::DVEntry &Level, + const Constraint &CurConstraint + ) const { + DEBUG(dbgs() << "\tUpdate direction, constraint ="); + DEBUG(CurConstraint.dump(dbgs())); + if (CurConstraint.isAny()) + ; // use defaults + else if (CurConstraint.isDistance()) { + // this one is consistent, the others aren't + Level.Scalar = false; + Level.Distance = CurConstraint.getD(); + unsigned NewDirection = Dependence::DVEntry::NONE; + if (!SE->isKnownNonZero(Level.Distance)) // if may be zero + NewDirection = Dependence::DVEntry::EQ; + if (!SE->isKnownNonPositive(Level.Distance)) // if may be positive + NewDirection |= Dependence::DVEntry::LT; + if (!SE->isKnownNonNegative(Level.Distance)) // if may be negative + NewDirection |= Dependence::DVEntry::GT; + Level.Direction &= NewDirection; + } + else if (CurConstraint.isLine()) { + Level.Scalar = false; + Level.Distance = NULL; + // direction should be accurate + } + else if (CurConstraint.isPoint()) { + Level.Scalar = false; + Level.Distance = NULL; + unsigned NewDirection = Dependence::DVEntry::NONE; + if (!isKnownPredicate(CmpInst::ICMP_NE, + CurConstraint.getY(), + CurConstraint.getX())) + // if X may be = Y + NewDirection |= Dependence::DVEntry::EQ; + if (!isKnownPredicate(CmpInst::ICMP_SLE, + CurConstraint.getY(), + CurConstraint.getX())) + // if Y may be > X + NewDirection |= Dependence::DVEntry::LT; + if (!isKnownPredicate(CmpInst::ICMP_SGE, + CurConstraint.getY(), + CurConstraint.getX())) + // if Y may be < X + NewDirection |= Dependence::DVEntry::GT; + Level.Direction &= NewDirection; + } + else + llvm_unreachable("constraint has unexpected kind"); +} + + +//===----------------------------------------------------------------------===// + +#ifndef NDEBUG +// For debugging purposes, dump a small bit vector to dbgs(). +static void dumpSmallBitVector(SmallBitVector &BV) { + dbgs() << "{"; + for (int VI = BV.find_first(); VI >= 0; VI = BV.find_next(VI)) { + dbgs() << VI; + if (BV.find_next(VI) >= 0) + dbgs() << ' '; + } + dbgs() << "}\n"; +} +#endif + + +// depends - +// Returns NULL if there is no dependence. +// Otherwise, return a Dependence with as many details as possible. +// Corresponds to Section 3.1 in the paper +// +// Practical Dependence Testing +// Goff, Kennedy, Tseng +// PLDI 1991 +// +// Care is required to keep the code below up to date w.r.t. this routine. 
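+// A typical caller might look like this (a rough sketch only; Store, Load,
+// and the surrounding pass are placeholders, not part of this patch):
+//
+//   DependenceAnalysis *DA = &getAnalysis<DependenceAnalysis>();
+//   if (Dependence *D = DA->depends(Store, Load, true)) {
+//     if (D->isFlow() && !D->isLoopIndependent())
+//       ...                       // loop-carried flow dependence
+//     delete D;                   // the caller owns the returned Dependence
+//   }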
+Dependence *DependenceAnalysis::depends(const Instruction *Src, + const Instruction *Dst, + bool PossiblyLoopIndependent) { + if ((!Src->mayReadFromMemory() && !Src->mayWriteToMemory()) || + (!Dst->mayReadFromMemory() && !Dst->mayWriteToMemory())) + // if both instructions don't reference memory, there's no dependence + return NULL; + + if (!isLoadOrStore(Src) || !isLoadOrStore(Dst)) + // can only analyze simple loads and stores, i.e., no calls, invokes, etc. + return new Dependence(Src, Dst); + + const Value *SrcPtr = getPointerOperand(Src); + const Value *DstPtr = getPointerOperand(Dst); + + switch (underlyingObjectsAlias(AA, DstPtr, SrcPtr)) { + case AliasAnalysis::MayAlias: + case AliasAnalysis::PartialAlias: + // cannot analyse objects if we don't understand their aliasing. + return new Dependence(Src, Dst); + case AliasAnalysis::NoAlias: + // If the objects noalias, they are distinct, accesses are independent. + return NULL; + case AliasAnalysis::MustAlias: + break; // The underlying objects alias; test accesses for dependence. + } + + const GEPOperator *SrcGEP = dyn_cast(SrcPtr); + const GEPOperator *DstGEP = dyn_cast(DstPtr); + if (!SrcGEP || !DstGEP) + return new Dependence(Src, Dst); // missing GEP, assume dependence + + if (SrcGEP->getPointerOperandType() != DstGEP->getPointerOperandType()) + return new Dependence(Src, Dst); // different types, assume dependence + + // establish loop nesting levels + establishNestingLevels(Src, Dst); + DEBUG(dbgs() << " common nesting levels = " << CommonLevels << "\n"); + DEBUG(dbgs() << " maximum nesting levels = " << MaxLevels << "\n"); + + FullDependence Result(Src, Dst, PossiblyLoopIndependent, CommonLevels); + ++TotalArrayPairs; + + // classify subscript pairs + unsigned Pairs = SrcGEP->idx_end() - SrcGEP->idx_begin(); + SmallVector Pair(Pairs); + for (unsigned SI = 0; SI < Pairs; ++SI) { + Pair[SI].Loops.resize(MaxLevels + 1); + Pair[SI].GroupLoops.resize(MaxLevels + 1); + Pair[SI].Group.resize(Pairs); + } + Pairs = 0; + for (GEPOperator::const_op_iterator SrcIdx = SrcGEP->idx_begin(), + SrcEnd = SrcGEP->idx_end(), + DstIdx = DstGEP->idx_begin(), + DstEnd = DstGEP->idx_end(); + SrcIdx != SrcEnd && DstIdx != DstEnd; + ++SrcIdx, ++DstIdx, ++Pairs) { + Pair[Pairs].Src = SE->getSCEV(*SrcIdx); + Pair[Pairs].Dst = SE->getSCEV(*DstIdx); + removeMatchingExtensions(&Pair[Pairs]); + Pair[Pairs].Classification = + classifyPair(Pair[Pairs].Src, LI->getLoopFor(Src->getParent()), + Pair[Pairs].Dst, LI->getLoopFor(Dst->getParent()), + Pair[Pairs].Loops); + Pair[Pairs].GroupLoops = Pair[Pairs].Loops; + Pair[Pairs].Group.set(Pairs); + DEBUG(dbgs() << " subscript " << Pairs << "\n"); + DEBUG(dbgs() << "\tsrc = " << *Pair[Pairs].Src << "\n"); + DEBUG(dbgs() << "\tdst = " << *Pair[Pairs].Dst << "\n"); + DEBUG(dbgs() << "\tclass = " << Pair[Pairs].Classification << "\n"); + DEBUG(dbgs() << "\tloops = "); + DEBUG(dumpSmallBitVector(Pair[Pairs].Loops)); + } + + SmallBitVector Separable(Pairs); + SmallBitVector Coupled(Pairs); + + // Partition subscripts into separable and minimally-coupled groups + // Algorithm in paper is algorithmically better; + // this may be faster in practice. Check someday. + // + // Here's an example of how it works. Consider this code: + // + // for (i = ...) { + // for (j = ...) { + // for (k = ...) { + // for (l = ...) { + // for (m = ...) { + // A[i][j][k][m] = ...; + // ... 
= A[0][j][l][i + j]; + // } + // } + // } + // } + // } + // + // There are 4 subscripts here: + // 0 [i] and [0] + // 1 [j] and [j] + // 2 [k] and [l] + // 3 [m] and [i + j] + // + // We've already classified each subscript pair as ZIV, SIV, etc., + // and collected all the loops mentioned by pair P in Pair[P].Loops. + // In addition, we've initialized Pair[P].GroupLoops to Pair[P].Loops + // and set Pair[P].Group = {P}. + // + // Src Dst Classification Loops GroupLoops Group + // 0 [i] [0] SIV {1} {1} {0} + // 1 [j] [j] SIV {2} {2} {1} + // 2 [k] [l] RDIV {3,4} {3,4} {2} + // 3 [m] [i + j] MIV {1,2,5} {1,2,5} {3} + // + // For each subscript SI 0 .. 3, we consider each remaining subscript, SJ. + // So, 0 is compared against 1, 2, and 3; 1 is compared against 2 and 3, etc. + // + // We begin by comparing 0 and 1. The intersection of the GroupLoops is empty. + // Next, 0 and 2. Again, the intersection of their GroupLoops is empty. + // Next 0 and 3. The intersection of their GroupLoop = {1}, not empty, + // so Pair[3].Group = {0,3} and Done = false (that is, 0 will not be added + // to either Separable or Coupled). + // + // Next, we consider 1 and 2. The intersection of the GroupLoops is empty. + // Next, 1 and 3. The intersectionof their GroupLoops = {2}, not empty, + // so Pair[3].Group = {0, 1, 3} and Done = false. + // + // Next, we compare 2 against 3. The intersection of the GroupLoops is empty. + // Since Done remains true, we add 2 to the set of Separable pairs. + // + // Finally, we consider 3. There's nothing to compare it with, + // so Done remains true and we add it to the Coupled set. + // Pair[3].Group = {0, 1, 3} and GroupLoops = {1, 2, 5}. + // + // In the end, we've got 1 separable subscript and 1 coupled group. + for (unsigned SI = 0; SI < Pairs; ++SI) { + if (Pair[SI].Classification == Subscript::NonLinear) { + // ignore these, but collect loops for later + ++NonlinearSubscriptPairs; + collectCommonLoops(Pair[SI].Src, + LI->getLoopFor(Src->getParent()), + Pair[SI].Loops); + collectCommonLoops(Pair[SI].Dst, + LI->getLoopFor(Dst->getParent()), + Pair[SI].Loops); + Result.Consistent = false; + } + else if (Pair[SI].Classification == Subscript::ZIV) { + // always separable + Separable.set(SI); + } + else { + // SIV, RDIV, or MIV, so check for coupled group + bool Done = true; + for (unsigned SJ = SI + 1; SJ < Pairs; ++SJ) { + SmallBitVector Intersection = Pair[SI].GroupLoops; + Intersection &= Pair[SJ].GroupLoops; + if (Intersection.any()) { + // accumulate set of all the loops in group + Pair[SJ].GroupLoops |= Pair[SI].GroupLoops; + // accumulate set of all subscripts in group + Pair[SJ].Group |= Pair[SI].Group; + Done = false; + } + } + if (Done) { + if (Pair[SI].Group.count() == 1) { + Separable.set(SI); + ++SeparableSubscriptPairs; + } + else { + Coupled.set(SI); + ++CoupledSubscriptPairs; + } + } + } + } + + DEBUG(dbgs() << " Separable = "); + DEBUG(dumpSmallBitVector(Separable)); + DEBUG(dbgs() << " Coupled = "); + DEBUG(dumpSmallBitVector(Coupled)); + + Constraint NewConstraint; + NewConstraint.setAny(SE); + + // test separable subscripts + for (int SI = Separable.find_first(); SI >= 0; SI = Separable.find_next(SI)) { + DEBUG(dbgs() << "testing subscript " << SI); + switch (Pair[SI].Classification) { + case Subscript::ZIV: + DEBUG(dbgs() << ", ZIV\n"); + if (testZIV(Pair[SI].Src, Pair[SI].Dst, Result)) + return NULL; + break; + case Subscript::SIV: { + DEBUG(dbgs() << ", SIV\n"); + unsigned Level; + const SCEV *SplitIter = NULL; + if (testSIV(Pair[SI].Src, 
Pair[SI].Dst, Level, + Result, NewConstraint, SplitIter)) + return NULL; + break; + } + case Subscript::RDIV: + DEBUG(dbgs() << ", RDIV\n"); + if (testRDIV(Pair[SI].Src, Pair[SI].Dst, Result)) + return NULL; + break; + case Subscript::MIV: + DEBUG(dbgs() << ", MIV\n"); + if (testMIV(Pair[SI].Src, Pair[SI].Dst, Pair[SI].Loops, Result)) + return NULL; + break; + default: + llvm_unreachable("subscript has unexpected classification"); + } + } + + if (Coupled.count()) { + // test coupled subscript groups + DEBUG(dbgs() << "starting on coupled subscripts\n"); + DEBUG(dbgs() << "MaxLevels + 1 = " << MaxLevels + 1 << "\n"); + SmallVector Constraints(MaxLevels + 1); + for (unsigned II = 0; II <= MaxLevels; ++II) + Constraints[II].setAny(SE); + for (int SI = Coupled.find_first(); SI >= 0; SI = Coupled.find_next(SI)) { + DEBUG(dbgs() << "testing subscript group " << SI << " { "); + SmallBitVector Group(Pair[SI].Group); + SmallBitVector Sivs(Pairs); + SmallBitVector Mivs(Pairs); + SmallBitVector ConstrainedLevels(MaxLevels + 1); + for (int SJ = Group.find_first(); SJ >= 0; SJ = Group.find_next(SJ)) { + DEBUG(dbgs() << SJ << " "); + if (Pair[SJ].Classification == Subscript::SIV) + Sivs.set(SJ); + else + Mivs.set(SJ); + } + DEBUG(dbgs() << "}\n"); + while (Sivs.any()) { + bool Changed = false; + for (int SJ = Sivs.find_first(); SJ >= 0; SJ = Sivs.find_next(SJ)) { + DEBUG(dbgs() << "testing subscript " << SJ << ", SIV\n"); + // SJ is an SIV subscript that's part of the current coupled group + unsigned Level; + const SCEV *SplitIter = NULL; + DEBUG(dbgs() << "SIV\n"); + if (testSIV(Pair[SJ].Src, Pair[SJ].Dst, Level, + Result, NewConstraint, SplitIter)) + return NULL; + ConstrainedLevels.set(Level); + if (intersectConstraints(&Constraints[Level], &NewConstraint)) { + if (Constraints[Level].isEmpty()) { + ++DeltaIndependence; + return NULL; + } + Changed = true; + } + Sivs.reset(SJ); + } + if (Changed) { + // propagate, possibly creating new SIVs and ZIVs + DEBUG(dbgs() << " propagating\n"); + DEBUG(dbgs() << "\tMivs = "); + DEBUG(dumpSmallBitVector(Mivs)); + for (int SJ = Mivs.find_first(); SJ >= 0; SJ = Mivs.find_next(SJ)) { + // SJ is an MIV subscript that's part of the current coupled group + DEBUG(dbgs() << "\tSJ = " << SJ << "\n"); + if (propagate(Pair[SJ].Src, Pair[SJ].Dst, Pair[SJ].Loops, + Constraints, Result.Consistent)) { + DEBUG(dbgs() << "\t Changed\n"); + ++DeltaPropagations; + Pair[SJ].Classification = + classifyPair(Pair[SJ].Src, LI->getLoopFor(Src->getParent()), + Pair[SJ].Dst, LI->getLoopFor(Dst->getParent()), + Pair[SJ].Loops); + switch (Pair[SJ].Classification) { + case Subscript::ZIV: + DEBUG(dbgs() << "ZIV\n"); + if (testZIV(Pair[SJ].Src, Pair[SJ].Dst, Result)) + return NULL; + Mivs.reset(SJ); + break; + case Subscript::SIV: + Sivs.set(SJ); + Mivs.reset(SJ); + break; + case Subscript::RDIV: + case Subscript::MIV: + break; + default: + llvm_unreachable("bad subscript classification"); + } + } + } + } + } + + // test & propagate remaining RDIVs + for (int SJ = Mivs.find_first(); SJ >= 0; SJ = Mivs.find_next(SJ)) { + if (Pair[SJ].Classification == Subscript::RDIV) { + DEBUG(dbgs() << "RDIV test\n"); + if (testRDIV(Pair[SJ].Src, Pair[SJ].Dst, Result)) + return NULL; + // I don't yet understand how to propagate RDIV results + Mivs.reset(SJ); + } + } + + // test remaining MIVs + // This code is temporary. + // Better to somehow test all remaining subscripts simultaneously. 
+ for (int SJ = Mivs.find_first(); SJ >= 0; SJ = Mivs.find_next(SJ)) { + if (Pair[SJ].Classification == Subscript::MIV) { + DEBUG(dbgs() << "MIV test\n"); + if (testMIV(Pair[SJ].Src, Pair[SJ].Dst, Pair[SJ].Loops, Result)) + return NULL; + } + else + llvm_unreachable("expected only MIV subscripts at this point"); + } + + // update Result.DV from constraint vector + DEBUG(dbgs() << " updating\n"); + for (int SJ = ConstrainedLevels.find_first(); + SJ >= 0; SJ = ConstrainedLevels.find_next(SJ)) { + updateDirection(Result.DV[SJ - 1], Constraints[SJ]); + if (Result.DV[SJ - 1].Direction == Dependence::DVEntry::NONE) + return NULL; + } + } + } + + // make sure Scalar flags are set correctly + SmallBitVector CompleteLoops(MaxLevels + 1); + for (unsigned SI = 0; SI < Pairs; ++SI) + CompleteLoops |= Pair[SI].Loops; + for (unsigned II = 1; II <= CommonLevels; ++II) + if (CompleteLoops[II]) + Result.DV[II - 1].Scalar = false; + + // make sure loopIndepent flag is set correctly + if (PossiblyLoopIndependent) { + for (unsigned II = 1; II <= CommonLevels; ++II) { + if (!(Result.getDirection(II) & Dependence::DVEntry::EQ)) { + Result.LoopIndependent = false; + break; + } + } + } + + FullDependence *Final = new FullDependence(Result); + Result.DV = NULL; + return Final; +} + + + +//===----------------------------------------------------------------------===// +// getSplitIteration - +// Rather than spend rarely-used space recording the splitting iteration +// during the Weak-Crossing SIV test, we re-compute it on demand. +// The re-computation is basically a repeat of the entire dependence test, +// though simplified since we know that the dependence exists. +// It's tedious, since we must go through all propagations, etc. +// +// Care is required to keep this code up to date w.r.t. the code above. +// +// Generally, the dependence analyzer will be used to build +// a dependence graph for a function (basically a map from instructions +// to dependences). Looking for cycles in the graph shows us loops +// that cannot be trivially vectorized/parallelized. +// +// We can try to improve the situation by examining all the dependences +// that make up the cycle, looking for ones we can break. +// Sometimes, peeling the first or last iteration of a loop will break +// dependences, and we've got flags for those possibilities. +// Sometimes, splitting a loop at some other iteration will do the trick, +// and we've got a flag for that case. Rather than waste the space to +// record the exact iteration (since we rarely know), we provide +// a method that calculates the iteration. It's a drag that it must work +// from scratch, but wonderful in that it's possible. +// +// Here's an example: +// +// for (i = 0; i < 10; i++) +// A[i] = ... +// ... = A[11 - i] +// +// There's a loop-carried flow dependence from the store to the load, +// found by the weak-crossing SIV test. The dependence will have a flag, +// indicating that the dependence can be broken by splitting the loop. +// Calling getSplitIteration will return 5. +// Splitting the loop breaks the dependence, like so: +// +// for (i = 0; i <= 5; i++) +// A[i] = ... +// ... = A[11 - i] +// for (i = 6; i < 10; i++) +// A[i] = ... +// ... = A[11 - i] +// +// breaks the dependence and allows us to vectorize/parallelize +// both loops. 
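+// A caller might use it like this (a rough sketch; D is a Dependence returned
+// by depends() and Level is a level for which isSplitable() is true):
+//
+//   if (D->isSplitable(Level)) {
+//     const SCEV *Iter = DA->getSplitIteration(D, Level);
+//     // split the loop at iteration Iter, as in the example above
+//   }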
+const SCEV *DependenceAnalysis::getSplitIteration(const Dependence *Dep, + unsigned SplitLevel) { + assert(Dep && "expected a pointer to a Dependence"); + assert(Dep->isSplitable(SplitLevel) && + "Dep should be splitable at SplitLevel"); + const Instruction *Src = Dep->getSrc(); + const Instruction *Dst = Dep->getDst(); + assert(Src->mayReadFromMemory() || Src->mayWriteToMemory()); + assert(Dst->mayReadFromMemory() || Dst->mayWriteToMemory()); + assert(isLoadOrStore(Src)); + assert(isLoadOrStore(Dst)); + const Value *SrcPtr = getPointerOperand(Src); + const Value *DstPtr = getPointerOperand(Dst); + assert(underlyingObjectsAlias(AA, DstPtr, SrcPtr) == + AliasAnalysis::MustAlias); + const GEPOperator *SrcGEP = dyn_cast(SrcPtr); + const GEPOperator *DstGEP = dyn_cast(DstPtr); + assert(SrcGEP); + assert(DstGEP); + assert(SrcGEP->getPointerOperandType() == DstGEP->getPointerOperandType()); + + // establish loop nesting levels + establishNestingLevels(Src, Dst); + + FullDependence Result(Src, Dst, false, CommonLevels); + + // classify subscript pairs + unsigned Pairs = SrcGEP->idx_end() - SrcGEP->idx_begin(); + SmallVector Pair(Pairs); + for (unsigned SI = 0; SI < Pairs; ++SI) { + Pair[SI].Loops.resize(MaxLevels + 1); + Pair[SI].GroupLoops.resize(MaxLevels + 1); + Pair[SI].Group.resize(Pairs); + } + Pairs = 0; + for (GEPOperator::const_op_iterator SrcIdx = SrcGEP->idx_begin(), + SrcEnd = SrcGEP->idx_end(), + DstIdx = DstGEP->idx_begin(), + DstEnd = DstGEP->idx_end(); + SrcIdx != SrcEnd && DstIdx != DstEnd; + ++SrcIdx, ++DstIdx, ++Pairs) { + Pair[Pairs].Src = SE->getSCEV(*SrcIdx); + Pair[Pairs].Dst = SE->getSCEV(*DstIdx); + Pair[Pairs].Classification = + classifyPair(Pair[Pairs].Src, LI->getLoopFor(Src->getParent()), + Pair[Pairs].Dst, LI->getLoopFor(Dst->getParent()), + Pair[Pairs].Loops); + Pair[Pairs].GroupLoops = Pair[Pairs].Loops; + Pair[Pairs].Group.set(Pairs); + } + + SmallBitVector Separable(Pairs); + SmallBitVector Coupled(Pairs); + + // partition subscripts into separable and minimally-coupled groups + for (unsigned SI = 0; SI < Pairs; ++SI) { + if (Pair[SI].Classification == Subscript::NonLinear) { + // ignore these, but collect loops for later + collectCommonLoops(Pair[SI].Src, + LI->getLoopFor(Src->getParent()), + Pair[SI].Loops); + collectCommonLoops(Pair[SI].Dst, + LI->getLoopFor(Dst->getParent()), + Pair[SI].Loops); + Result.Consistent = false; + } + else if (Pair[SI].Classification == Subscript::ZIV) + Separable.set(SI); + else { + // SIV, RDIV, or MIV, so check for coupled group + bool Done = true; + for (unsigned SJ = SI + 1; SJ < Pairs; ++SJ) { + SmallBitVector Intersection = Pair[SI].GroupLoops; + Intersection &= Pair[SJ].GroupLoops; + if (Intersection.any()) { + // accumulate set of all the loops in group + Pair[SJ].GroupLoops |= Pair[SI].GroupLoops; + // accumulate set of all subscripts in group + Pair[SJ].Group |= Pair[SI].Group; + Done = false; + } + } + if (Done) { + if (Pair[SI].Group.count() == 1) + Separable.set(SI); + else + Coupled.set(SI); + } + } + } + + Constraint NewConstraint; + NewConstraint.setAny(SE); + + // test separable subscripts + for (int SI = Separable.find_first(); SI >= 0; SI = Separable.find_next(SI)) { + switch (Pair[SI].Classification) { + case Subscript::SIV: { + unsigned Level; + const SCEV *SplitIter = NULL; + (void) testSIV(Pair[SI].Src, Pair[SI].Dst, Level, + Result, NewConstraint, SplitIter); + if (Level == SplitLevel) { + assert(SplitIter != NULL); + return SplitIter; + } + break; + } + case Subscript::ZIV: + case Subscript::RDIV: + case 
Subscript::MIV: + break; + default: + llvm_unreachable("subscript has unexpected classification"); + } + } + + if (Coupled.count()) { + // test coupled subscript groups + SmallVector Constraints(MaxLevels + 1); + for (unsigned II = 0; II <= MaxLevels; ++II) + Constraints[II].setAny(SE); + for (int SI = Coupled.find_first(); SI >= 0; SI = Coupled.find_next(SI)) { + SmallBitVector Group(Pair[SI].Group); + SmallBitVector Sivs(Pairs); + SmallBitVector Mivs(Pairs); + SmallBitVector ConstrainedLevels(MaxLevels + 1); + for (int SJ = Group.find_first(); SJ >= 0; SJ = Group.find_next(SJ)) { + if (Pair[SJ].Classification == Subscript::SIV) + Sivs.set(SJ); + else + Mivs.set(SJ); + } + while (Sivs.any()) { + bool Changed = false; + for (int SJ = Sivs.find_first(); SJ >= 0; SJ = Sivs.find_next(SJ)) { + // SJ is an SIV subscript that's part of the current coupled group + unsigned Level; + const SCEV *SplitIter = NULL; + (void) testSIV(Pair[SJ].Src, Pair[SJ].Dst, Level, + Result, NewConstraint, SplitIter); + if (Level == SplitLevel && SplitIter) + return SplitIter; + ConstrainedLevels.set(Level); + if (intersectConstraints(&Constraints[Level], &NewConstraint)) + Changed = true; + Sivs.reset(SJ); + } + if (Changed) { + // propagate, possibly creating new SIVs and ZIVs + for (int SJ = Mivs.find_first(); SJ >= 0; SJ = Mivs.find_next(SJ)) { + // SJ is an MIV subscript that's part of the current coupled group + if (propagate(Pair[SJ].Src, Pair[SJ].Dst, + Pair[SJ].Loops, Constraints, Result.Consistent)) { + Pair[SJ].Classification = + classifyPair(Pair[SJ].Src, LI->getLoopFor(Src->getParent()), + Pair[SJ].Dst, LI->getLoopFor(Dst->getParent()), + Pair[SJ].Loops); + switch (Pair[SJ].Classification) { + case Subscript::ZIV: + Mivs.reset(SJ); + break; + case Subscript::SIV: + Sivs.set(SJ); + Mivs.reset(SJ); + break; + case Subscript::RDIV: + case Subscript::MIV: + break; + default: + llvm_unreachable("bad subscript classification"); + } + } + } + } + } + } + } + llvm_unreachable("somehow reached end of routine"); + return NULL; +} diff --git a/llvm/test/Analysis/DependenceAnalysis/Banerjee.ll b/llvm/test/Analysis/DependenceAnalysis/Banerjee.ll new file mode 100644 index 0000000..8865ee9 --- /dev/null +++ b/llvm/test/Analysis/DependenceAnalysis/Banerjee.ll @@ -0,0 +1,595 @@ +; RUN: opt < %s -analyze -basicaa -da | FileCheck %s + +; ModuleID = 'Banerjee.bc' +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.6.0" + + +;; for (long int i = 1; i <= 10; i++) +;; for (long int j = 1; j <= 10; j++) { +;; A[10*i + j] = ... +;; ... 
= A[10*i + j - 1]; + +define void @banerjee0(i64* %A, i64* %B, i64 %m, i64 %n) nounwind uwtable ssp { +entry: + br label %for.cond1.preheader + +for.cond1.preheader: ; preds = %entry, %for.inc7 + %B.addr.04 = phi i64* [ %B, %entry ], [ %scevgep, %for.inc7 ] + %i.03 = phi i64 [ 1, %entry ], [ %inc8, %for.inc7 ] + br label %for.body3 + +for.body3: ; preds = %for.cond1.preheader, %for.body3 + %j.02 = phi i64 [ 1, %for.cond1.preheader ], [ %inc, %for.body3 ] + %B.addr.11 = phi i64* [ %B.addr.04, %for.cond1.preheader ], [ %incdec.ptr, %for.body3 ] + %mul = mul nsw i64 %i.03, 10 + %add = add nsw i64 %mul, %j.02 + %arrayidx = getelementptr inbounds i64* %A, i64 %add + store i64 0, i64* %arrayidx, align 8 + %mul4 = mul nsw i64 %i.03, 10 + %add5 = add nsw i64 %mul4, %j.02 + %sub = add nsw i64 %add5, -1 + %arrayidx6 = getelementptr inbounds i64* %A, i64 %sub + %0 = load i64* %arrayidx6, align 8 +; CHECK: da analyze - flow [<= <>]! + %incdec.ptr = getelementptr inbounds i64* %B.addr.11, i64 1 + store i64 %0, i64* %B.addr.11, align 8 + %inc = add nsw i64 %j.02, 1 + %exitcond = icmp ne i64 %inc, 11 + br i1 %exitcond, label %for.body3, label %for.inc7 + +for.inc7: ; preds = %for.body3 + %scevgep = getelementptr i64* %B.addr.04, i64 10 + %inc8 = add nsw i64 %i.03, 1 + %exitcond5 = icmp ne i64 %inc8, 11 + br i1 %exitcond5, label %for.cond1.preheader, label %for.end9 + +for.end9: ; preds = %for.inc7 + ret void +} + + +;; for (long int i = 1; i <= n; i++) +;; for (long int j = 1; j <= m; j++) { +;; A[10*i + j] = ... +;; ... = A[10*i + j - 1]; + +define void @banerjee1(i64* %A, i64* %B, i64 %m, i64 %n) nounwind uwtable ssp { +entry: + %cmp4 = icmp sgt i64 %n, 0 + br i1 %cmp4, label %for.cond1.preheader.preheader, label %for.end9 + +for.cond1.preheader.preheader: ; preds = %entry + %0 = add i64 %n, 1 + br label %for.cond1.preheader + +for.cond1.preheader: ; preds = %for.cond1.preheader.preheader, %for.inc7 + %B.addr.06 = phi i64* [ %B.addr.1.lcssa, %for.inc7 ], [ %B, %for.cond1.preheader.preheader ] + %i.05 = phi i64 [ %inc8, %for.inc7 ], [ 1, %for.cond1.preheader.preheader ] + %1 = add i64 %m, 1 + %cmp21 = icmp sgt i64 %m, 0 + br i1 %cmp21, label %for.body3.preheader, label %for.inc7 + +for.body3.preheader: ; preds = %for.cond1.preheader + br label %for.body3 + +for.body3: ; preds = %for.body3.preheader, %for.body3 + %j.03 = phi i64 [ %inc, %for.body3 ], [ 1, %for.body3.preheader ] + %B.addr.12 = phi i64* [ %incdec.ptr, %for.body3 ], [ %B.addr.06, %for.body3.preheader ] + %mul = mul nsw i64 %i.05, 10 + %add = add nsw i64 %mul, %j.03 + %arrayidx = getelementptr inbounds i64* %A, i64 %add + store i64 0, i64* %arrayidx, align 8 + %mul4 = mul nsw i64 %i.05, 10 + %add5 = add nsw i64 %mul4, %j.03 + %sub = add nsw i64 %add5, -1 + %arrayidx6 = getelementptr inbounds i64* %A, i64 %sub + %2 = load i64* %arrayidx6, align 8 +; CHECK: da analyze - flow [* <>]! 
+ %incdec.ptr = getelementptr inbounds i64* %B.addr.12, i64 1 + store i64 %2, i64* %B.addr.12, align 8 + %inc = add nsw i64 %j.03, 1 + %exitcond = icmp eq i64 %inc, %1 + br i1 %exitcond, label %for.inc7.loopexit, label %for.body3 + +for.inc7.loopexit: ; preds = %for.body3 + %scevgep = getelementptr i64* %B.addr.06, i64 %m + br label %for.inc7 + +for.inc7: ; preds = %for.inc7.loopexit, %for.cond1.preheader + %B.addr.1.lcssa = phi i64* [ %B.addr.06, %for.cond1.preheader ], [ %scevgep, %for.inc7.loopexit ] + %inc8 = add nsw i64 %i.05, 1 + %exitcond7 = icmp eq i64 %inc8, %0 + br i1 %exitcond7, label %for.end9.loopexit, label %for.cond1.preheader + +for.end9.loopexit: ; preds = %for.inc7 + br label %for.end9 + +for.end9: ; preds = %for.end9.loopexit, %entry + ret void +} + + +;; for (long int i = 0; i < 10; i++) +;; for (long int j = 0; j < 10; j++) { +;; A[10*i + j] = 0; +;; *B++ = A[10*i + j + 100]; + +define void @banerjee2(i64* %A, i64* %B, i64 %m, i64 %n) nounwind uwtable ssp { +entry: + br label %for.cond1.preheader + +for.cond1.preheader: ; preds = %entry, %for.inc8 + %B.addr.04 = phi i64* [ %B, %entry ], [ %scevgep, %for.inc8 ] + %i.03 = phi i64 [ 0, %entry ], [ %inc9, %for.inc8 ] + br label %for.body3 + +for.body3: ; preds = %for.cond1.preheader, %for.body3 + %j.02 = phi i64 [ 0, %for.cond1.preheader ], [ %inc, %for.body3 ] + %B.addr.11 = phi i64* [ %B.addr.04, %for.cond1.preheader ], [ %incdec.ptr, %for.body3 ] + %mul = mul nsw i64 %i.03, 10 + %add = add nsw i64 %mul, %j.02 + %arrayidx = getelementptr inbounds i64* %A, i64 %add + store i64 0, i64* %arrayidx, align 8 + %mul4 = mul nsw i64 %i.03, 10 + %add5 = add nsw i64 %mul4, %j.02 + %add6 = add nsw i64 %add5, 100 + %arrayidx7 = getelementptr inbounds i64* %A, i64 %add6 + %0 = load i64* %arrayidx7, align 8 +; CHECK: da analyze - none! + %incdec.ptr = getelementptr inbounds i64* %B.addr.11, i64 1 + store i64 %0, i64* %B.addr.11, align 8 + %inc = add nsw i64 %j.02, 1 + %exitcond = icmp ne i64 %inc, 10 + br i1 %exitcond, label %for.body3, label %for.inc8 + +for.inc8: ; preds = %for.body3 + %scevgep = getelementptr i64* %B.addr.04, i64 10 + %inc9 = add nsw i64 %i.03, 1 + %exitcond5 = icmp ne i64 %inc9, 10 + br i1 %exitcond5, label %for.cond1.preheader, label %for.end10 + +for.end10: ; preds = %for.inc8 + ret void +} + + +;; for (long int i = 0; i < 10; i++) +;; for (long int j = 0; j < 10; j++) { +;; A[10*i + j] = ... +;; ... = A[10*i + j + 99]; + +define void @banerjee3(i64* %A, i64* %B, i64 %m, i64 %n) nounwind uwtable ssp { +entry: + br label %for.cond1.preheader + +for.cond1.preheader: ; preds = %entry, %for.inc8 + %B.addr.04 = phi i64* [ %B, %entry ], [ %scevgep, %for.inc8 ] + %i.03 = phi i64 [ 0, %entry ], [ %inc9, %for.inc8 ] + br label %for.body3 + +for.body3: ; preds = %for.cond1.preheader, %for.body3 + %j.02 = phi i64 [ 0, %for.cond1.preheader ], [ %inc, %for.body3 ] + %B.addr.11 = phi i64* [ %B.addr.04, %for.cond1.preheader ], [ %incdec.ptr, %for.body3 ] + %mul = mul nsw i64 %i.03, 10 + %add = add nsw i64 %mul, %j.02 + %arrayidx = getelementptr inbounds i64* %A, i64 %add + store i64 0, i64* %arrayidx, align 8 + %mul4 = mul nsw i64 %i.03, 10 + %add5 = add nsw i64 %mul4, %j.02 + %add6 = add nsw i64 %add5, 99 + %arrayidx7 = getelementptr inbounds i64* %A, i64 %add6 + %0 = load i64* %arrayidx7, align 8 +; CHECK: da analyze - flow [> >]! 
+ %incdec.ptr = getelementptr inbounds i64* %B.addr.11, i64 1 + store i64 %0, i64* %B.addr.11, align 8 + %inc = add nsw i64 %j.02, 1 + %exitcond = icmp ne i64 %inc, 10 + br i1 %exitcond, label %for.body3, label %for.inc8 + +for.inc8: ; preds = %for.body3 + %scevgep = getelementptr i64* %B.addr.04, i64 10 + %inc9 = add nsw i64 %i.03, 1 + %exitcond5 = icmp ne i64 %inc9, 10 + br i1 %exitcond5, label %for.cond1.preheader, label %for.end10 + +for.end10: ; preds = %for.inc8 + ret void +} + + +;; for (long int i = 0; i < 10; i++) +;; for (long int j = 0; j < 10; j++) { +;; A[10*i + j] = ... +;; ... = A[10*i + j - 100]; + +define void @banerjee4(i64* %A, i64* %B, i64 %m, i64 %n) nounwind uwtable ssp { +entry: + br label %for.cond1.preheader + +for.cond1.preheader: ; preds = %entry, %for.inc7 + %B.addr.04 = phi i64* [ %B, %entry ], [ %scevgep, %for.inc7 ] + %i.03 = phi i64 [ 0, %entry ], [ %inc8, %for.inc7 ] + br label %for.body3 + +for.body3: ; preds = %for.cond1.preheader, %for.body3 + %j.02 = phi i64 [ 0, %for.cond1.preheader ], [ %inc, %for.body3 ] + %B.addr.11 = phi i64* [ %B.addr.04, %for.cond1.preheader ], [ %incdec.ptr, %for.body3 ] + %mul = mul nsw i64 %i.03, 10 + %add = add nsw i64 %mul, %j.02 + %arrayidx = getelementptr inbounds i64* %A, i64 %add + store i64 0, i64* %arrayidx, align 8 + %mul4 = mul nsw i64 %i.03, 10 + %add5 = add nsw i64 %mul4, %j.02 + %sub = add nsw i64 %add5, -100 + %arrayidx6 = getelementptr inbounds i64* %A, i64 %sub + %0 = load i64* %arrayidx6, align 8 +; CHECK: da analyze - none! + %incdec.ptr = getelementptr inbounds i64* %B.addr.11, i64 1 + store i64 %0, i64* %B.addr.11, align 8 + %inc = add nsw i64 %j.02, 1 + %exitcond = icmp ne i64 %inc, 10 + br i1 %exitcond, label %for.body3, label %for.inc7 + +for.inc7: ; preds = %for.body3 + %scevgep = getelementptr i64* %B.addr.04, i64 10 + %inc8 = add nsw i64 %i.03, 1 + %exitcond5 = icmp ne i64 %inc8, 10 + br i1 %exitcond5, label %for.cond1.preheader, label %for.end9 + +for.end9: ; preds = %for.inc7 + ret void +} + + +;; for (long int i = 0; i < 10; i++) +;; for (long int j = 0; j < 10; j++) { +;; A[10*i + j] = ... +;; ... = A[10*i + j - 99]; + +define void @banerjee5(i64* %A, i64* %B, i64 %m, i64 %n) nounwind uwtable ssp { +entry: + br label %for.cond1.preheader + +for.cond1.preheader: ; preds = %entry, %for.inc7 + %B.addr.04 = phi i64* [ %B, %entry ], [ %scevgep, %for.inc7 ] + %i.03 = phi i64 [ 0, %entry ], [ %inc8, %for.inc7 ] + br label %for.body3 + +for.body3: ; preds = %for.cond1.preheader, %for.body3 + %j.02 = phi i64 [ 0, %for.cond1.preheader ], [ %inc, %for.body3 ] + %B.addr.11 = phi i64* [ %B.addr.04, %for.cond1.preheader ], [ %incdec.ptr, %for.body3 ] + %mul = mul nsw i64 %i.03, 10 + %add = add nsw i64 %mul, %j.02 + %arrayidx = getelementptr inbounds i64* %A, i64 %add + store i64 0, i64* %arrayidx, align 8 + %mul4 = mul nsw i64 %i.03, 10 + %add5 = add nsw i64 %mul4, %j.02 + %sub = add nsw i64 %add5, -99 + %arrayidx6 = getelementptr inbounds i64* %A, i64 %sub + %0 = load i64* %arrayidx6, align 8 +; CHECK: da analyze - flow [< <]! 
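+; With the -99 offset the load reads A[-99..0] while the store writes A[0..99]; only A[0] is touched by both, so a dependence remains here, unlike the disjoint -100 case in banerjee4.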
+ %incdec.ptr = getelementptr inbounds i64* %B.addr.11, i64 1 + store i64 %0, i64* %B.addr.11, align 8 + %inc = add nsw i64 %j.02, 1 + %exitcond = icmp ne i64 %inc, 10 + br i1 %exitcond, label %for.body3, label %for.inc7 + +for.inc7: ; preds = %for.body3 + %scevgep = getelementptr i64* %B.addr.04, i64 10 + %inc8 = add nsw i64 %i.03, 1 + %exitcond5 = icmp ne i64 %inc8, 10 + br i1 %exitcond5, label %for.cond1.preheader, label %for.end9 + +for.end9: ; preds = %for.inc7 + ret void +} + + +;; for (long int i = 0; i < 10; i++) +;; for (long int j = 0; j < 10; j++) { +;; A[10*i + j] = ... +;; ... = A[10*i + j + 9]; + +define void @banerjee6(i64* %A, i64* %B, i64 %m, i64 %n) nounwind uwtable ssp { +entry: + br label %for.cond1.preheader + +for.cond1.preheader: ; preds = %entry, %for.inc8 + %B.addr.04 = phi i64* [ %B, %entry ], [ %scevgep, %for.inc8 ] + %i.03 = phi i64 [ 0, %entry ], [ %inc9, %for.inc8 ] + br label %for.body3 + +for.body3: ; preds = %for.cond1.preheader, %for.body3 + %j.02 = phi i64 [ 0, %for.cond1.preheader ], [ %inc, %for.body3 ] + %B.addr.11 = phi i64* [ %B.addr.04, %for.cond1.preheader ], [ %incdec.ptr, %for.body3 ] + %mul = mul nsw i64 %i.03, 10 + %add = add nsw i64 %mul, %j.02 + %arrayidx = getelementptr inbounds i64* %A, i64 %add + store i64 0, i64* %arrayidx, align 8 + %mul4 = mul nsw i64 %i.03, 10 + %add5 = add nsw i64 %mul4, %j.02 + %add6 = add nsw i64 %add5, 9 + %arrayidx7 = getelementptr inbounds i64* %A, i64 %add6 + %0 = load i64* %arrayidx7, align 8 +; CHECK: da analyze - flow [=> <>]! + %incdec.ptr = getelementptr inbounds i64* %B.addr.11, i64 1 + store i64 %0, i64* %B.addr.11, align 8 + %inc = add nsw i64 %j.02, 1 + %exitcond = icmp ne i64 %inc, 10 + br i1 %exitcond, label %for.body3, label %for.inc8 + +for.inc8: ; preds = %for.body3 + %scevgep = getelementptr i64* %B.addr.04, i64 10 + %inc9 = add nsw i64 %i.03, 1 + %exitcond5 = icmp ne i64 %inc9, 10 + br i1 %exitcond5, label %for.cond1.preheader, label %for.end10 + +for.end10: ; preds = %for.inc8 + ret void +} + + +;; for (long int i = 0; i < 10; i++) +;; for (long int j = 0; j < 10; j++) { +;; A[10*i + j] = ... +;; ... = A[10*i + j + 10]; + +define void @banerjee7(i64* %A, i64* %B, i64 %m, i64 %n) nounwind uwtable ssp { +entry: + br label %for.cond1.preheader + +for.cond1.preheader: ; preds = %entry, %for.inc8 + %B.addr.04 = phi i64* [ %B, %entry ], [ %scevgep, %for.inc8 ] + %i.03 = phi i64 [ 0, %entry ], [ %inc9, %for.inc8 ] + br label %for.body3 + +for.body3: ; preds = %for.cond1.preheader, %for.body3 + %j.02 = phi i64 [ 0, %for.cond1.preheader ], [ %inc, %for.body3 ] + %B.addr.11 = phi i64* [ %B.addr.04, %for.cond1.preheader ], [ %incdec.ptr, %for.body3 ] + %mul = mul nsw i64 %i.03, 10 + %add = add nsw i64 %mul, %j.02 + %arrayidx = getelementptr inbounds i64* %A, i64 %add + store i64 0, i64* %arrayidx, align 8 + %mul4 = mul nsw i64 %i.03, 10 + %add5 = add nsw i64 %mul4, %j.02 + %add6 = add nsw i64 %add5, 10 + %arrayidx7 = getelementptr inbounds i64* %A, i64 %add6 + %0 = load i64* %arrayidx7, align 8 +; CHECK: da analyze - flow [> <=]! 
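+; A collision needs 10*i + j == 10*i' + j' + 10 with j, j' in [0,9], where (i, j) is the store iteration and (i', j') the load iteration; that forces i == i' + 1 and j == j', and the expected direction vector above conservatively covers this solution.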
+ %incdec.ptr = getelementptr inbounds i64* %B.addr.11, i64 1 + store i64 %0, i64* %B.addr.11, align 8 + %inc = add nsw i64 %j.02, 1 + %exitcond = icmp ne i64 %inc, 10 + br i1 %exitcond, label %for.body3, label %for.inc8 + +for.inc8: ; preds = %for.body3 + %scevgep = getelementptr i64* %B.addr.04, i64 10 + %inc9 = add nsw i64 %i.03, 1 + %exitcond5 = icmp ne i64 %inc9, 10 + br i1 %exitcond5, label %for.cond1.preheader, label %for.end10 + +for.end10: ; preds = %for.inc8 + ret void +} + + +;; for (long int i = 0; i < 10; i++) +;; for (long int j = 0; j < 10; j++) { +;; A[10*i + j] = ... +;; ... = A[10*i + j + 11]; + +define void @banerjee8(i64* %A, i64* %B, i64 %m, i64 %n) nounwind uwtable ssp { +entry: + br label %for.cond1.preheader + +for.cond1.preheader: ; preds = %entry, %for.inc8 + %B.addr.04 = phi i64* [ %B, %entry ], [ %scevgep, %for.inc8 ] + %i.03 = phi i64 [ 0, %entry ], [ %inc9, %for.inc8 ] + br label %for.body3 + +for.body3: ; preds = %for.cond1.preheader, %for.body3 + %j.02 = phi i64 [ 0, %for.cond1.preheader ], [ %inc, %for.body3 ] + %B.addr.11 = phi i64* [ %B.addr.04, %for.cond1.preheader ], [ %incdec.ptr, %for.body3 ] + %mul = mul nsw i64 %i.03, 10 + %add = add nsw i64 %mul, %j.02 + %arrayidx = getelementptr inbounds i64* %A, i64 %add + store i64 0, i64* %arrayidx, align 8 + %mul4 = mul nsw i64 %i.03, 10 + %add5 = add nsw i64 %mul4, %j.02 + %add6 = add nsw i64 %add5, 11 + %arrayidx7 = getelementptr inbounds i64* %A, i64 %add6 + %0 = load i64* %arrayidx7, align 8 +; CHECK: da analyze - flow [> <>]! + %incdec.ptr = getelementptr inbounds i64* %B.addr.11, i64 1 + store i64 %0, i64* %B.addr.11, align 8 + %inc = add nsw i64 %j.02, 1 + %exitcond = icmp ne i64 %inc, 10 + br i1 %exitcond, label %for.body3, label %for.inc8 + +for.inc8: ; preds = %for.body3 + %scevgep = getelementptr i64* %B.addr.04, i64 10 + %inc9 = add nsw i64 %i.03, 1 + %exitcond5 = icmp ne i64 %inc9, 10 + br i1 %exitcond5, label %for.cond1.preheader, label %for.end10 + +for.end10: ; preds = %for.inc8 + ret void +} + + +;; for (long int i = 0; i < 20; i++) +;; for (long int j = 0; j < 20; j++) { +;; A[30*i + 500*j] = ... +;; ... = A[i - 500*j + 11]; + +define void @banerjee9(i64* %A, i64* %B, i64 %m, i64 %n) nounwind uwtable ssp { +entry: + br label %for.cond1.preheader + +for.cond1.preheader: ; preds = %entry, %for.inc8 + %B.addr.04 = phi i64* [ %B, %entry ], [ %scevgep, %for.inc8 ] + %i.03 = phi i64 [ 0, %entry ], [ %inc9, %for.inc8 ] + br label %for.body3 + +for.body3: ; preds = %for.cond1.preheader, %for.body3 + %j.02 = phi i64 [ 0, %for.cond1.preheader ], [ %inc, %for.body3 ] + %B.addr.11 = phi i64* [ %B.addr.04, %for.cond1.preheader ], [ %incdec.ptr, %for.body3 ] + %mul = mul nsw i64 %i.03, 30 + %mul4 = mul nsw i64 %j.02, 500 + %add = add nsw i64 %mul, %mul4 + %arrayidx = getelementptr inbounds i64* %A, i64 %add + store i64 0, i64* %arrayidx, align 8 + %0 = mul i64 %j.02, -500 + %sub = add i64 %i.03, %0 + %add6 = add nsw i64 %sub, 11 + %arrayidx7 = getelementptr inbounds i64* %A, i64 %add6 + %1 = load i64* %arrayidx7, align 8 +; CHECK: da analyze - flow [<= =|<]! 
+ %incdec.ptr = getelementptr inbounds i64* %B.addr.11, i64 1 + store i64 %1, i64* %B.addr.11, align 8 + %inc = add nsw i64 %j.02, 1 + %exitcond = icmp ne i64 %inc, 20 + br i1 %exitcond, label %for.body3, label %for.inc8 + +for.inc8: ; preds = %for.body3 + %scevgep = getelementptr i64* %B.addr.04, i64 20 + %inc9 = add nsw i64 %i.03, 1 + %exitcond5 = icmp ne i64 %inc9, 20 + br i1 %exitcond5, label %for.cond1.preheader, label %for.end10 + +for.end10: ; preds = %for.inc8 + ret void +} + + +;; for (long int i = 0; i < 20; i++) +;; for (long int j = 0; j < 20; j++) { +;; A[i + 500*j] = ... +;; ... = A[i - 500*j + 11]; + +define void @banerjee10(i64* %A, i64* %B, i64 %m, i64 %n) nounwind uwtable ssp { +entry: + br label %for.cond1.preheader + +for.cond1.preheader: ; preds = %entry, %for.inc7 + %B.addr.04 = phi i64* [ %B, %entry ], [ %scevgep, %for.inc7 ] + %i.03 = phi i64 [ 0, %entry ], [ %inc8, %for.inc7 ] + br label %for.body3 + +for.body3: ; preds = %for.cond1.preheader, %for.body3 + %j.02 = phi i64 [ 0, %for.cond1.preheader ], [ %inc, %for.body3 ] + %B.addr.11 = phi i64* [ %B.addr.04, %for.cond1.preheader ], [ %incdec.ptr, %for.body3 ] + %mul = mul nsw i64 %j.02, 500 + %add = add nsw i64 %i.03, %mul + %arrayidx = getelementptr inbounds i64* %A, i64 %add + store i64 0, i64* %arrayidx, align 8 + %0 = mul i64 %j.02, -500 + %sub = add i64 %i.03, %0 + %add5 = add nsw i64 %sub, 11 + %arrayidx6 = getelementptr inbounds i64* %A, i64 %add5 + %1 = load i64* %arrayidx6, align 8 +; CHECK: da analyze - flow [<> =]! + %incdec.ptr = getelementptr inbounds i64* %B.addr.11, i64 1 + store i64 %1, i64* %B.addr.11, align 8 + %inc = add nsw i64 %j.02, 1 + %exitcond = icmp ne i64 %inc, 20 + br i1 %exitcond, label %for.body3, label %for.inc7 + +for.inc7: ; preds = %for.body3 + %scevgep = getelementptr i64* %B.addr.04, i64 20 + %inc8 = add nsw i64 %i.03, 1 + %exitcond5 = icmp ne i64 %inc8, 20 + br i1 %exitcond5, label %for.cond1.preheader, label %for.end9 + +for.end9: ; preds = %for.inc7 + ret void +} + + +;; for (long int i = 0; i < 20; i++) +;; for (long int j = 0; j < 20; j++) { +;; A[300*i + j] = ... +;; ... = A[250*i - j + 11]; + +define void @banerjee11(i64* %A, i64* %B, i64 %m, i64 %n) nounwind uwtable ssp { +entry: + br label %for.cond1.preheader + +for.cond1.preheader: ; preds = %entry, %for.inc7 + %B.addr.04 = phi i64* [ %B, %entry ], [ %scevgep, %for.inc7 ] + %i.03 = phi i64 [ 0, %entry ], [ %inc8, %for.inc7 ] + br label %for.body3 + +for.body3: ; preds = %for.cond1.preheader, %for.body3 + %j.02 = phi i64 [ 0, %for.cond1.preheader ], [ %inc, %for.body3 ] + %B.addr.11 = phi i64* [ %B.addr.04, %for.cond1.preheader ], [ %incdec.ptr, %for.body3 ] + %mul = mul nsw i64 %i.03, 300 + %add = add nsw i64 %mul, %j.02 + %arrayidx = getelementptr inbounds i64* %A, i64 %add + store i64 0, i64* %arrayidx, align 8 + %mul4 = mul nsw i64 %i.03, 250 + %sub = sub nsw i64 %mul4, %j.02 + %add5 = add nsw i64 %sub, 11 + %arrayidx6 = getelementptr inbounds i64* %A, i64 %add5 + %0 = load i64* %arrayidx6, align 8 +; CHECK: da analyze - flow [<= <>]! 
+ %incdec.ptr = getelementptr inbounds i64* %B.addr.11, i64 1 + store i64 %0, i64* %B.addr.11, align 8 + %inc = add nsw i64 %j.02, 1 + %exitcond = icmp ne i64 %inc, 20 + br i1 %exitcond, label %for.body3, label %for.inc7 + +for.inc7: ; preds = %for.body3 + %scevgep = getelementptr i64* %B.addr.04, i64 20 + %inc8 = add nsw i64 %i.03, 1 + %exitcond5 = icmp ne i64 %inc8, 20 + br i1 %exitcond5, label %for.cond1.preheader, label %for.end9 + +for.end9: ; preds = %for.inc7 + ret void +} + + +;; for (long int i = 0; i < 20; i++) +;; for (long int j = 0; j < 20; j++) { +;; A[100*i + j] = ... +;; ... = A[100*i - j + 11]; + +define void @banerjee12(i64* %A, i64* %B, i64 %m, i64 %n) nounwind uwtable ssp { +entry: + br label %for.cond1.preheader + +for.cond1.preheader: ; preds = %entry, %for.inc7 + %B.addr.04 = phi i64* [ %B, %entry ], [ %scevgep, %for.inc7 ] + %i.03 = phi i64 [ 0, %entry ], [ %inc8, %for.inc7 ] + br label %for.body3 + +for.body3: ; preds = %for.cond1.preheader, %for.body3 + %j.02 = phi i64 [ 0, %for.cond1.preheader ], [ %inc, %for.body3 ] + %B.addr.11 = phi i64* [ %B.addr.04, %for.cond1.preheader ], [ %incdec.ptr, %for.body3 ] + %mul = mul nsw i64 %i.03, 100 + %add = add nsw i64 %mul, %j.02 + %arrayidx = getelementptr inbounds i64* %A, i64 %add + store i64 0, i64* %arrayidx, align 8 + %mul4 = mul nsw i64 %i.03, 100 + %sub = sub nsw i64 %mul4, %j.02 + %add5 = add nsw i64 %sub, 11 + %arrayidx6 = getelementptr inbounds i64* %A, i64 %add5 + %0 = load i64* %arrayidx6, align 8 +; CHECK: da analyze - flow [= <>]! + %incdec.ptr = getelementptr inbounds i64* %B.addr.11, i64 1 + store i64 %0, i64* %B.addr.11, align 8 + %inc = add nsw i64 %j.02, 1 + %exitcond = icmp ne i64 %inc, 20 + br i1 %exitcond, label %for.body3, label %for.inc7 + +for.inc7: ; preds = %for.body3 + %scevgep = getelementptr i64* %B.addr.04, i64 20 + %inc8 = add nsw i64 %i.03, 1 + %exitcond5 = icmp ne i64 %inc8, 20 + br i1 %exitcond5, label %for.cond1.preheader, label %for.end9 + +for.end9: ; preds = %for.inc7 + ret void +} diff --git a/llvm/test/Analysis/DependenceAnalysis/Coupled.ll b/llvm/test/Analysis/DependenceAnalysis/Coupled.ll new file mode 100644 index 0000000..60163fe --- /dev/null +++ b/llvm/test/Analysis/DependenceAnalysis/Coupled.ll @@ -0,0 +1,509 @@ +; RUN: opt < %s -analyze -basicaa -da | FileCheck %s + +; ModuleID = 'Coupled.bc' +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.6.0" + + +;; for (long int i = 0; i < 50; i++) +;; A[i][i] = ... +;; ... = A[i + 10][i + 9] + +define void @couple0([100 x i32]* %A, i32* %B, i32 %n) nounwind uwtable ssp { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ] + %B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ] + %conv = trunc i64 %i.02 to i32 + %arrayidx1 = getelementptr inbounds [100 x i32]* %A, i64 %i.02, i64 %i.02 + store i32 %conv, i32* %arrayidx1, align 4 + %add = add nsw i64 %i.02, 9 + %add2 = add nsw i64 %i.02, 10 + %arrayidx4 = getelementptr inbounds [100 x i32]* %A, i64 %add2, i64 %add + %0 = load i32* %arrayidx4, align 4 +; CHECK: da analyze - none! 
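+; Coupled subscripts: a collision would need the write A[i][i] to match the read A[i'+10][i'+9], i.e. i == i' + 10 and i == i' + 9 at the same time, which is impossible, hence "none"; compare couple1 below, where both offsets are +9 and a dependence exists.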
+ %incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1 + store i32 %0, i32* %B.addr.01, align 4 + %inc = add nsw i64 %i.02, 1 + %cmp = icmp slt i64 %inc, 50 + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body + ret void +} + + +;; for (long int i = 0; i < 50; i++) +;; A[i][i] = ... +;; ... = A[i + 9][i + 9] + +define void @couple1([100 x i32]* %A, i32* %B, i32 %n) nounwind uwtable ssp { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ] + %B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ] + %conv = trunc i64 %i.02 to i32 + %arrayidx1 = getelementptr inbounds [100 x i32]* %A, i64 %i.02, i64 %i.02 + store i32 %conv, i32* %arrayidx1, align 4 + %add = add nsw i64 %i.02, 9 + %add2 = add nsw i64 %i.02, 9 + %arrayidx4 = getelementptr inbounds [100 x i32]* %A, i64 %add2, i64 %add + %0 = load i32* %arrayidx4, align 4 +; CHECK: da analyze - consistent flow [-9]! + %incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1 + store i32 %0, i32* %B.addr.01, align 4 + %inc = add nsw i64 %i.02, 1 + %cmp = icmp slt i64 %inc, 50 + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body + ret void +} + + +;; for (long int i = 0; i < 50; i++) +;; A[3*i - 6][3*i - 6] = ... +;; ... = A[i][i] + +define void @couple2([100 x i32]* %A, i32* %B, i32 %n) nounwind uwtable ssp { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ] + %B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ] + %conv = trunc i64 %i.02 to i32 + %mul = mul nsw i64 %i.02, 3 + %sub = add nsw i64 %mul, -6 + %mul1 = mul nsw i64 %i.02, 3 + %sub2 = add nsw i64 %mul1, -6 + %arrayidx3 = getelementptr inbounds [100 x i32]* %A, i64 %sub2, i64 %sub + store i32 %conv, i32* %arrayidx3, align 4 + %arrayidx5 = getelementptr inbounds [100 x i32]* %A, i64 %i.02, i64 %i.02 + %0 = load i32* %arrayidx5, align 4 +; CHECK: da analyze - flow [*|<]! + %incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1 + store i32 %0, i32* %B.addr.01, align 4 + %inc = add nsw i64 %i.02, 1 + %cmp = icmp slt i64 %inc, 50 + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body + ret void +} + + +;; for (long int i = 0; i < 50; i++) +;; A[3*i - 6][3*i - 5] = ... +;; ... = A[i][i] + +define void @couple3([100 x i32]* %A, i32* %B, i32 %n) nounwind uwtable ssp { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ] + %B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ] + %conv = trunc i64 %i.02 to i32 + %mul = mul nsw i64 %i.02, 3 + %sub = add nsw i64 %mul, -5 + %mul1 = mul nsw i64 %i.02, 3 + %sub2 = add nsw i64 %mul1, -6 + %arrayidx3 = getelementptr inbounds [100 x i32]* %A, i64 %sub2, i64 %sub + store i32 %conv, i32* %arrayidx3, align 4 + %arrayidx5 = getelementptr inbounds [100 x i32]* %A, i64 %i.02, i64 %i.02 + %0 = load i32* %arrayidx5, align 4 +; CHECK: da analyze - none! + %incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1 + store i32 %0, i32* %B.addr.01, align 4 + %inc = add nsw i64 %i.02, 1 + %cmp = icmp slt i64 %inc, 50 + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body + ret void +} + + +;; for (long int i = 0; i < 50; i++) +;; A[3*i - 6][3*i - n] = ... +;; ... 
= A[i][i] + +define void @couple4([100 x i32]* %A, i32* %B, i32 %n) nounwind uwtable ssp { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ] + %B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ] + %conv = trunc i64 %i.02 to i32 + %mul = mul nsw i64 %i.02, 3 + %conv1 = sext i32 %n to i64 + %sub = sub nsw i64 %mul, %conv1 + %mul2 = mul nsw i64 %i.02, 3 + %sub3 = add nsw i64 %mul2, -6 + %arrayidx4 = getelementptr inbounds [100 x i32]* %A, i64 %sub3, i64 %sub + store i32 %conv, i32* %arrayidx4, align 4 + %arrayidx6 = getelementptr inbounds [100 x i32]* %A, i64 %i.02, i64 %i.02 + %0 = load i32* %arrayidx6, align 4 +; CHECK: da analyze - flow [*|<]! + %incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1 + store i32 %0, i32* %B.addr.01, align 4 + %inc = add nsw i64 %i.02, 1 + %cmp = icmp slt i64 %inc, 50 + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body + ret void +} + + +;; for (long int i = 0; i < 50; i++) +;; A[3*i - n + 1][3*i - n] = ... +;; ... = A[i][i] + +define void @couple5([100 x i32]* %A, i32* %B, i32 %n) nounwind uwtable ssp { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ] + %B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ] + %conv = trunc i64 %i.02 to i32 + %mul = mul nsw i64 %i.02, 3 + %conv1 = sext i32 %n to i64 + %sub = sub nsw i64 %mul, %conv1 + %mul2 = mul nsw i64 %i.02, 3 + %conv3 = sext i32 %n to i64 + %sub4 = sub nsw i64 %mul2, %conv3 + %add = add nsw i64 %sub4, 1 + %arrayidx5 = getelementptr inbounds [100 x i32]* %A, i64 %add, i64 %sub + store i32 %conv, i32* %arrayidx5, align 4 + %arrayidx7 = getelementptr inbounds [100 x i32]* %A, i64 %i.02, i64 %i.02 + %0 = load i32* %arrayidx7, align 4 +; CHECK: da analyze - none! + %incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1 + store i32 %0, i32* %B.addr.01, align 4 + %inc = add nsw i64 %i.02, 1 + %cmp = icmp slt i64 %inc, 50 + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body + ret void +} + + +;; for (long int i = 0; i < 50; i++) +;; A[i][3*i - 6] = ... +;; ... = A[i][i] + +define void @couple6([100 x i32]* %A, i32* %B, i32 %n) nounwind uwtable ssp { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ] + %B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ] + %conv = trunc i64 %i.02 to i32 + %mul = mul nsw i64 %i.02, 3 + %sub = add nsw i64 %mul, -6 + %arrayidx1 = getelementptr inbounds [100 x i32]* %A, i64 %i.02, i64 %sub + store i32 %conv, i32* %arrayidx1, align 4 + %arrayidx3 = getelementptr inbounds [100 x i32]* %A, i64 %i.02, i64 %i.02 + %0 = load i32* %arrayidx3, align 4 +; CHECK: da analyze - flow [=|<]! + %incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1 + store i32 %0, i32* %B.addr.01, align 4 + %inc = add nsw i64 %i.02, 1 + %cmp = icmp slt i64 %inc, 50 + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body + ret void +} + + +;; for (long int i = 0; i < 50; i++) +;; A[i][3*i - 5] = ... +;; ... 
= A[i][i] + +define void @couple7([100 x i32]* %A, i32* %B, i32 %n) nounwind uwtable ssp { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ] + %B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ] + %conv = trunc i64 %i.02 to i32 + %mul = mul nsw i64 %i.02, 3 + %sub = add nsw i64 %mul, -5 + %arrayidx1 = getelementptr inbounds [100 x i32]* %A, i64 %i.02, i64 %sub + store i32 %conv, i32* %arrayidx1, align 4 + %arrayidx3 = getelementptr inbounds [100 x i32]* %A, i64 %i.02, i64 %i.02 + %0 = load i32* %arrayidx3, align 4 +; CHECK: da analyze - none! + %incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1 + store i32 %0, i32* %B.addr.01, align 4 + %inc = add nsw i64 %i.02, 1 + %cmp = icmp slt i64 %inc, 50 + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body + ret void +} + + +;; for (long int i = 0; i <= 15; i++) +;; A[3*i - 18][3 - i] = ... +;; ... = A[i][i] + +define void @couple8([100 x i32]* %A, i32* %B, i32 %n) nounwind uwtable ssp { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ] + %B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ] + %conv = trunc i64 %i.02 to i32 + %sub = sub nsw i64 3, %i.02 + %mul = mul nsw i64 %i.02, 3 + %sub1 = add nsw i64 %mul, -18 + %arrayidx2 = getelementptr inbounds [100 x i32]* %A, i64 %sub1, i64 %sub + store i32 %conv, i32* %arrayidx2, align 4 + %arrayidx4 = getelementptr inbounds [100 x i32]* %A, i64 %i.02, i64 %i.02 + %0 = load i32* %arrayidx4, align 4 +; CHECK: da analyze - none! + %incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1 + store i32 %0, i32* %B.addr.01, align 4 + %inc = add nsw i64 %i.02, 1 + %cmp = icmp slt i64 %inc, 16 + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body + ret void +} + + +;; for (long int i = 0; i <= 15; i++) +;; A[3*i - 18][2 - i] = ... +;; ... = A[i][i] + +define void @couple9([100 x i32]* %A, i32* %B, i32 %n) nounwind uwtable ssp { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ] + %B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ] + %conv = trunc i64 %i.02 to i32 + %sub = sub nsw i64 2, %i.02 + %mul = mul nsw i64 %i.02, 3 + %sub1 = add nsw i64 %mul, -18 + %arrayidx2 = getelementptr inbounds [100 x i32]* %A, i64 %sub1, i64 %sub + store i32 %conv, i32* %arrayidx2, align 4 + %arrayidx4 = getelementptr inbounds [100 x i32]* %A, i64 %i.02, i64 %i.02 + %0 = load i32* %arrayidx4, align 4 +; CHECK: da analyze - none! + %incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1 + store i32 %0, i32* %B.addr.01, align 4 + %inc = add nsw i64 %i.02, 1 + %cmp = icmp slt i64 %inc, 16 + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body + ret void +} + + +;; for (long int i = 0; i <= 15; i++) +;; A[3*i - 18][6 - i] = ... +;; ... 
= A[i][i] + +define void @couple10([100 x i32]* %A, i32* %B, i32 %n) nounwind uwtable ssp { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ] + %B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ] + %conv = trunc i64 %i.02 to i32 + %sub = sub nsw i64 6, %i.02 + %mul = mul nsw i64 %i.02, 3 + %sub1 = add nsw i64 %mul, -18 + %arrayidx2 = getelementptr inbounds [100 x i32]* %A, i64 %sub1, i64 %sub + store i32 %conv, i32* %arrayidx2, align 4 + %arrayidx4 = getelementptr inbounds [100 x i32]* %A, i64 %i.02, i64 %i.02 + %0 = load i32* %arrayidx4, align 4 +; CHECK: da analyze - flow [>] splitable! +; CHECK: da analyze - split level = 1, iteration = 3! + %incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1 + store i32 %0, i32* %B.addr.01, align 4 + %inc = add nsw i64 %i.02, 1 + %cmp = icmp slt i64 %inc, 16 + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body + ret void +} + + +;; for (long int i = 0; i <= 15; i++) +;; A[3*i - 18][18 - i] = ... +;; ... = A[i][i] + +define void @couple11([100 x i32]* %A, i32* %B, i32 %n) nounwind uwtable ssp { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ] + %B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ] + %conv = trunc i64 %i.02 to i32 + %sub = sub nsw i64 18, %i.02 + %mul = mul nsw i64 %i.02, 3 + %sub1 = add nsw i64 %mul, -18 + %arrayidx2 = getelementptr inbounds [100 x i32]* %A, i64 %sub1, i64 %sub + store i32 %conv, i32* %arrayidx2, align 4 + %arrayidx4 = getelementptr inbounds [100 x i32]* %A, i64 %i.02, i64 %i.02 + %0 = load i32* %arrayidx4, align 4 +; CHECK: da analyze - flow [=|<] splitable! +; CHECK: da analyze - split level = 1, iteration = 9! + %incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1 + store i32 %0, i32* %B.addr.01, align 4 + %inc = add nsw i64 %i.02, 1 + %cmp = icmp slt i64 %inc, 16 + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body + ret void +} + + +;; for (long int i = 0; i <= 12; i++) +;; A[3*i - 18][22 - i] = ... +;; ... = A[i][i] + +define void @couple12([100 x i32]* %A, i32* %B, i32 %n) nounwind uwtable ssp { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ] + %B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ] + %conv = trunc i64 %i.02 to i32 + %sub = sub nsw i64 22, %i.02 + %mul = mul nsw i64 %i.02, 3 + %sub1 = add nsw i64 %mul, -18 + %arrayidx2 = getelementptr inbounds [100 x i32]* %A, i64 %sub1, i64 %sub + store i32 %conv, i32* %arrayidx2, align 4 + %arrayidx4 = getelementptr inbounds [100 x i32]* %A, i64 %i.02, i64 %i.02 + %0 = load i32* %arrayidx4, align 4 +; CHECK: da analyze - flow [<] splitable! +; CHECK: da analyze - split level = 1, iteration = 11! + %incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1 + store i32 %0, i32* %B.addr.01, align 4 + %inc = add nsw i64 %i.02, 1 + %cmp = icmp slt i64 %inc, 13 + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body + ret void +} + + +;; for (long int i = 0; i < 12; i++) +;; A[3*i - 18][22 - i] = ... +;; ... 
= A[i][i] + +define void @couple13([100 x i32]* %A, i32* %B, i32 %n) nounwind uwtable ssp { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ] + %B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ] + %conv = trunc i64 %i.02 to i32 + %sub = sub nsw i64 22, %i.02 + %mul = mul nsw i64 %i.02, 3 + %sub1 = add nsw i64 %mul, -18 + %arrayidx2 = getelementptr inbounds [100 x i32]* %A, i64 %sub1, i64 %sub + store i32 %conv, i32* %arrayidx2, align 4 + %arrayidx4 = getelementptr inbounds [100 x i32]* %A, i64 %i.02, i64 %i.02 + %0 = load i32* %arrayidx4, align 4 +; CHECK: da analyze - none! + %incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1 + store i32 %0, i32* %B.addr.01, align 4 + %inc = add nsw i64 %i.02, 1 + %cmp = icmp slt i64 %inc, 12 + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body + ret void +} + + +;; for (long int i = 0; i < 100; i++) +;; A[3*i - 18][18 - i][i] = ... +;; ... = A[i][i][i] + +define void @couple14([100 x [100 x i32]]* %A, i32* %B, i32 %n) nounwind uwtable ssp { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ] + %B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ] + %conv = trunc i64 %i.02 to i32 + %sub = sub nsw i64 18, %i.02 + %mul = mul nsw i64 %i.02, 3 + %sub1 = add nsw i64 %mul, -18 + %arrayidx3 = getelementptr inbounds [100 x [100 x i32]]* %A, i64 %sub1, i64 %sub, i64 %i.02 + store i32 %conv, i32* %arrayidx3, align 4 + %arrayidx6 = getelementptr inbounds [100 x [100 x i32]]* %A, i64 %i.02, i64 %i.02, i64 %i.02 + %0 = load i32* %arrayidx6, align 4 +; CHECK: da analyze - flow [=|<] splitable! +; CHECK: da analyze - split level = 1, iteration = 9! + %incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1 + store i32 %0, i32* %B.addr.01, align 4 + %inc = add nsw i64 %i.02, 1 + %cmp = icmp slt i64 %inc, 100 + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body + ret void +} + + +;; for (long int i = 0; i < 100; i++) +;; A[3*i - 18][22 - i][i] = ... +;; ... = A[i][i][i] + +define void @couple15([100 x [100 x i32]]* %A, i32* %B, i32 %n) nounwind uwtable ssp { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ] + %B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ] + %conv = trunc i64 %i.02 to i32 + %sub = sub nsw i64 22, %i.02 + %mul = mul nsw i64 %i.02, 3 + %sub1 = add nsw i64 %mul, -18 + %arrayidx3 = getelementptr inbounds [100 x [100 x i32]]* %A, i64 %sub1, i64 %sub, i64 %i.02 + store i32 %conv, i32* %arrayidx3, align 4 + %arrayidx6 = getelementptr inbounds [100 x [100 x i32]]* %A, i64 %i.02, i64 %i.02, i64 %i.02 + %0 = load i32* %arrayidx6, align 4 +; CHECK: da analyze - none! 
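+; The third subscript forces i == i'; the first then requires 3*i - 18 == i (i == 9) while the second requires 22 - i == i (i == 11), so the constraints are inconsistent and no dependence exists, unlike couple14, where 3*i - 18 == i and 18 - i == i agree at i == 9.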
+ %incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1 + store i32 %0, i32* %B.addr.01, align 4 + %inc = add nsw i64 %i.02, 1 + %cmp = icmp slt i64 %inc, 100 + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body + ret void +} diff --git a/llvm/test/Analysis/DependenceAnalysis/ExactRDIV.ll b/llvm/test/Analysis/DependenceAnalysis/ExactRDIV.ll new file mode 100644 index 0000000..aa5d254 --- /dev/null +++ b/llvm/test/Analysis/DependenceAnalysis/ExactRDIV.ll @@ -0,0 +1,508 @@ +; RUN: opt < %s -analyze -basicaa -da | FileCheck %s + +; ModuleID = 'ExactRDIV.bc' +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.6.0" + + +;; for (long int i = 0; i < 10; i++) +;; A[4*i + 10] = ... +;; for (long int j = 0; j < 10; j++) +;; ... = A[2*j + 1]; + +define void @rdiv0(i32* %A, i32* %B) nounwind uwtable ssp { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %i.03 = phi i64 [ 0, %entry ], [ %inc, %for.body ] + %conv = trunc i64 %i.03 to i32 + %mul = shl nsw i64 %i.03, 2 + %add = add nsw i64 %mul, 10 + %arrayidx = getelementptr inbounds i32* %A, i64 %add + store i32 %conv, i32* %arrayidx, align 4 + %inc = add nsw i64 %i.03, 1 + %cmp = icmp slt i64 %inc, 10 + br i1 %cmp, label %for.body, label %for.body4 + +for.body4: ; preds = %for.body4, %for.body + %j.02 = phi i64 [ %inc9, %for.body4 ], [ 0, %for.body ] + %B.addr.01 = phi i32* [ %incdec.ptr, %for.body4 ], [ %B, %for.body ] + %mul5 = shl nsw i64 %j.02, 1 + %add64 = or i64 %mul5, 1 + %arrayidx7 = getelementptr inbounds i32* %A, i64 %add64 + %0 = load i32* %arrayidx7, align 4 +; CHECK: da analyze - none! + %incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1 + store i32 %0, i32* %B.addr.01, align 4 + %inc9 = add nsw i64 %j.02, 1 + %cmp2 = icmp slt i64 %inc9, 10 + br i1 %cmp2, label %for.body4, label %for.end10 + +for.end10: ; preds = %for.body4 + ret void +} + + +;; for (long int i = 0; i < 5; i++) +;; A[11*i - 45] = ... +;; for (long int j = 0; j < 10; j++) +;; ... = A[j]; + +define void @rdiv1(i32* %A, i32* %B) nounwind uwtable ssp { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %i.03 = phi i64 [ 0, %entry ], [ %inc, %for.body ] + %conv = trunc i64 %i.03 to i32 + %mul = mul nsw i64 %i.03, 11 + %sub = add nsw i64 %mul, -45 + %arrayidx = getelementptr inbounds i32* %A, i64 %sub + store i32 %conv, i32* %arrayidx, align 4 + %inc = add nsw i64 %i.03, 1 + %cmp = icmp slt i64 %inc, 5 + br i1 %cmp, label %for.body, label %for.body4 + +for.body4: ; preds = %for.body4, %for.body + %j.02 = phi i64 [ %inc7, %for.body4 ], [ 0, %for.body ] + %B.addr.01 = phi i32* [ %incdec.ptr, %for.body4 ], [ %B, %for.body ] + %arrayidx5 = getelementptr inbounds i32* %A, i64 %j.02 + %0 = load i32* %arrayidx5, align 4 +; CHECK: da analyze - none! + %incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1 + store i32 %0, i32* %B.addr.01, align 4 + %inc7 = add nsw i64 %j.02, 1 + %cmp2 = icmp slt i64 %inc7, 10 + br i1 %cmp2, label %for.body4, label %for.end8 + +for.end8: ; preds = %for.body4 + ret void +} + + +;; for (long int i = 0; i <= 5; i++) +;; A[11*i - 45] = ... +;; for (long int j = 0; j < 10; j++) +;; ... 
= A[j]; + +define void @rdiv2(i32* %A, i32* %B) nounwind uwtable ssp { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %i.03 = phi i64 [ 0, %entry ], [ %inc, %for.body ] + %conv = trunc i64 %i.03 to i32 + %mul = mul nsw i64 %i.03, 11 + %sub = add nsw i64 %mul, -45 + %arrayidx = getelementptr inbounds i32* %A, i64 %sub + store i32 %conv, i32* %arrayidx, align 4 + %inc = add nsw i64 %i.03, 1 + %cmp = icmp slt i64 %inc, 6 + br i1 %cmp, label %for.body, label %for.body4 + +for.body4: ; preds = %for.body4, %for.body + %j.02 = phi i64 [ %inc7, %for.body4 ], [ 0, %for.body ] + %B.addr.01 = phi i32* [ %incdec.ptr, %for.body4 ], [ %B, %for.body ] + %arrayidx5 = getelementptr inbounds i32* %A, i64 %j.02 + %0 = load i32* %arrayidx5, align 4 +; CHECK: da analyze - none! + %incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1 + store i32 %0, i32* %B.addr.01, align 4 + %inc7 = add nsw i64 %j.02, 1 + %cmp2 = icmp slt i64 %inc7, 10 + br i1 %cmp2, label %for.body4, label %for.end8 + +for.end8: ; preds = %for.body4 + ret void +} + + +;; for (long int i = 0; i < 5; i++) +;; A[11*i - 45] = ... +;; for (long int j = 0; j <= 10; j++) +;; ... = A[j]; + +define void @rdiv3(i32* %A, i32* %B) nounwind uwtable ssp { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %i.03 = phi i64 [ 0, %entry ], [ %inc, %for.body ] + %conv = trunc i64 %i.03 to i32 + %mul = mul nsw i64 %i.03, 11 + %sub = add nsw i64 %mul, -45 + %arrayidx = getelementptr inbounds i32* %A, i64 %sub + store i32 %conv, i32* %arrayidx, align 4 + %inc = add nsw i64 %i.03, 1 + %cmp = icmp slt i64 %inc, 5 + br i1 %cmp, label %for.body, label %for.body4 + +for.body4: ; preds = %for.body4, %for.body + %j.02 = phi i64 [ %inc7, %for.body4 ], [ 0, %for.body ] + %B.addr.01 = phi i32* [ %incdec.ptr, %for.body4 ], [ %B, %for.body ] + %arrayidx5 = getelementptr inbounds i32* %A, i64 %j.02 + %0 = load i32* %arrayidx5, align 4 +; CHECK: da analyze - none! + %incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1 + store i32 %0, i32* %B.addr.01, align 4 + %inc7 = add nsw i64 %j.02, 1 + %cmp2 = icmp slt i64 %inc7, 11 + br i1 %cmp2, label %for.body4, label %for.end8 + +for.end8: ; preds = %for.body4 + ret void +} + + +;; for (long int i = 0; i <= 5; i++) +;; A[11*i - 45] = ... +;; for (long int j = 0; j <= 10; j++) +;; ... = A[j]; + +define void @rdiv4(i32* %A, i32* %B) nounwind uwtable ssp { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %i.03 = phi i64 [ 0, %entry ], [ %inc, %for.body ] + %conv = trunc i64 %i.03 to i32 + %mul = mul nsw i64 %i.03, 11 + %sub = add nsw i64 %mul, -45 + %arrayidx = getelementptr inbounds i32* %A, i64 %sub + store i32 %conv, i32* %arrayidx, align 4 + %inc = add nsw i64 %i.03, 1 + %cmp = icmp slt i64 %inc, 6 + br i1 %cmp, label %for.body, label %for.body4 + +for.body4: ; preds = %for.body4, %for.body + %j.02 = phi i64 [ %inc7, %for.body4 ], [ 0, %for.body ] + %B.addr.01 = phi i32* [ %incdec.ptr, %for.body4 ], [ %B, %for.body ] + %arrayidx5 = getelementptr inbounds i32* %A, i64 %j.02 + %0 = load i32* %arrayidx5, align 4 +; CHECK: da analyze - flow! + %incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1 + store i32 %0, i32* %B.addr.01, align 4 + %inc7 = add nsw i64 %j.02, 1 + %cmp2 = icmp slt i64 %inc7, 11 + br i1 %cmp2, label %for.body4, label %for.end8 + +for.end8: ; preds = %for.body4 + ret void +} + + +;; for (long int i = 0; i < 5; i++) +;; A[-11*i + 45] = ... +;; for (long int j = 0; j < 10; j++) +;; ... 
= A[-j]; + +define void @rdiv5(i32* %A, i32* %B) nounwind uwtable ssp { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %i.03 = phi i64 [ 0, %entry ], [ %inc, %for.body ] + %conv = trunc i64 %i.03 to i32 + %mul = mul nsw i64 %i.03, -11 + %add = add nsw i64 %mul, 45 + %arrayidx = getelementptr inbounds i32* %A, i64 %add + store i32 %conv, i32* %arrayidx, align 4 + %inc = add nsw i64 %i.03, 1 + %cmp = icmp slt i64 %inc, 5 + br i1 %cmp, label %for.body, label %for.body4 + +for.body4: ; preds = %for.body4, %for.body + %j.02 = phi i64 [ %inc7, %for.body4 ], [ 0, %for.body ] + %B.addr.01 = phi i32* [ %incdec.ptr, %for.body4 ], [ %B, %for.body ] + %sub = sub nsw i64 0, %j.02 + %arrayidx5 = getelementptr inbounds i32* %A, i64 %sub + %0 = load i32* %arrayidx5, align 4 +; CHECK: da analyze - none! + %incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1 + store i32 %0, i32* %B.addr.01, align 4 + %inc7 = add nsw i64 %j.02, 1 + %cmp2 = icmp slt i64 %inc7, 10 + br i1 %cmp2, label %for.body4, label %for.end8 + +for.end8: ; preds = %for.body4 + ret void +} + + +;; for (long int i = 0; i <= 5; i++) +;; A[-11*i + 45] = ... +;; for (long int j = 0; j < 10; j++) +;; ... = A[-j]; + +define void @rdiv6(i32* %A, i32* %B) nounwind uwtable ssp { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %i.03 = phi i64 [ 0, %entry ], [ %inc, %for.body ] + %conv = trunc i64 %i.03 to i32 + %mul = mul nsw i64 %i.03, -11 + %add = add nsw i64 %mul, 45 + %arrayidx = getelementptr inbounds i32* %A, i64 %add + store i32 %conv, i32* %arrayidx, align 4 + %inc = add nsw i64 %i.03, 1 + %cmp = icmp slt i64 %inc, 6 + br i1 %cmp, label %for.body, label %for.body4 + +for.body4: ; preds = %for.body4, %for.body + %j.02 = phi i64 [ %inc7, %for.body4 ], [ 0, %for.body ] + %B.addr.01 = phi i32* [ %incdec.ptr, %for.body4 ], [ %B, %for.body ] + %sub = sub nsw i64 0, %j.02 + %arrayidx5 = getelementptr inbounds i32* %A, i64 %sub + %0 = load i32* %arrayidx5, align 4 +; CHECK: da analyze - none! + %incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1 + store i32 %0, i32* %B.addr.01, align 4 + %inc7 = add nsw i64 %j.02, 1 + %cmp2 = icmp slt i64 %inc7, 10 + br i1 %cmp2, label %for.body4, label %for.end8 + +for.end8: ; preds = %for.body4 + ret void +} + + +;; for (long int i = 0; i < 5; i++) +;; A[-11*i + 45] = ... +;; for (long int j = 0; j <= 10; j++) +;; ... = A[-j]; + +define void @rdiv7(i32* %A, i32* %B) nounwind uwtable ssp { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %i.03 = phi i64 [ 0, %entry ], [ %inc, %for.body ] + %conv = trunc i64 %i.03 to i32 + %mul = mul nsw i64 %i.03, -11 + %add = add nsw i64 %mul, 45 + %arrayidx = getelementptr inbounds i32* %A, i64 %add + store i32 %conv, i32* %arrayidx, align 4 + %inc = add nsw i64 %i.03, 1 + %cmp = icmp slt i64 %inc, 5 + br i1 %cmp, label %for.body, label %for.body4 + +for.body4: ; preds = %for.body4, %for.body + %j.02 = phi i64 [ %inc7, %for.body4 ], [ 0, %for.body ] + %B.addr.01 = phi i32* [ %incdec.ptr, %for.body4 ], [ %B, %for.body ] + %sub = sub nsw i64 0, %j.02 + %arrayidx5 = getelementptr inbounds i32* %A, i64 %sub + %0 = load i32* %arrayidx5, align 4 +; CHECK: da analyze - none! + %incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1 + store i32 %0, i32* %B.addr.01, align 4 + %inc7 = add nsw i64 %j.02, 1 + %cmp2 = icmp slt i64 %inc7, 11 + br i1 %cmp2, label %for.body4, label %for.end8 + +for.end8: ; preds = %for.body4 + ret void +} + + +;; for (long int i = 0; i <= 5; i++) +;; A[-11*i + 45] = ... 
+;; for (long int j = 0; j <= 10; j++) +;; ... = A[-j]; + +define void @rdiv8(i32* %A, i32* %B) nounwind uwtable ssp { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %i.03 = phi i64 [ 0, %entry ], [ %inc, %for.body ] + %conv = trunc i64 %i.03 to i32 + %mul = mul nsw i64 %i.03, -11 + %add = add nsw i64 %mul, 45 + %arrayidx = getelementptr inbounds i32* %A, i64 %add + store i32 %conv, i32* %arrayidx, align 4 + %inc = add nsw i64 %i.03, 1 + %cmp = icmp slt i64 %inc, 6 + br i1 %cmp, label %for.body, label %for.body4 + +for.body4: ; preds = %for.body4, %for.body + %j.02 = phi i64 [ %inc7, %for.body4 ], [ 0, %for.body ] + %B.addr.01 = phi i32* [ %incdec.ptr, %for.body4 ], [ %B, %for.body ] + %sub = sub nsw i64 0, %j.02 + %arrayidx5 = getelementptr inbounds i32* %A, i64 %sub + %0 = load i32* %arrayidx5, align 4 +; CHECK: da analyze - flow! + %incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1 + store i32 %0, i32* %B.addr.01, align 4 + %inc7 = add nsw i64 %j.02, 1 + %cmp2 = icmp slt i64 %inc7, 11 + br i1 %cmp2, label %for.body4, label %for.end8 + +for.end8: ; preds = %for.body4 + ret void +} + + +;; for (long int i = 0; i < 5; i++) +;; for (long int j = 0; j < 10; j++) +;; A[11*i - j] = ... +;; ... = A[45]; + +define void @rdiv9(i32* %A, i32* %B) nounwind uwtable ssp { +entry: + br label %for.cond1.preheader + +for.cond1.preheader: ; preds = %for.inc5, %entry + %B.addr.04 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.inc5 ] + %i.03 = phi i64 [ 0, %entry ], [ %inc6, %for.inc5 ] + br label %for.body3 + +for.body3: ; preds = %for.body3, %for.cond1.preheader + %j.02 = phi i64 [ 0, %for.cond1.preheader ], [ %inc, %for.body3 ] + %B.addr.11 = phi i32* [ %B.addr.04, %for.cond1.preheader ], [ %incdec.ptr, %for.body3 ] + %conv = trunc i64 %i.03 to i32 + %mul = mul nsw i64 %i.03, 11 + %sub = sub nsw i64 %mul, %j.02 + %arrayidx = getelementptr inbounds i32* %A, i64 %sub + store i32 %conv, i32* %arrayidx, align 4 + %arrayidx4 = getelementptr inbounds i32* %A, i64 45 + %0 = load i32* %arrayidx4, align 4 +; CHECK: da analyze - none! + %incdec.ptr = getelementptr inbounds i32* %B.addr.11, i64 1 + store i32 %0, i32* %B.addr.11, align 4 + %inc = add nsw i64 %j.02, 1 + %cmp2 = icmp slt i64 %inc, 10 + br i1 %cmp2, label %for.body3, label %for.inc5 + +for.inc5: ; preds = %for.body3 + %inc6 = add nsw i64 %i.03, 1 + %cmp = icmp slt i64 %inc6, 5 + br i1 %cmp, label %for.cond1.preheader, label %for.end7 + +for.end7: ; preds = %for.inc5 + ret void +} + + +;; for (long int i = 0; i < 5; i++) +;; for (long int j = 0; j <= 10; j++) +;; A[11*i - j] = ... +;; ... = A[45]; + +define void @rdiv10(i32* %A, i32* %B) nounwind uwtable ssp { +entry: + br label %for.cond1.preheader + +for.cond1.preheader: ; preds = %for.inc5, %entry + %B.addr.04 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.inc5 ] + %i.03 = phi i64 [ 0, %entry ], [ %inc6, %for.inc5 ] + br label %for.body3 + +for.body3: ; preds = %for.body3, %for.cond1.preheader + %j.02 = phi i64 [ 0, %for.cond1.preheader ], [ %inc, %for.body3 ] + %B.addr.11 = phi i32* [ %B.addr.04, %for.cond1.preheader ], [ %incdec.ptr, %for.body3 ] + %conv = trunc i64 %i.03 to i32 + %mul = mul nsw i64 %i.03, 11 + %sub = sub nsw i64 %mul, %j.02 + %arrayidx = getelementptr inbounds i32* %A, i64 %sub + store i32 %conv, i32* %arrayidx, align 4 + %arrayidx4 = getelementptr inbounds i32* %A, i64 45 + %0 = load i32* %arrayidx4, align 4 +; CHECK: da analyze - none! 
+ %incdec.ptr = getelementptr inbounds i32* %B.addr.11, i64 1 + store i32 %0, i32* %B.addr.11, align 4 + %inc = add nsw i64 %j.02, 1 + %cmp2 = icmp slt i64 %inc, 10 + br i1 %cmp2, label %for.body3, label %for.inc5 + +for.inc5: ; preds = %for.body3 + %inc6 = add nsw i64 %i.03, 1 + %cmp = icmp slt i64 %inc6, 6 + br i1 %cmp, label %for.cond1.preheader, label %for.end7 + +for.end7: ; preds = %for.inc5 + ret void +} + + +;; for (long int i = 0; i <= 5; i++) +;; for (long int j = 0; j <= 10; j++) +;; A[11*i - j] = ... +;; ... = A[45]; + +define void @rdiv11(i32* %A, i32* %B) nounwind uwtable ssp { +entry: + br label %for.cond1.preheader + +for.cond1.preheader: ; preds = %for.inc5, %entry + %B.addr.04 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.inc5 ] + %i.03 = phi i64 [ 0, %entry ], [ %inc6, %for.inc5 ] + br label %for.body3 + +for.body3: ; preds = %for.body3, %for.cond1.preheader + %j.02 = phi i64 [ 0, %for.cond1.preheader ], [ %inc, %for.body3 ] + %B.addr.11 = phi i32* [ %B.addr.04, %for.cond1.preheader ], [ %incdec.ptr, %for.body3 ] + %conv = trunc i64 %i.03 to i32 + %mul = mul nsw i64 %i.03, 11 + %sub = sub nsw i64 %mul, %j.02 + %arrayidx = getelementptr inbounds i32* %A, i64 %sub + store i32 %conv, i32* %arrayidx, align 4 + %arrayidx4 = getelementptr inbounds i32* %A, i64 45 + %0 = load i32* %arrayidx4, align 4 +; CHECK: da analyze - none! + %incdec.ptr = getelementptr inbounds i32* %B.addr.11, i64 1 + store i32 %0, i32* %B.addr.11, align 4 + %inc = add nsw i64 %j.02, 1 + %cmp2 = icmp slt i64 %inc, 11 + br i1 %cmp2, label %for.body3, label %for.inc5 + +for.inc5: ; preds = %for.body3 + %inc6 = add nsw i64 %i.03, 1 + %cmp = icmp slt i64 %inc6, 5 + br i1 %cmp, label %for.cond1.preheader, label %for.end7 + +for.end7: ; preds = %for.inc5 + ret void +} + + +;; for (long int i = 0; i < 5; i++) +;; for (long int j = 0; j < 10; j++) +;; A[11*i - j] = ... +;; ... = A[45]; + +define void @rdiv12(i32* %A, i32* %B) nounwind uwtable ssp { +entry: + br label %for.cond1.preheader + +for.cond1.preheader: ; preds = %for.inc5, %entry + %B.addr.04 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.inc5 ] + %i.03 = phi i64 [ 0, %entry ], [ %inc6, %for.inc5 ] + br label %for.body3 + +for.body3: ; preds = %for.body3, %for.cond1.preheader + %j.02 = phi i64 [ 0, %for.cond1.preheader ], [ %inc, %for.body3 ] + %B.addr.11 = phi i32* [ %B.addr.04, %for.cond1.preheader ], [ %incdec.ptr, %for.body3 ] + %conv = trunc i64 %i.03 to i32 + %mul = mul nsw i64 %i.03, 11 + %sub = sub nsw i64 %mul, %j.02 + %arrayidx = getelementptr inbounds i32* %A, i64 %sub + store i32 %conv, i32* %arrayidx, align 4 + %arrayidx4 = getelementptr inbounds i32* %A, i64 45 + %0 = load i32* %arrayidx4, align 4 +; CHECK: da analyze - flow [* *|<]! 
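+; 11*i - j == 45 has the single in-range solution i == 5, j == 10, so of rdiv9-rdiv12 only this version, whose IR bounds compare against 6 and 11 (i.e. i and j both reach their upper bounds inclusively), expects a dependence.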
+ %incdec.ptr = getelementptr inbounds i32* %B.addr.11, i64 1 + store i32 %0, i32* %B.addr.11, align 4 + %inc = add nsw i64 %j.02, 1 + %cmp2 = icmp slt i64 %inc, 11 + br i1 %cmp2, label %for.body3, label %for.inc5 + +for.inc5: ; preds = %for.body3 + %inc6 = add nsw i64 %i.03, 1 + %cmp = icmp slt i64 %inc6, 6 + br i1 %cmp, label %for.cond1.preheader, label %for.end7 + +for.end7: ; preds = %for.inc5 + ret void +} diff --git a/llvm/test/Analysis/DependenceAnalysis/ExactSIV.ll b/llvm/test/Analysis/DependenceAnalysis/ExactSIV.ll new file mode 100644 index 0000000..71e0502 --- /dev/null +++ b/llvm/test/Analysis/DependenceAnalysis/ExactSIV.ll @@ -0,0 +1,428 @@ +; RUN: opt < %s -analyze -basicaa -da | FileCheck %s + +; ModuleID = 'ExactSIV.bc' +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.6.0" + + +;; for (long unsigned i = 0; i < 10; i++) { +;; A[i + 10] = ... +;; ... = A[2*i + 1]; + +define void @exact0(i32* %A, i32* %B) nounwind uwtable ssp { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ] + %B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ] + %conv = trunc i64 %i.02 to i32 + %add = add i64 %i.02, 10 + %arrayidx = getelementptr inbounds i32* %A, i64 %add + store i32 %conv, i32* %arrayidx, align 4 + %mul = shl i64 %i.02, 1 + %add13 = or i64 %mul, 1 + %arrayidx2 = getelementptr inbounds i32* %A, i64 %add13 + %0 = load i32* %arrayidx2, align 4 +; CHECK: da analyze - flow [<=|<]! + %incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1 + store i32 %0, i32* %B.addr.01, align 4 + %inc = add i64 %i.02, 1 + %cmp = icmp ult i64 %inc, 10 + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body + ret void +} + + +;; for (long unsigned i = 0; i < 10; i++) { +;; A[4*i + 10] = ... +;; ... = A[2*i + 1]; + +define void @exact1(i32* %A, i32* %B) nounwind uwtable ssp { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ] + %B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ] + %conv = trunc i64 %i.02 to i32 + %mul = shl i64 %i.02, 2 + %add = add i64 %mul, 10 + %arrayidx = getelementptr inbounds i32* %A, i64 %add + store i32 %conv, i32* %arrayidx, align 4 + %mul1 = shl i64 %i.02, 1 + %add23 = or i64 %mul1, 1 + %arrayidx3 = getelementptr inbounds i32* %A, i64 %add23 + %0 = load i32* %arrayidx3, align 4 +; CHECK: da analyze - none! + %incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1 + store i32 %0, i32* %B.addr.01, align 4 + %inc = add i64 %i.02, 1 + %cmp = icmp ult i64 %inc, 10 + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body + ret void +} + + +;; for (long unsigned i = 0; i < 10; i++) { +;; A[6*i] = ... +;; ... = A[i + 60]; + +define void @exact2(i32* %A, i32* %B) nounwind uwtable ssp { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ] + %B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ] + %conv = trunc i64 %i.02 to i32 + %mul = mul i64 %i.02, 6 + %arrayidx = getelementptr inbounds i32* %A, i64 %mul + store i32 %conv, i32* %arrayidx, align 4 + %add = add i64 %i.02, 60 + %arrayidx1 = getelementptr inbounds i32* %A, i64 %add + %0 = load i32* %arrayidx1, align 4 +; CHECK: da analyze - none! 
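+; 6*i == i' + 60 needs i >= 10 to give an in-range i', so with i < 10 the accesses never meet; exact3 below extends the bound to i <= 10, where the store at i == 10 writes A[60], which the load reads at i' == 0.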
+ %incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1 + store i32 %0, i32* %B.addr.01, align 4 + %inc = add i64 %i.02, 1 + %cmp = icmp ult i64 %inc, 10 + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body + ret void +} + + +;; for (long unsigned i = 0; i <= 10; i++) { +;; A[6*i] = ... +;; ... = A[i + 60]; + +define void @exact3(i32* %A, i32* %B) nounwind uwtable ssp { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ] + %B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ] + %conv = trunc i64 %i.02 to i32 + %mul = mul i64 %i.02, 6 + %arrayidx = getelementptr inbounds i32* %A, i64 %mul + store i32 %conv, i32* %arrayidx, align 4 + %add = add i64 %i.02, 60 + %arrayidx1 = getelementptr inbounds i32* %A, i64 %add + %0 = load i32* %arrayidx1, align 4 +; CHECK: da analyze - flow [>]! + %incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1 + store i32 %0, i32* %B.addr.01, align 4 + %inc = add i64 %i.02, 1 + %cmp = icmp ult i64 %inc, 11 + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body + ret void +} + + +;; for (long unsigned i = 0; i < 12; i++) { +;; A[6*i] = ... +;; ... = A[i + 60]; + +define void @exact4(i32* %A, i32* %B) nounwind uwtable ssp { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ] + %B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ] + %conv = trunc i64 %i.02 to i32 + %mul = mul i64 %i.02, 6 + %arrayidx = getelementptr inbounds i32* %A, i64 %mul + store i32 %conv, i32* %arrayidx, align 4 + %add = add i64 %i.02, 60 + %arrayidx1 = getelementptr inbounds i32* %A, i64 %add + %0 = load i32* %arrayidx1, align 4 +; CHECK: da analyze - flow [>]! + %incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1 + store i32 %0, i32* %B.addr.01, align 4 + %inc = add i64 %i.02, 1 + %cmp = icmp ult i64 %inc, 12 + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body + ret void +} + + +;; for (long unsigned i = 0; i <= 12; i++) { +;; A[6*i] = ... +;; ... = A[i + 60]; + +define void @exact5(i32* %A, i32* %B) nounwind uwtable ssp { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ] + %B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ] + %conv = trunc i64 %i.02 to i32 + %mul = mul i64 %i.02, 6 + %arrayidx = getelementptr inbounds i32* %A, i64 %mul + store i32 %conv, i32* %arrayidx, align 4 + %add = add i64 %i.02, 60 + %arrayidx1 = getelementptr inbounds i32* %A, i64 %add + %0 = load i32* %arrayidx1, align 4 +; CHECK: da analyze - flow [=>|<]! + %incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1 + store i32 %0, i32* %B.addr.01, align 4 + %inc = add i64 %i.02, 1 + %cmp = icmp ult i64 %inc, 13 + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body + ret void +} + + +;; for (long unsigned i = 0; i < 18; i++) { +;; A[6*i] = ... +;; ... 
= A[i + 60]; + +define void @exact6(i32* %A, i32* %B) nounwind uwtable ssp { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ] + %B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ] + %conv = trunc i64 %i.02 to i32 + %mul = mul i64 %i.02, 6 + %arrayidx = getelementptr inbounds i32* %A, i64 %mul + store i32 %conv, i32* %arrayidx, align 4 + %add = add i64 %i.02, 60 + %arrayidx1 = getelementptr inbounds i32* %A, i64 %add + %0 = load i32* %arrayidx1, align 4 +; CHECK: da analyze - flow [=>|<]! + %incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1 + store i32 %0, i32* %B.addr.01, align 4 + %inc = add i64 %i.02, 1 + %cmp = icmp ult i64 %inc, 18 + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body + ret void +} + + +;; for (long unsigned i = 0; i <= 18; i++) { +;; A[6*i] = ... +;; ... = A[i + 60]; + +define void @exact7(i32* %A, i32* %B) nounwind uwtable ssp { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ] + %B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ] + %conv = trunc i64 %i.02 to i32 + %mul = mul i64 %i.02, 6 + %arrayidx = getelementptr inbounds i32* %A, i64 %mul + store i32 %conv, i32* %arrayidx, align 4 + %add = add i64 %i.02, 60 + %arrayidx1 = getelementptr inbounds i32* %A, i64 %add + %0 = load i32* %arrayidx1, align 4 +; CHECK: da analyze - flow [*|<]! + %incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1 + store i32 %0, i32* %B.addr.01, align 4 + %inc = add i64 %i.02, 1 + %cmp = icmp ult i64 %inc, 19 + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body + ret void +} + + +;; for (long unsigned i = 0; i < 10; i++) { +;; A[-6*i] = ... +;; ... = A[-i - 60]; + +define void @exact8(i32* %A, i32* %B) nounwind uwtable ssp { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ] + %B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ] + %conv = trunc i64 %i.02 to i32 + %mul = mul i64 %i.02, -6 + %arrayidx = getelementptr inbounds i32* %A, i64 %mul + store i32 %conv, i32* %arrayidx, align 4 + %sub1 = sub i64 -60, %i.02 + %arrayidx2 = getelementptr inbounds i32* %A, i64 %sub1 + %0 = load i32* %arrayidx2, align 4 +; CHECK: da analyze - none! + %incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1 + store i32 %0, i32* %B.addr.01, align 4 + %inc = add i64 %i.02, 1 + %cmp = icmp ult i64 %inc, 10 + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body + ret void +} + + +;; for (long unsigned i = 0; i <= 10; i++) { +;; A[-6*i] = ... +;; ... = A[-i - 60]; + +define void @exact9(i32* %A, i32* %B) nounwind uwtable ssp { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ] + %B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ] + %conv = trunc i64 %i.02 to i32 + %mul = mul i64 %i.02, -6 + %arrayidx = getelementptr inbounds i32* %A, i64 %mul + store i32 %conv, i32* %arrayidx, align 4 + %sub1 = sub i64 -60, %i.02 + %arrayidx2 = getelementptr inbounds i32* %A, i64 %sub1 + %0 = load i32* %arrayidx2, align 4 +; CHECK: da analyze - flow [>]! 
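+; exact8 through exact13 mirror exact2 through exact7 with both subscripts negated, so each expects the same result as its positive-stride counterpart.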
+ %incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1 + store i32 %0, i32* %B.addr.01, align 4 + %inc = add i64 %i.02, 1 + %cmp = icmp ult i64 %inc, 11 + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body + ret void +} + + +;; for (long unsigned i = 0; i < 12; i++) { +;; A[-6*i] = ... +;; ... = A[-i - 60]; + +define void @exact10(i32* %A, i32* %B) nounwind uwtable ssp { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ] + %B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ] + %conv = trunc i64 %i.02 to i32 + %mul = mul i64 %i.02, -6 + %arrayidx = getelementptr inbounds i32* %A, i64 %mul + store i32 %conv, i32* %arrayidx, align 4 + %sub1 = sub i64 -60, %i.02 + %arrayidx2 = getelementptr inbounds i32* %A, i64 %sub1 + %0 = load i32* %arrayidx2, align 4 +; CHECK: da analyze - flow [>]! + %incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1 + store i32 %0, i32* %B.addr.01, align 4 + %inc = add i64 %i.02, 1 + %cmp = icmp ult i64 %inc, 12 + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body + ret void +} + + +;; for (long unsigned i = 0; i <= 12; i++) { +;; A[-6*i] = ... +;; ... = A[-i - 60]; + +define void @exact11(i32* %A, i32* %B) nounwind uwtable ssp { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ] + %B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ] + %conv = trunc i64 %i.02 to i32 + %mul = mul i64 %i.02, -6 + %arrayidx = getelementptr inbounds i32* %A, i64 %mul + store i32 %conv, i32* %arrayidx, align 4 + %sub1 = sub i64 -60, %i.02 + %arrayidx2 = getelementptr inbounds i32* %A, i64 %sub1 + %0 = load i32* %arrayidx2, align 4 +; CHECK: da analyze - flow [=>|<]! + %incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1 + store i32 %0, i32* %B.addr.01, align 4 + %inc = add i64 %i.02, 1 + %cmp = icmp ult i64 %inc, 13 + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body + ret void +} + + +;; for (long unsigned i = 0; i < 18; i++) { +;; A[-6*i] = ... +;; ... = A[-i - 60]; + +define void @exact12(i32* %A, i32* %B) nounwind uwtable ssp { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ] + %B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ] + %conv = trunc i64 %i.02 to i32 + %mul = mul i64 %i.02, -6 + %arrayidx = getelementptr inbounds i32* %A, i64 %mul + store i32 %conv, i32* %arrayidx, align 4 + %sub1 = sub i64 -60, %i.02 + %arrayidx2 = getelementptr inbounds i32* %A, i64 %sub1 + %0 = load i32* %arrayidx2, align 4 +; CHECK: da analyze - flow [=>|<]! + %incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1 + store i32 %0, i32* %B.addr.01, align 4 + %inc = add i64 %i.02, 1 + %cmp = icmp ult i64 %inc, 18 + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body + ret void +} + + +;; for (long unsigned i = 0; i <= 18; i++) { +;; A[-6*i] = ... +;; ... 
= A[-i - 60]; + +define void @exact13(i32* %A, i32* %B) nounwind uwtable ssp { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ] + %B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ] + %conv = trunc i64 %i.02 to i32 + %mul = mul i64 %i.02, -6 + %arrayidx = getelementptr inbounds i32* %A, i64 %mul + store i32 %conv, i32* %arrayidx, align 4 + %sub1 = sub i64 -60, %i.02 + %arrayidx2 = getelementptr inbounds i32* %A, i64 %sub1 + %0 = load i32* %arrayidx2, align 4 +; CHECK: da analyze - flow [*|<]! + %incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1 + store i32 %0, i32* %B.addr.01, align 4 + %inc = add i64 %i.02, 1 + %cmp = icmp ult i64 %inc, 19 + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body + ret void +} diff --git a/llvm/test/Analysis/DependenceAnalysis/GCD.ll b/llvm/test/Analysis/DependenceAnalysis/GCD.ll new file mode 100644 index 0000000..94c93a8 --- /dev/null +++ b/llvm/test/Analysis/DependenceAnalysis/GCD.ll @@ -0,0 +1,597 @@ +; RUN: opt < %s -analyze -basicaa -da | FileCheck %s + +; ModuleID = 'GCD.bc' +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.6.0" + + +;; for (long int i = 0; i < 100; i++) +;; for (long int j = 0; j < 100; j++) +;; A[2*i - 4*j] = ... +;; ... = A[6*i + 8*j]; + +define void @gcd0(i32* %A, i32* %B) nounwind uwtable ssp { +entry: + br label %for.cond1.preheader + +for.cond1.preheader: ; preds = %entry, %for.inc8 + %B.addr.04 = phi i32* [ %B, %entry ], [ %scevgep, %for.inc8 ] + %i.03 = phi i64 [ 0, %entry ], [ %inc9, %for.inc8 ] + br label %for.body3 + +for.body3: ; preds = %for.cond1.preheader, %for.body3 + %j.02 = phi i64 [ 0, %for.cond1.preheader ], [ %inc, %for.body3 ] + %B.addr.11 = phi i32* [ %B.addr.04, %for.cond1.preheader ], [ %incdec.ptr, %for.body3 ] + %conv = trunc i64 %i.03 to i32 + %mul = shl nsw i64 %i.03, 1 + %mul4 = shl nsw i64 %j.02, 2 + %sub = sub nsw i64 %mul, %mul4 + %arrayidx = getelementptr inbounds i32* %A, i64 %sub + store i32 %conv, i32* %arrayidx, align 4 + %mul5 = mul nsw i64 %i.03, 6 + %mul6 = shl nsw i64 %j.02, 3 + %add = add nsw i64 %mul5, %mul6 + %arrayidx7 = getelementptr inbounds i32* %A, i64 %add + %0 = load i32* %arrayidx7, align 4 +; CHECK: da analyze - flow [=> *|<]! + %incdec.ptr = getelementptr inbounds i32* %B.addr.11, i64 1 + store i32 %0, i32* %B.addr.11, align 4 + %inc = add nsw i64 %j.02, 1 + %exitcond = icmp ne i64 %inc, 100 + br i1 %exitcond, label %for.body3, label %for.inc8 + +for.inc8: ; preds = %for.body3 + %scevgep = getelementptr i32* %B.addr.04, i64 100 + %inc9 = add nsw i64 %i.03, 1 + %exitcond5 = icmp ne i64 %inc9, 100 + br i1 %exitcond5, label %for.cond1.preheader, label %for.end10 + +for.end10: ; preds = %for.inc8 + ret void +} + + +;; for (long int i = 0; i < 100; i++) +;; for (long int j = 0; j < 100; j++) +;; A[2*i - 4*j] = ... +;; ... 
= A[6*i + 8*j + 1]; + +define void @gcd1(i32* %A, i32* %B) nounwind uwtable ssp { +entry: + br label %for.cond1.preheader + +for.cond1.preheader: ; preds = %entry, %for.inc9 + %B.addr.04 = phi i32* [ %B, %entry ], [ %scevgep, %for.inc9 ] + %i.03 = phi i64 [ 0, %entry ], [ %inc10, %for.inc9 ] + br label %for.body3 + +for.body3: ; preds = %for.cond1.preheader, %for.body3 + %j.02 = phi i64 [ 0, %for.cond1.preheader ], [ %inc, %for.body3 ] + %B.addr.11 = phi i32* [ %B.addr.04, %for.cond1.preheader ], [ %incdec.ptr, %for.body3 ] + %conv = trunc i64 %i.03 to i32 + %mul = shl nsw i64 %i.03, 1 + %mul4 = shl nsw i64 %j.02, 2 + %sub = sub nsw i64 %mul, %mul4 + %arrayidx = getelementptr inbounds i32* %A, i64 %sub + store i32 %conv, i32* %arrayidx, align 4 + %mul5 = mul nsw i64 %i.03, 6 + %mul6 = shl nsw i64 %j.02, 3 + %add = add nsw i64 %mul5, %mul6 + %add7 = or i64 %add, 1 + %arrayidx8 = getelementptr inbounds i32* %A, i64 %add7 + %0 = load i32* %arrayidx8, align 4 +; CHECK: da analyze - none! + %incdec.ptr = getelementptr inbounds i32* %B.addr.11, i64 1 + store i32 %0, i32* %B.addr.11, align 4 + %inc = add nsw i64 %j.02, 1 + %exitcond = icmp ne i64 %inc, 100 + br i1 %exitcond, label %for.body3, label %for.inc9 + +for.inc9: ; preds = %for.body3 + %scevgep = getelementptr i32* %B.addr.04, i64 100 + %inc10 = add nsw i64 %i.03, 1 + %exitcond5 = icmp ne i64 %inc10, 100 + br i1 %exitcond5, label %for.cond1.preheader, label %for.end11 + +for.end11: ; preds = %for.inc9 + ret void +} + + +;; for (long int i = 0; i < 100; i++) +;; for (long int j = 0; j < 100; j++) +;; A[2*i - 4*j + 1] = ... +;; ... = A[6*i + 8*j]; + +define void @gcd2(i32* %A, i32* %B) nounwind uwtable ssp { +entry: + br label %for.cond1.preheader + +for.cond1.preheader: ; preds = %entry, %for.inc9 + %B.addr.04 = phi i32* [ %B, %entry ], [ %scevgep, %for.inc9 ] + %i.03 = phi i64 [ 0, %entry ], [ %inc10, %for.inc9 ] + br label %for.body3 + +for.body3: ; preds = %for.cond1.preheader, %for.body3 + %j.02 = phi i64 [ 0, %for.cond1.preheader ], [ %inc, %for.body3 ] + %B.addr.11 = phi i32* [ %B.addr.04, %for.cond1.preheader ], [ %incdec.ptr, %for.body3 ] + %conv = trunc i64 %i.03 to i32 + %mul = shl nsw i64 %i.03, 1 + %mul4 = shl nsw i64 %j.02, 2 + %sub = sub nsw i64 %mul, %mul4 + %add5 = or i64 %sub, 1 + %arrayidx = getelementptr inbounds i32* %A, i64 %add5 + store i32 %conv, i32* %arrayidx, align 4 + %mul5 = mul nsw i64 %i.03, 6 + %mul6 = shl nsw i64 %j.02, 3 + %add7 = add nsw i64 %mul5, %mul6 + %arrayidx8 = getelementptr inbounds i32* %A, i64 %add7 + %0 = load i32* %arrayidx8, align 4 +; CHECK: da analyze - none! + %incdec.ptr = getelementptr inbounds i32* %B.addr.11, i64 1 + store i32 %0, i32* %B.addr.11, align 4 + %inc = add nsw i64 %j.02, 1 + %exitcond = icmp ne i64 %inc, 100 + br i1 %exitcond, label %for.body3, label %for.inc9 + +for.inc9: ; preds = %for.body3 + %scevgep = getelementptr i32* %B.addr.04, i64 100 + %inc10 = add nsw i64 %i.03, 1 + %exitcond6 = icmp ne i64 %inc10, 100 + br i1 %exitcond6, label %for.cond1.preheader, label %for.end11 + +for.end11: ; preds = %for.inc9 + ret void +} + + +;; for (long int i = 0; i < 100; i++) +;; for (long int j = 0; j < 100; j++) +;; A[i + 2*j] = ... +;; ... 
= A[i + 2*j - 1]; + +define void @gcd3(i32* %A, i32* %B) nounwind uwtable ssp { +entry: + br label %for.cond1.preheader + +for.cond1.preheader: ; preds = %entry, %for.inc7 + %B.addr.04 = phi i32* [ %B, %entry ], [ %scevgep, %for.inc7 ] + %i.03 = phi i64 [ 0, %entry ], [ %inc8, %for.inc7 ] + br label %for.body3 + +for.body3: ; preds = %for.cond1.preheader, %for.body3 + %j.02 = phi i64 [ 0, %for.cond1.preheader ], [ %inc, %for.body3 ] + %B.addr.11 = phi i32* [ %B.addr.04, %for.cond1.preheader ], [ %incdec.ptr, %for.body3 ] + %conv = trunc i64 %i.03 to i32 + %mul = shl nsw i64 %j.02, 1 + %add = add nsw i64 %i.03, %mul + %arrayidx = getelementptr inbounds i32* %A, i64 %add + store i32 %conv, i32* %arrayidx, align 4 + %mul4 = shl nsw i64 %j.02, 1 + %add5 = add nsw i64 %i.03, %mul4 + %sub = add nsw i64 %add5, -1 + %arrayidx6 = getelementptr inbounds i32* %A, i64 %sub + %0 = load i32* %arrayidx6, align 4 +; CHECK: da analyze - flow [<> *]! + %incdec.ptr = getelementptr inbounds i32* %B.addr.11, i64 1 + store i32 %0, i32* %B.addr.11, align 4 + %inc = add nsw i64 %j.02, 1 + %exitcond = icmp ne i64 %inc, 100 + br i1 %exitcond, label %for.body3, label %for.inc7 + +for.inc7: ; preds = %for.body3 + %scevgep = getelementptr i32* %B.addr.04, i64 100 + %inc8 = add nsw i64 %i.03, 1 + %exitcond5 = icmp ne i64 %inc8, 100 + br i1 %exitcond5, label %for.cond1.preheader, label %for.end9 + +for.end9: ; preds = %for.inc7 + ret void +} + + +;; void gcd4(int *A, int *B, long int M, long int N) { +;; for (long int i = 0; i < 100; i++) +;; for (long int j = 0; j < 100; j++) { +;; A[5*i + 10*j*M + 9*M*N] = i; +;; *B++ = A[15*i + 20*j*M - 21*N*M + 4]; + +define void @gcd4(i32* %A, i32* %B, i64 %M, i64 %N) nounwind uwtable ssp { +entry: + br label %for.cond1.preheader + +for.cond1.preheader: ; preds = %entry, %for.inc17 + %B.addr.04 = phi i32* [ %B, %entry ], [ %scevgep, %for.inc17 ] + %i.03 = phi i64 [ 0, %entry ], [ %inc18, %for.inc17 ] + br label %for.body3 + +for.body3: ; preds = %for.cond1.preheader, %for.body3 + %j.02 = phi i64 [ 0, %for.cond1.preheader ], [ %inc, %for.body3 ] + %B.addr.11 = phi i32* [ %B.addr.04, %for.cond1.preheader ], [ %incdec.ptr, %for.body3 ] + %conv = trunc i64 %i.03 to i32 + %mul = mul nsw i64 %i.03, 5 + %mul4 = mul nsw i64 %j.02, 10 + %mul5 = mul nsw i64 %mul4, %M + %add = add nsw i64 %mul, %mul5 + %mul6 = mul nsw i64 %M, 9 + %mul7 = mul nsw i64 %mul6, %N + %add8 = add nsw i64 %add, %mul7 + %arrayidx = getelementptr inbounds i32* %A, i64 %add8 + store i32 %conv, i32* %arrayidx, align 4 + %mul9 = mul nsw i64 %i.03, 15 + %mul10 = mul nsw i64 %j.02, 20 + %mul11 = mul nsw i64 %mul10, %M + %add12 = add nsw i64 %mul9, %mul11 + %mul13 = mul nsw i64 %N, 21 + %mul14 = mul nsw i64 %mul13, %M + %sub = sub nsw i64 %add12, %mul14 + %add15 = add nsw i64 %sub, 4 + %arrayidx16 = getelementptr inbounds i32* %A, i64 %add15 + %0 = load i32* %arrayidx16, align 4 +; CHECK: da analyze - none! 
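+; Note: GCD test. Every coefficient of i and j (5, 10*M, 15, 20*M) is a multiple of 5,
+; and the symbolic 9*M*N and -21*N*M terms contribute a difference of 30*M*N, also a
+; multiple of 5, while the remaining constant 4 is not. Since 5 does not divide 4,
+; the two references can never touch the same element.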
+ %incdec.ptr = getelementptr inbounds i32* %B.addr.11, i64 1 + store i32 %0, i32* %B.addr.11, align 4 + %inc = add nsw i64 %j.02, 1 + %exitcond = icmp ne i64 %inc, 100 + br i1 %exitcond, label %for.body3, label %for.inc17 + +for.inc17: ; preds = %for.body3 + %scevgep = getelementptr i32* %B.addr.04, i64 100 + %inc18 = add nsw i64 %i.03, 1 + %exitcond5 = icmp ne i64 %inc18, 100 + br i1 %exitcond5, label %for.cond1.preheader, label %for.end19 + +for.end19: ; preds = %for.inc17 + ret void +} + + +;; void gcd5(int *A, int *B, long int M, long int N) { +;; for (long int i = 0; i < 100; i++) +;; for (long int j = 0; j < 100; j++) { +;; A[5*i + 10*j*M + 9*M*N] = i; +;; *B++ = A[15*i + 20*j*M - 21*N*M + 5]; + +define void @gcd5(i32* %A, i32* %B, i64 %M, i64 %N) nounwind uwtable ssp { +entry: + br label %for.cond1.preheader + +for.cond1.preheader: ; preds = %entry, %for.inc17 + %B.addr.04 = phi i32* [ %B, %entry ], [ %scevgep, %for.inc17 ] + %i.03 = phi i64 [ 0, %entry ], [ %inc18, %for.inc17 ] + br label %for.body3 + +for.body3: ; preds = %for.cond1.preheader, %for.body3 + %j.02 = phi i64 [ 0, %for.cond1.preheader ], [ %inc, %for.body3 ] + %B.addr.11 = phi i32* [ %B.addr.04, %for.cond1.preheader ], [ %incdec.ptr, %for.body3 ] + %conv = trunc i64 %i.03 to i32 + %mul = mul nsw i64 %i.03, 5 + %mul4 = mul nsw i64 %j.02, 10 + %mul5 = mul nsw i64 %mul4, %M + %add = add nsw i64 %mul, %mul5 + %mul6 = mul nsw i64 %M, 9 + %mul7 = mul nsw i64 %mul6, %N + %add8 = add nsw i64 %add, %mul7 + %arrayidx = getelementptr inbounds i32* %A, i64 %add8 + store i32 %conv, i32* %arrayidx, align 4 + %mul9 = mul nsw i64 %i.03, 15 + %mul10 = mul nsw i64 %j.02, 20 + %mul11 = mul nsw i64 %mul10, %M + %add12 = add nsw i64 %mul9, %mul11 + %mul13 = mul nsw i64 %N, 21 + %mul14 = mul nsw i64 %mul13, %M + %sub = sub nsw i64 %add12, %mul14 + %add15 = add nsw i64 %sub, 5 + %arrayidx16 = getelementptr inbounds i32* %A, i64 %add15 + %0 = load i32* %arrayidx16, align 4 +; CHECK: da analyze - flow [<> *]! 
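+; Note: same subscripts as gcd4 except the constant is 5 rather than 4; now every term
+; of the dependence equation is a multiple of 5, so the GCD test cannot rule the
+; dependence out and a conservative direction vector is reported instead.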
+ %incdec.ptr = getelementptr inbounds i32* %B.addr.11, i64 1 + store i32 %0, i32* %B.addr.11, align 4 + %inc = add nsw i64 %j.02, 1 + %exitcond = icmp ne i64 %inc, 100 + br i1 %exitcond, label %for.body3, label %for.inc17 + +for.inc17: ; preds = %for.body3 + %scevgep = getelementptr i32* %B.addr.04, i64 100 + %inc18 = add nsw i64 %i.03, 1 + %exitcond5 = icmp ne i64 %inc18, 100 + br i1 %exitcond5, label %for.cond1.preheader, label %for.end19 + +for.end19: ; preds = %for.inc17 + ret void +} + + +;; void gcd6(long int n, int A[][n], int *B) { +;; for (long int i = 0; i < n; i++) +;; for (long int j = 0; j < n; j++) { +;; A[2*i][4*j] = i; +;; *B++ = A[8*i][6*j + 1]; + +define void @gcd6(i64 %n, i32* %A, i32* %B) nounwind uwtable ssp { +entry: + %cmp4 = icmp sgt i64 %n, 0 + br i1 %cmp4, label %for.cond1.preheader.preheader, label %for.end12 + +for.cond1.preheader.preheader: ; preds = %entry + br label %for.cond1.preheader + +for.cond1.preheader: ; preds = %for.cond1.preheader.preheader, %for.inc10 + %i.06 = phi i64 [ %inc11, %for.inc10 ], [ 0, %for.cond1.preheader.preheader ] + %B.addr.05 = phi i32* [ %B.addr.1.lcssa, %for.inc10 ], [ %B, %for.cond1.preheader.preheader ] + %cmp21 = icmp sgt i64 %n, 0 + br i1 %cmp21, label %for.body3.preheader, label %for.inc10 + +for.body3.preheader: ; preds = %for.cond1.preheader + br label %for.body3 + +for.body3: ; preds = %for.body3.preheader, %for.body3 + %j.03 = phi i64 [ %inc, %for.body3 ], [ 0, %for.body3.preheader ] + %B.addr.12 = phi i32* [ %incdec.ptr, %for.body3 ], [ %B.addr.05, %for.body3.preheader ] + %conv = trunc i64 %i.06 to i32 + %mul = shl nsw i64 %j.03, 2 + %mul4 = shl nsw i64 %i.06, 1 + %0 = mul nsw i64 %mul4, %n + %arrayidx.sum = add i64 %0, %mul + %arrayidx5 = getelementptr inbounds i32* %A, i64 %arrayidx.sum + store i32 %conv, i32* %arrayidx5, align 4 + %mul6 = mul nsw i64 %j.03, 6 + %add7 = or i64 %mul6, 1 + %mul7 = shl nsw i64 %i.06, 3 + %1 = mul nsw i64 %mul7, %n + %arrayidx8.sum = add i64 %1, %add7 + %arrayidx9 = getelementptr inbounds i32* %A, i64 %arrayidx8.sum + %2 = load i32* %arrayidx9, align 4 +; CHECK: da analyze - none! 
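+; Note: with the rows linearized, the store index is 2*i*n + 4*j and the load index is
+; 8*i*n + 6*j + 1. All coefficients are even but the constant difference is 1, so the
+; GCD test (2 does not divide 1) proves independence.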
+ %incdec.ptr = getelementptr inbounds i32* %B.addr.12, i64 1 + store i32 %2, i32* %B.addr.12, align 4 + %inc = add nsw i64 %j.03, 1 + %exitcond = icmp ne i64 %inc, %n + br i1 %exitcond, label %for.body3, label %for.inc10.loopexit + +for.inc10.loopexit: ; preds = %for.body3 + %scevgep = getelementptr i32* %B.addr.05, i64 %n + br label %for.inc10 + +for.inc10: ; preds = %for.inc10.loopexit, %for.cond1.preheader + %B.addr.1.lcssa = phi i32* [ %B.addr.05, %for.cond1.preheader ], [ %scevgep, %for.inc10.loopexit ] + %inc11 = add nsw i64 %i.06, 1 + %exitcond8 = icmp ne i64 %inc11, %n + br i1 %exitcond8, label %for.cond1.preheader, label %for.end12.loopexit + +for.end12.loopexit: ; preds = %for.inc10 + br label %for.end12 + +for.end12: ; preds = %for.end12.loopexit, %entry + ret void +} + + +;; void gcd7(int n, int A[][n], int *B) { +;; for (int i = 0; i < n; i++) +;; for (int j = 0; j < n; j++) { +;; A[2*i][4*j] = i; +;; *B++ = A[8*i][6*j + 1]; + +define void @gcd7(i32 %n, i32* %A, i32* %B) nounwind uwtable ssp { +entry: + %0 = zext i32 %n to i64 + %cmp4 = icmp sgt i32 %n, 0 + br i1 %cmp4, label %for.cond1.preheader.preheader, label %for.end15 + +for.cond1.preheader.preheader: ; preds = %entry + br label %for.cond1.preheader + +for.cond1.preheader: ; preds = %for.cond1.preheader.preheader, %for.inc13 + %indvars.iv8 = phi i64 [ 0, %for.cond1.preheader.preheader ], [ %indvars.iv.next9, %for.inc13 ] + %B.addr.05 = phi i32* [ %B.addr.1.lcssa, %for.inc13 ], [ %B, %for.cond1.preheader.preheader ] + %1 = add i32 %n, -1 + %2 = zext i32 %1 to i64 + %3 = add i64 %2, 1 + %cmp21 = icmp sgt i32 %n, 0 + br i1 %cmp21, label %for.body3.preheader, label %for.inc13 + +for.body3.preheader: ; preds = %for.cond1.preheader + br label %for.body3 + +for.body3: ; preds = %for.body3.preheader, %for.body3 + %indvars.iv = phi i64 [ 0, %for.body3.preheader ], [ %indvars.iv.next, %for.body3 ] + %B.addr.12 = phi i32* [ %incdec.ptr, %for.body3 ], [ %B.addr.05, %for.body3.preheader ] + %4 = trunc i64 %indvars.iv to i32 + %mul = shl nsw i32 %4, 2 + %idxprom = sext i32 %mul to i64 + %5 = trunc i64 %indvars.iv8 to i32 + %mul4 = shl nsw i32 %5, 1 + %idxprom5 = sext i32 %mul4 to i64 + %6 = mul nsw i64 %idxprom5, %0 + %arrayidx.sum = add i64 %6, %idxprom + %arrayidx6 = getelementptr inbounds i32* %A, i64 %arrayidx.sum + %7 = trunc i64 %indvars.iv8 to i32 + store i32 %7, i32* %arrayidx6, align 4 + %8 = trunc i64 %indvars.iv to i32 + %mul7 = mul nsw i32 %8, 6 + %add7 = or i32 %mul7, 1 + %idxprom8 = sext i32 %add7 to i64 + %9 = trunc i64 %indvars.iv8 to i32 + %mul9 = shl nsw i32 %9, 3 + %idxprom10 = sext i32 %mul9 to i64 + %10 = mul nsw i64 %idxprom10, %0 + %arrayidx11.sum = add i64 %10, %idxprom8 + %arrayidx12 = getelementptr inbounds i32* %A, i64 %arrayidx11.sum + %11 = load i32* %arrayidx12, align 4 +; CHECK: da analyze - flow [* *|<]! 
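+; Note: same loop as gcd6, but the index arithmetic is done in 32 bits and sign-extended;
+; the trunc/sext pairs apparently keep the analysis from exposing the coefficients, so
+; the GCD argument that worked for gcd6 does not apply and only the conservative
+; direction vector above is reported.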
+ %incdec.ptr = getelementptr inbounds i32* %B.addr.12, i64 1 + store i32 %11, i32* %B.addr.12, align 4 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp ne i32 %lftr.wideiv, %n + br i1 %exitcond, label %for.body3, label %for.inc13.loopexit + +for.inc13.loopexit: ; preds = %for.body3 + %scevgep = getelementptr i32* %B.addr.05, i64 %3 + br label %for.inc13 + +for.inc13: ; preds = %for.inc13.loopexit, %for.cond1.preheader + %B.addr.1.lcssa = phi i32* [ %B.addr.05, %for.cond1.preheader ], [ %scevgep, %for.inc13.loopexit ] + %indvars.iv.next9 = add i64 %indvars.iv8, 1 + %lftr.wideiv10 = trunc i64 %indvars.iv.next9 to i32 + %exitcond11 = icmp ne i32 %lftr.wideiv10, %n + br i1 %exitcond11, label %for.cond1.preheader, label %for.end15.loopexit + +for.end15.loopexit: ; preds = %for.inc13 + br label %for.end15 + +for.end15: ; preds = %for.end15.loopexit, %entry + ret void +} + + +;; void gcd8(int n, int *A, int *B) { +;; for (int i = 0; i < n; i++) +;; for (int j = 0; j < n; j++) { +;; A[n*2*i + 4*j] = i; +;; *B++ = A[n*8*i + 6*j + 1]; + +define void @gcd8(i32 %n, i32* %A, i32* %B) nounwind uwtable ssp { +entry: + %cmp4 = icmp sgt i32 %n, 0 + br i1 %cmp4, label %for.cond1.preheader.preheader, label %for.end15 + +for.cond1.preheader.preheader: ; preds = %entry + br label %for.cond1.preheader + +for.cond1.preheader: ; preds = %for.cond1.preheader.preheader, %for.inc13 + %i.06 = phi i32 [ %inc14, %for.inc13 ], [ 0, %for.cond1.preheader.preheader ] + %B.addr.05 = phi i32* [ %B.addr.1.lcssa, %for.inc13 ], [ %B, %for.cond1.preheader.preheader ] + %0 = add i32 %n, -1 + %1 = zext i32 %0 to i64 + %2 = add i64 %1, 1 + %cmp21 = icmp sgt i32 %n, 0 + br i1 %cmp21, label %for.body3.preheader, label %for.inc13 + +for.body3.preheader: ; preds = %for.cond1.preheader + br label %for.body3 + +for.body3: ; preds = %for.body3.preheader, %for.body3 + %indvars.iv = phi i64 [ 0, %for.body3.preheader ], [ %indvars.iv.next, %for.body3 ] + %B.addr.12 = phi i32* [ %incdec.ptr, %for.body3 ], [ %B.addr.05, %for.body3.preheader ] + %mul = shl nsw i32 %n, 1 + %mul4 = mul nsw i32 %mul, %i.06 + %3 = trunc i64 %indvars.iv to i32 + %mul5 = shl nsw i32 %3, 2 + %add = add nsw i32 %mul4, %mul5 + %idxprom = sext i32 %add to i64 + %arrayidx = getelementptr inbounds i32* %A, i64 %idxprom + store i32 %i.06, i32* %arrayidx, align 4 + %mul6 = shl nsw i32 %n, 3 + %mul7 = mul nsw i32 %mul6, %i.06 + %4 = trunc i64 %indvars.iv to i32 + %mul8 = mul nsw i32 %4, 6 + %add9 = add nsw i32 %mul7, %mul8 + %add10 = or i32 %add9, 1 + %idxprom11 = sext i32 %add10 to i64 + %arrayidx12 = getelementptr inbounds i32* %A, i64 %idxprom11 + %5 = load i32* %arrayidx12, align 4 +; CHECK: da analyze - none! 
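+; Note: as in gcd6, every coefficient (2*n, 4, 8*n, 6) is even while the constant
+; difference between the two subscripts is 1, so the GCD test proves independence.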
+ %incdec.ptr = getelementptr inbounds i32* %B.addr.12, i64 1 + store i32 %5, i32* %B.addr.12, align 4 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp ne i32 %lftr.wideiv, %n + br i1 %exitcond, label %for.body3, label %for.inc13.loopexit + +for.inc13.loopexit: ; preds = %for.body3 + %scevgep = getelementptr i32* %B.addr.05, i64 %2 + br label %for.inc13 + +for.inc13: ; preds = %for.inc13.loopexit, %for.cond1.preheader + %B.addr.1.lcssa = phi i32* [ %B.addr.05, %for.cond1.preheader ], [ %scevgep, %for.inc13.loopexit ] + %inc14 = add nsw i32 %i.06, 1 + %exitcond7 = icmp ne i32 %inc14, %n + br i1 %exitcond7, label %for.cond1.preheader, label %for.end15.loopexit + +for.end15.loopexit: ; preds = %for.inc13 + br label %for.end15 + +for.end15: ; preds = %for.end15.loopexit, %entry + ret void +} + + +;; void gcd9(unsigned n, int A[][n], int *B) { +;; for (unsigned i = 0; i < n; i++) +;; for (unsigned j = 0; j < n; j++) { +;; A[2*i][4*j] = i; +;; *B++ = A[8*i][6*j + 1]; + +define void @gcd9(i32 %n, i32* %A, i32* %B) nounwind uwtable ssp { +entry: + %0 = zext i32 %n to i64 + %cmp4 = icmp eq i32 %n, 0 + br i1 %cmp4, label %for.end15, label %for.cond1.preheader.preheader + +for.cond1.preheader.preheader: ; preds = %entry + br label %for.cond1.preheader + +for.cond1.preheader: ; preds = %for.cond1.preheader.preheader, %for.inc13 + %indvars.iv8 = phi i64 [ 0, %for.cond1.preheader.preheader ], [ %indvars.iv.next9, %for.inc13 ] + %B.addr.05 = phi i32* [ %B.addr.1.lcssa, %for.inc13 ], [ %B, %for.cond1.preheader.preheader ] + %1 = add i32 %n, -1 + %2 = zext i32 %1 to i64 + %3 = add i64 %2, 1 + %cmp21 = icmp eq i32 %n, 0 + br i1 %cmp21, label %for.inc13, label %for.body3.preheader + +for.body3.preheader: ; preds = %for.cond1.preheader + br label %for.body3 + +for.body3: ; preds = %for.body3.preheader, %for.body3 + %indvars.iv = phi i64 [ 0, %for.body3.preheader ], [ %indvars.iv.next, %for.body3 ] + %B.addr.12 = phi i32* [ %incdec.ptr, %for.body3 ], [ %B.addr.05, %for.body3.preheader ] + %4 = trunc i64 %indvars.iv to i32 + %mul = shl i32 %4, 2 + %idxprom = zext i32 %mul to i64 + %5 = trunc i64 %indvars.iv8 to i32 + %mul4 = shl i32 %5, 1 + %idxprom5 = zext i32 %mul4 to i64 + %6 = mul nsw i64 %idxprom5, %0 + %arrayidx.sum = add i64 %6, %idxprom + %arrayidx6 = getelementptr inbounds i32* %A, i64 %arrayidx.sum + %7 = trunc i64 %indvars.iv8 to i32 + store i32 %7, i32* %arrayidx6, align 4 + %8 = trunc i64 %indvars.iv to i32 + %mul7 = mul i32 %8, 6 + %add7 = or i32 %mul7, 1 + %idxprom8 = zext i32 %add7 to i64 + %9 = trunc i64 %indvars.iv8 to i32 + %mul9 = shl i32 %9, 3 + %idxprom10 = zext i32 %mul9 to i64 + %10 = mul nsw i64 %idxprom10, %0 + %arrayidx11.sum = add i64 %10, %idxprom8 + %arrayidx12 = getelementptr inbounds i32* %A, i64 %arrayidx11.sum + %11 = load i32* %arrayidx12, align 4 +; CHECK: da analyze - flow [* *|<]! 
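+; Note: the unsigned variant of gcd7; the 32-bit index arithmetic is zero-extended
+; instead of sign-extended, and again the analysis evidently cannot recover the
+; coefficients, so it falls back to the conservative [* *|<] answer rather than
+; repeating gcd6's independence proof.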
+ %incdec.ptr = getelementptr inbounds i32* %B.addr.12, i64 1 + store i32 %11, i32* %B.addr.12, align 4 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp ne i32 %lftr.wideiv, %n + br i1 %exitcond, label %for.body3, label %for.inc13.loopexit + +for.inc13.loopexit: ; preds = %for.body3 + %scevgep = getelementptr i32* %B.addr.05, i64 %3 + br label %for.inc13 + +for.inc13: ; preds = %for.inc13.loopexit, %for.cond1.preheader + %B.addr.1.lcssa = phi i32* [ %B.addr.05, %for.cond1.preheader ], [ %scevgep, %for.inc13.loopexit ] + %indvars.iv.next9 = add i64 %indvars.iv8, 1 + %lftr.wideiv10 = trunc i64 %indvars.iv.next9 to i32 + %exitcond11 = icmp ne i32 %lftr.wideiv10, %n + br i1 %exitcond11, label %for.cond1.preheader, label %for.end15.loopexit + +for.end15.loopexit: ; preds = %for.inc13 + br label %for.end15 + +for.end15: ; preds = %for.end15.loopexit, %entry + ret void +} diff --git a/llvm/test/Analysis/DependenceAnalysis/Preliminary.ll b/llvm/test/Analysis/DependenceAnalysis/Preliminary.ll new file mode 100644 index 0000000..3ef63fd --- /dev/null +++ b/llvm/test/Analysis/DependenceAnalysis/Preliminary.ll @@ -0,0 +1,469 @@ +; RUN: opt < %s -analyze -basicaa -indvars -da | FileCheck %s + +; This series of tests is more interesting when debugging is enabled. + +; ModuleID = 'Preliminary.bc' +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.6.0" + + +;; may alias +;; int p0(int n, int *A, int *B) { +;; A[0] = n; +;; return B[1]; + +define i32 @p0(i32 %n, i32* %A, i32* %B) nounwind uwtable ssp { +entry: + store i32 %n, i32* %A, align 4 + %arrayidx1 = getelementptr inbounds i32* %B, i64 1 + %0 = load i32* %arrayidx1, align 4 +; CHECK: da analyze - confused! + ret i32 %0 +} + + +;; no alias +;; int p1(int n, int *restrict A, int *restrict B) { +;; A[0] = n; +;; return B[1]; + +define i32 @p1(i32 %n, i32* noalias %A, i32* noalias %B) nounwind uwtable ssp { +entry: + store i32 %n, i32* %A, align 4 + %arrayidx1 = getelementptr inbounds i32* %B, i64 1 + %0 = load i32* %arrayidx1, align 4 +; CHECK: da analyze - none! + ret i32 %0 +} + +;; check loop nesting levels +;; for (long int i = 0; i < n; i++) +;; for (long int j = 0; j < n; j++) +;; for (long int k = 0; k < n; k++) +;; A[i][j][k] = ... +;; for (long int k = 0; k < n; k++) +;; ... 
= A[i + 3][j + 2][k + 1]; + +define void @p2(i64 %n, [100 x [100 x i64]]* %A, i64* %B) nounwind uwtable ssp { +entry: + %cmp10 = icmp sgt i64 %n, 0 + br i1 %cmp10, label %for.cond1.preheader, label %for.end26 + +for.cond1.preheader: ; preds = %for.inc24, %entry + %B.addr.012 = phi i64* [ %B.addr.1.lcssa, %for.inc24 ], [ %B, %entry ] + %i.011 = phi i64 [ %inc25, %for.inc24 ], [ 0, %entry ] + %cmp26 = icmp sgt i64 %n, 0 + br i1 %cmp26, label %for.cond4.preheader, label %for.inc24 + +for.cond4.preheader: ; preds = %for.inc21, %for.cond1.preheader + %B.addr.18 = phi i64* [ %B.addr.2.lcssa, %for.inc21 ], [ %B.addr.012, %for.cond1.preheader ] + %j.07 = phi i64 [ %inc22, %for.inc21 ], [ 0, %for.cond1.preheader ] + %cmp51 = icmp sgt i64 %n, 0 + br i1 %cmp51, label %for.body6, label %for.cond10.loopexit + +for.body6: ; preds = %for.body6, %for.cond4.preheader + %k.02 = phi i64 [ %inc, %for.body6 ], [ 0, %for.cond4.preheader ] + %arrayidx8 = getelementptr inbounds [100 x [100 x i64]]* %A, i64 %i.011, i64 %j.07, i64 %k.02 + store i64 %i.011, i64* %arrayidx8, align 8 + %inc = add nsw i64 %k.02, 1 + %cmp5 = icmp slt i64 %inc, %n + br i1 %cmp5, label %for.body6, label %for.cond10.loopexit + +for.cond10.loopexit: ; preds = %for.body6, %for.cond4.preheader + %cmp113 = icmp sgt i64 %n, 0 + br i1 %cmp113, label %for.body12, label %for.inc21 + +for.body12: ; preds = %for.body12, %for.cond10.loopexit + %k9.05 = phi i64 [ %inc19, %for.body12 ], [ 0, %for.cond10.loopexit ] + %B.addr.24 = phi i64* [ %incdec.ptr, %for.body12 ], [ %B.addr.18, %for.cond10.loopexit ] + %add = add nsw i64 %k9.05, 1 + %add13 = add nsw i64 %j.07, 2 + %add14 = add nsw i64 %i.011, 3 + %arrayidx17 = getelementptr inbounds [100 x [100 x i64]]* %A, i64 %add14, i64 %add13, i64 %add + %0 = load i64* %arrayidx17, align 8 +; CHECK: da analyze - flow [-3 -2]! + %incdec.ptr = getelementptr inbounds i64* %B.addr.24, i64 1 + store i64 %0, i64* %B.addr.24, align 8 + %inc19 = add nsw i64 %k9.05, 1 + %cmp11 = icmp slt i64 %inc19, %n + br i1 %cmp11, label %for.body12, label %for.inc21 + +for.inc21: ; preds = %for.body12, %for.cond10.loopexit + %B.addr.2.lcssa = phi i64* [ %B.addr.18, %for.cond10.loopexit ], [ %incdec.ptr, %for.body12 ] + %inc22 = add nsw i64 %j.07, 1 + %cmp2 = icmp slt i64 %inc22, %n + br i1 %cmp2, label %for.cond4.preheader, label %for.inc24 + +for.inc24: ; preds = %for.inc21, %for.cond1.preheader + %B.addr.1.lcssa = phi i64* [ %B.addr.012, %for.cond1.preheader ], [ %B.addr.2.lcssa, %for.inc21 ] + %inc25 = add nsw i64 %i.011, 1 + %cmp = icmp slt i64 %inc25, %n + br i1 %cmp, label %for.cond1.preheader, label %for.end26 + +for.end26: ; preds = %for.inc24, %entry + ret void +} + + +;; classify subscripts +;; for (long int i = 0; i < n; i++) +;; for (long int j = 0; j < n; j++) +;; for (long int k = 0; k < n; k++) +;; for (long int l = 0; l < n; l++) +;; for (long int m = 0; m < n; m++) +;; for (long int o = 0; o < n; o++) +;; for (long int p = 0; p < n; p++) +;; for (long int q = 0; q < n; q++) +;; for (long int r = 0; r < n; r++) +;; for (long int s = 0; s < n; s++) +;; for (long int u = 0; u < n; u++) +;; for (long int t = 0; t < n; t++) { +;; A[i - 3] [j] [2] [k-1] [2*l + 1] [m] [p + q] [r + s] = ... +;; ... 
= A[i + 3] [2] [u] [1-k] [3*l - 1] [o] [1 + n] [t + 2]; + +define void @p3(i64 %n, [100 x [100 x [100 x [100 x [100 x [100 x [100 x i64]]]]]]]* %A, i64* %B) nounwind uwtable ssp { +entry: + %cmp44 = icmp sgt i64 %n, 0 + br i1 %cmp44, label %for.cond1.preheader, label %for.end90 + +for.cond1.preheader: ; preds = %for.inc88, %entry + %B.addr.046 = phi i64* [ %B.addr.1.lcssa, %for.inc88 ], [ %B, %entry ] + %i.045 = phi i64 [ %inc89, %for.inc88 ], [ 0, %entry ] + %cmp240 = icmp sgt i64 %n, 0 + br i1 %cmp240, label %for.cond4.preheader, label %for.inc88 + +for.cond4.preheader: ; preds = %for.inc85, %for.cond1.preheader + %B.addr.142 = phi i64* [ %B.addr.2.lcssa, %for.inc85 ], [ %B.addr.046, %for.cond1.preheader ] + %j.041 = phi i64 [ %inc86, %for.inc85 ], [ 0, %for.cond1.preheader ] + %cmp536 = icmp sgt i64 %n, 0 + br i1 %cmp536, label %for.cond7.preheader, label %for.inc85 + +for.cond7.preheader: ; preds = %for.inc82, %for.cond4.preheader + %B.addr.238 = phi i64* [ %B.addr.3.lcssa, %for.inc82 ], [ %B.addr.142, %for.cond4.preheader ] + %k.037 = phi i64 [ %inc83, %for.inc82 ], [ 0, %for.cond4.preheader ] + %cmp832 = icmp sgt i64 %n, 0 + br i1 %cmp832, label %for.cond10.preheader, label %for.inc82 + +for.cond10.preheader: ; preds = %for.inc79, %for.cond7.preheader + %B.addr.334 = phi i64* [ %B.addr.4.lcssa, %for.inc79 ], [ %B.addr.238, %for.cond7.preheader ] + %l.033 = phi i64 [ %inc80, %for.inc79 ], [ 0, %for.cond7.preheader ] + %cmp1128 = icmp sgt i64 %n, 0 + br i1 %cmp1128, label %for.cond13.preheader, label %for.inc79 + +for.cond13.preheader: ; preds = %for.inc76, %for.cond10.preheader + %B.addr.430 = phi i64* [ %B.addr.5.lcssa, %for.inc76 ], [ %B.addr.334, %for.cond10.preheader ] + %m.029 = phi i64 [ %inc77, %for.inc76 ], [ 0, %for.cond10.preheader ] + %cmp1424 = icmp sgt i64 %n, 0 + br i1 %cmp1424, label %for.cond16.preheader, label %for.inc76 + +for.cond16.preheader: ; preds = %for.inc73, %for.cond13.preheader + %B.addr.526 = phi i64* [ %B.addr.6.lcssa, %for.inc73 ], [ %B.addr.430, %for.cond13.preheader ] + %o.025 = phi i64 [ %inc74, %for.inc73 ], [ 0, %for.cond13.preheader ] + %cmp1720 = icmp sgt i64 %n, 0 + br i1 %cmp1720, label %for.cond19.preheader, label %for.inc73 + +for.cond19.preheader: ; preds = %for.inc70, %for.cond16.preheader + %B.addr.622 = phi i64* [ %B.addr.7.lcssa, %for.inc70 ], [ %B.addr.526, %for.cond16.preheader ] + %p.021 = phi i64 [ %inc71, %for.inc70 ], [ 0, %for.cond16.preheader ] + %cmp2016 = icmp sgt i64 %n, 0 + br i1 %cmp2016, label %for.cond22.preheader, label %for.inc70 + +for.cond22.preheader: ; preds = %for.inc67, %for.cond19.preheader + %B.addr.718 = phi i64* [ %B.addr.8.lcssa, %for.inc67 ], [ %B.addr.622, %for.cond19.preheader ] + %q.017 = phi i64 [ %inc68, %for.inc67 ], [ 0, %for.cond19.preheader ] + %cmp2312 = icmp sgt i64 %n, 0 + br i1 %cmp2312, label %for.cond25.preheader, label %for.inc67 + +for.cond25.preheader: ; preds = %for.inc64, %for.cond22.preheader + %B.addr.814 = phi i64* [ %B.addr.9.lcssa, %for.inc64 ], [ %B.addr.718, %for.cond22.preheader ] + %r.013 = phi i64 [ %inc65, %for.inc64 ], [ 0, %for.cond22.preheader ] + %cmp268 = icmp sgt i64 %n, 0 + br i1 %cmp268, label %for.cond28.preheader, label %for.inc64 + +for.cond28.preheader: ; preds = %for.inc61, %for.cond25.preheader + %B.addr.910 = phi i64* [ %B.addr.10.lcssa, %for.inc61 ], [ %B.addr.814, %for.cond25.preheader ] + %s.09 = phi i64 [ %inc62, %for.inc61 ], [ 0, %for.cond25.preheader ] + %cmp294 = icmp sgt i64 %n, 0 + br i1 %cmp294, label %for.cond31.preheader, label %for.inc61 + 
+for.cond31.preheader: ; preds = %for.inc58, %for.cond28.preheader + %u.06 = phi i64 [ %inc59, %for.inc58 ], [ 0, %for.cond28.preheader ] + %B.addr.105 = phi i64* [ %B.addr.11.lcssa, %for.inc58 ], [ %B.addr.910, %for.cond28.preheader ] + %cmp321 = icmp sgt i64 %n, 0 + br i1 %cmp321, label %for.body33, label %for.inc58 + +for.body33: ; preds = %for.body33, %for.cond31.preheader + %t.03 = phi i64 [ %inc, %for.body33 ], [ 0, %for.cond31.preheader ] + %B.addr.112 = phi i64* [ %incdec.ptr, %for.body33 ], [ %B.addr.105, %for.cond31.preheader ] + %add = add nsw i64 %r.013, %s.09 + %add34 = add nsw i64 %p.021, %q.017 + %mul = shl nsw i64 %l.033, 1 + %add3547 = or i64 %mul, 1 + %sub = add nsw i64 %k.037, -1 + %sub36 = add nsw i64 %i.045, -3 + %arrayidx43 = getelementptr inbounds [100 x [100 x [100 x [100 x [100 x [100 x [100 x i64]]]]]]]* %A, i64 %sub36, i64 %j.041, i64 2, i64 %sub, i64 %add3547, i64 %m.029, i64 %add34, i64 %add + store i64 %i.045, i64* %arrayidx43, align 8 + %add44 = add nsw i64 %t.03, 2 + %add45 = add nsw i64 %n, 1 + %mul46 = mul nsw i64 %l.033, 3 + %sub47 = add nsw i64 %mul46, -1 + %sub48 = sub nsw i64 1, %k.037 + %add49 = add nsw i64 %i.045, 3 + %arrayidx57 = getelementptr inbounds [100 x [100 x [100 x [100 x [100 x [100 x [100 x i64]]]]]]]* %A, i64 %add49, i64 2, i64 %u.06, i64 %sub48, i64 %sub47, i64 %o.025, i64 %add45, i64 %add44 + %0 = load i64* %arrayidx57, align 8 +; CHECK: da analyze - flow [-6 * * => * * * * * * * *] splitable! +; CHECK: da analyze - split level = 3, iteration = 1! + %incdec.ptr = getelementptr inbounds i64* %B.addr.112, i64 1 + store i64 %0, i64* %B.addr.112, align 8 + %inc = add nsw i64 %t.03, 1 + %cmp32 = icmp slt i64 %inc, %n + br i1 %cmp32, label %for.body33, label %for.inc58 + +for.inc58: ; preds = %for.body33, %for.cond31.preheader + %B.addr.11.lcssa = phi i64* [ %B.addr.105, %for.cond31.preheader ], [ %incdec.ptr, %for.body33 ] + %inc59 = add nsw i64 %u.06, 1 + %cmp29 = icmp slt i64 %inc59, %n + br i1 %cmp29, label %for.cond31.preheader, label %for.inc61 + +for.inc61: ; preds = %for.inc58, %for.cond28.preheader + %B.addr.10.lcssa = phi i64* [ %B.addr.910, %for.cond28.preheader ], [ %B.addr.11.lcssa, %for.inc58 ] + %inc62 = add nsw i64 %s.09, 1 + %cmp26 = icmp slt i64 %inc62, %n + br i1 %cmp26, label %for.cond28.preheader, label %for.inc64 + +for.inc64: ; preds = %for.inc61, %for.cond25.preheader + %B.addr.9.lcssa = phi i64* [ %B.addr.814, %for.cond25.preheader ], [ %B.addr.10.lcssa, %for.inc61 ] + %inc65 = add nsw i64 %r.013, 1 + %cmp23 = icmp slt i64 %inc65, %n + br i1 %cmp23, label %for.cond25.preheader, label %for.inc67 + +for.inc67: ; preds = %for.inc64, %for.cond22.preheader + %B.addr.8.lcssa = phi i64* [ %B.addr.718, %for.cond22.preheader ], [ %B.addr.9.lcssa, %for.inc64 ] + %inc68 = add nsw i64 %q.017, 1 + %cmp20 = icmp slt i64 %inc68, %n + br i1 %cmp20, label %for.cond22.preheader, label %for.inc70 + +for.inc70: ; preds = %for.inc67, %for.cond19.preheader + %B.addr.7.lcssa = phi i64* [ %B.addr.622, %for.cond19.preheader ], [ %B.addr.8.lcssa, %for.inc67 ] + %inc71 = add nsw i64 %p.021, 1 + %cmp17 = icmp slt i64 %inc71, %n + br i1 %cmp17, label %for.cond19.preheader, label %for.inc73 + +for.inc73: ; preds = %for.inc70, %for.cond16.preheader + %B.addr.6.lcssa = phi i64* [ %B.addr.526, %for.cond16.preheader ], [ %B.addr.7.lcssa, %for.inc70 ] + %inc74 = add nsw i64 %o.025, 1 + %cmp14 = icmp slt i64 %inc74, %n + br i1 %cmp14, label %for.cond16.preheader, label %for.inc76 + +for.inc76: ; preds = %for.inc73, %for.cond13.preheader + 
%B.addr.5.lcssa = phi i64* [ %B.addr.430, %for.cond13.preheader ], [ %B.addr.6.lcssa, %for.inc73 ] + %inc77 = add nsw i64 %m.029, 1 + %cmp11 = icmp slt i64 %inc77, %n + br i1 %cmp11, label %for.cond13.preheader, label %for.inc79 + +for.inc79: ; preds = %for.inc76, %for.cond10.preheader + %B.addr.4.lcssa = phi i64* [ %B.addr.334, %for.cond10.preheader ], [ %B.addr.5.lcssa, %for.inc76 ] + %inc80 = add nsw i64 %l.033, 1 + %cmp8 = icmp slt i64 %inc80, %n + br i1 %cmp8, label %for.cond10.preheader, label %for.inc82 + +for.inc82: ; preds = %for.inc79, %for.cond7.preheader + %B.addr.3.lcssa = phi i64* [ %B.addr.238, %for.cond7.preheader ], [ %B.addr.4.lcssa, %for.inc79 ] + %inc83 = add nsw i64 %k.037, 1 + %cmp5 = icmp slt i64 %inc83, %n + br i1 %cmp5, label %for.cond7.preheader, label %for.inc85 + +for.inc85: ; preds = %for.inc82, %for.cond4.preheader + %B.addr.2.lcssa = phi i64* [ %B.addr.142, %for.cond4.preheader ], [ %B.addr.3.lcssa, %for.inc82 ] + %inc86 = add nsw i64 %j.041, 1 + %cmp2 = icmp slt i64 %inc86, %n + br i1 %cmp2, label %for.cond4.preheader, label %for.inc88 + +for.inc88: ; preds = %for.inc85, %for.cond1.preheader + %B.addr.1.lcssa = phi i64* [ %B.addr.046, %for.cond1.preheader ], [ %B.addr.2.lcssa, %for.inc85 ] + %inc89 = add nsw i64 %i.045, 1 + %cmp = icmp slt i64 %inc89, %n + br i1 %cmp, label %for.cond1.preheader, label %for.end90 + +for.end90: ; preds = %for.inc88, %entry + ret void +} + + +;; cleanup around chars, shorts, ints +;;void p4(int *A, int *B, long int n) +;; for (char i = 0; i < n; i++) +;; A[i + 2] = ... +;; ... = A[i]; + +define void @p4(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp { +entry: + %cmp1 = icmp sgt i64 %n, 0 + br i1 %cmp1, label %for.body, label %for.end + +for.body: ; preds = %for.body, %entry + %i.03 = phi i8 [ %inc, %for.body ], [ 0, %entry ] + %B.addr.02 = phi i32* [ %incdec.ptr, %for.body ], [ %B, %entry ] + %conv2 = sext i8 %i.03 to i32 + %conv3 = sext i8 %i.03 to i64 + %add = add i64 %conv3, 2 + %arrayidx = getelementptr inbounds i32* %A, i64 %add + store i32 %conv2, i32* %arrayidx, align 4 + %idxprom4 = sext i8 %i.03 to i64 + %arrayidx5 = getelementptr inbounds i32* %A, i64 %idxprom4 + %0 = load i32* %arrayidx5, align 4 +; CHECK: da analyze - flow [*|<]! + %incdec.ptr = getelementptr inbounds i32* %B.addr.02, i64 1 + store i32 %0, i32* %B.addr.02, align 4 + %inc = add i8 %i.03, 1 + %conv = sext i8 %inc to i64 + %cmp = icmp slt i64 %conv, %n + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + + +;;void p5(int *A, int *B, long int n) +;; for (short i = 0; i < n; i++) +;; A[i + 2] = ... +;; ... = A[i]; + +define void @p5(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp { +entry: + %cmp1 = icmp sgt i64 %n, 0 + br i1 %cmp1, label %for.body, label %for.end + +for.body: ; preds = %for.body, %entry + %i.03 = phi i16 [ %inc, %for.body ], [ 0, %entry ] + %B.addr.02 = phi i32* [ %incdec.ptr, %for.body ], [ %B, %entry ] + %conv2 = sext i16 %i.03 to i32 + %conv3 = sext i16 %i.03 to i64 + %add = add i64 %conv3, 2 + %arrayidx = getelementptr inbounds i32* %A, i64 %add + store i32 %conv2, i32* %arrayidx, align 4 + %idxprom4 = sext i16 %i.03 to i64 + %arrayidx5 = getelementptr inbounds i32* %A, i64 %idxprom4 + %0 = load i32* %arrayidx5, align 4 +; CHECK: da analyze - flow [*|<]! 
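+; Note: as with the char counter in p4, a short induction variable is sign-extended on
+; every use, so the subscripts are apparently not recognized as simple affine functions
+; of the loop and only the conservative [*|<] direction is reported; contrast p6 below,
+; where an int counter yields the precise distance [2].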
+ %incdec.ptr = getelementptr inbounds i32* %B.addr.02, i64 1 + store i32 %0, i32* %B.addr.02, align 4 + %inc = add i16 %i.03, 1 + %conv = sext i16 %inc to i64 + %cmp = icmp slt i64 %conv, %n + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + + +;;void p6(int *A, int *B, long int n) +;; for (int i = 0; i < n; i++) +;; A[i + 2] = ... +;; ... = A[i]; + +define void @p6(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp { +entry: + %cmp1 = icmp sgt i64 %n, 0 + br i1 %cmp1, label %for.body, label %for.end + +for.body: ; preds = %for.body, %entry + %i.03 = phi i32 [ %inc, %for.body ], [ 0, %entry ] + %B.addr.02 = phi i32* [ %incdec.ptr, %for.body ], [ %B, %entry ] + %add = add nsw i32 %i.03, 2 + %idxprom = sext i32 %add to i64 + %arrayidx = getelementptr inbounds i32* %A, i64 %idxprom + store i32 %i.03, i32* %arrayidx, align 4 + %idxprom2 = sext i32 %i.03 to i64 + %arrayidx3 = getelementptr inbounds i32* %A, i64 %idxprom2 + %0 = load i32* %arrayidx3, align 4 +; CHECK: da analyze - consistent flow [2]! + %incdec.ptr = getelementptr inbounds i32* %B.addr.02, i64 1 + store i32 %0, i32* %B.addr.02, align 4 + %inc = add nsw i32 %i.03, 1 + %conv = sext i32 %inc to i64 + %cmp = icmp slt i64 %conv, %n + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + + +;;void p7(unsigned *A, unsigned *B, char n) +;; A[n] = ... +;; ... = A[n + 1]; + +define void @p7(i32* %A, i32* %B, i8 signext %n) nounwind uwtable ssp { +entry: + %idxprom = sext i8 %n to i64 + %arrayidx = getelementptr inbounds i32* %A, i64 %idxprom + store i32 0, i32* %arrayidx, align 4 + %conv = sext i8 %n to i64 + %add = add i64 %conv, 1 + %arrayidx2 = getelementptr inbounds i32* %A, i64 %add + %0 = load i32* %arrayidx2, align 4 +; CHECK: da analyze - none! + store i32 %0, i32* %B, align 4 + ret void +} + + + +;;void p8(unsigned *A, unsigned *B, short n) +;; A[n] = ... +;; ... = A[n + 1]; + +define void @p8(i32* %A, i32* %B, i16 signext %n) nounwind uwtable ssp { +entry: + %idxprom = sext i16 %n to i64 + %arrayidx = getelementptr inbounds i32* %A, i64 %idxprom + store i32 0, i32* %arrayidx, align 4 + %conv = sext i16 %n to i64 + %add = add i64 %conv, 1 + %arrayidx2 = getelementptr inbounds i32* %A, i64 %add + %0 = load i32* %arrayidx2, align 4 +; CHECK: da analyze - none! + store i32 %0, i32* %B, align 4 + ret void +} + + +;;void p9(unsigned *A, unsigned *B, int n) +;; A[n] = ... +;; ... = A[n + 1]; + +define void @p9(i32* %A, i32* %B, i32 %n) nounwind uwtable ssp { +entry: + %idxprom = sext i32 %n to i64 + %arrayidx = getelementptr inbounds i32* %A, i64 %idxprom + store i32 0, i32* %arrayidx, align 4 + %add = add nsw i32 %n, 1 + %idxprom1 = sext i32 %add to i64 + %arrayidx2 = getelementptr inbounds i32* %A, i64 %idxprom1 + %0 = load i32* %arrayidx2, align 4 +; CHECK: da analyze - none! + store i32 %0, i32* %B, align 4 + ret void +} + + +;;void p10(unsigned *A, unsigned *B, unsigned n) +;; A[n] = ... +;; ... = A[n + 1]; + +define void @p10(i32* %A, i32* %B, i32 %n) nounwind uwtable ssp { +entry: + %idxprom = zext i32 %n to i64 + %arrayidx = getelementptr inbounds i32* %A, i64 %idxprom + store i32 0, i32* %arrayidx, align 4 + %add = add i32 %n, 1 + %idxprom1 = zext i32 %add to i64 + %arrayidx2 = getelementptr inbounds i32* %A, i64 %idxprom1 + %0 = load i32* %arrayidx2, align 4 +; CHECK: da analyze - none! 
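+; Note: p7 through p10 have no loop at all; the two subscripts are n and n + 1 for the
+; various index types, so a ZIV-style test sees a nonzero constant difference and
+; reports no dependence in each case.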
+ store i32 %0, i32* %B, align 4 + ret void +} diff --git a/llvm/test/Analysis/DependenceAnalysis/Propagating.ll b/llvm/test/Analysis/DependenceAnalysis/Propagating.ll new file mode 100644 index 0000000..076348c --- /dev/null +++ b/llvm/test/Analysis/DependenceAnalysis/Propagating.ll @@ -0,0 +1,467 @@ +; RUN: opt < %s -analyze -basicaa -da | FileCheck %s + +; ModuleID = 'Propagating.bc' +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.6.0" + + +;; for (long int i = 0; i < 100; i++) +;; for (long int j = 0; j < 100; j++) +;; A[i + 1][i + j] = i; +;; *B++ = A[i][i + j]; + +define void @prop0([100 x i32]* %A, i32* %B, i32 %n) nounwind uwtable ssp { +entry: + br label %for.cond1.preheader + +for.cond1.preheader: ; preds = %for.inc9, %entry + %B.addr.04 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.inc9 ] + %i.03 = phi i64 [ 0, %entry ], [ %inc10, %for.inc9 ] + br label %for.body3 + +for.body3: ; preds = %for.body3, %for.cond1.preheader + %j.02 = phi i64 [ 0, %for.cond1.preheader ], [ %inc, %for.body3 ] + %B.addr.11 = phi i32* [ %B.addr.04, %for.cond1.preheader ], [ %incdec.ptr, %for.body3 ] + %conv = trunc i64 %i.03 to i32 + %add = add nsw i64 %i.03, %j.02 + %add4 = add nsw i64 %i.03, 1 + %arrayidx5 = getelementptr inbounds [100 x i32]* %A, i64 %add4, i64 %add + store i32 %conv, i32* %arrayidx5, align 4 + %add6 = add nsw i64 %i.03, %j.02 + %arrayidx8 = getelementptr inbounds [100 x i32]* %A, i64 %i.03, i64 %add6 + %0 = load i32* %arrayidx8, align 4 +; CHECK: da analyze - consistent flow [1 -1]! + %incdec.ptr = getelementptr inbounds i32* %B.addr.11, i64 1 + store i32 %0, i32* %B.addr.11, align 4 + %inc = add nsw i64 %j.02, 1 + %cmp2 = icmp slt i64 %inc, 100 + br i1 %cmp2, label %for.body3, label %for.inc9 + +for.inc9: ; preds = %for.body3 + %inc10 = add nsw i64 %i.03, 1 + %cmp = icmp slt i64 %inc10, 100 + br i1 %cmp, label %for.cond1.preheader, label %for.end11 + +for.end11: ; preds = %for.inc9 + ret void +} + + +;; for (long int i = 0; i < 100; i++) +;; for (long int j = 0; j < 100; j++) +;; for (long int k = 0; k < 100; k++) +;; A[j - i][i + 1][j + k] = ... +;; ... 
= A[j - i][i][j + k]; + +define void @prop1([100 x [100 x i32]]* %A, i32* %B, i32 %n) nounwind uwtable ssp { +entry: + br label %for.cond1.preheader + +for.cond1.preheader: ; preds = %for.inc18, %entry + %B.addr.06 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.inc18 ] + %i.05 = phi i64 [ 0, %entry ], [ %inc19, %for.inc18 ] + br label %for.cond4.preheader + +for.cond4.preheader: ; preds = %for.inc15, %for.cond1.preheader + %B.addr.14 = phi i32* [ %B.addr.06, %for.cond1.preheader ], [ %incdec.ptr, %for.inc15 ] + %j.03 = phi i64 [ 0, %for.cond1.preheader ], [ %inc16, %for.inc15 ] + br label %for.body6 + +for.body6: ; preds = %for.body6, %for.cond4.preheader + %k.02 = phi i64 [ 0, %for.cond4.preheader ], [ %inc, %for.body6 ] + %B.addr.21 = phi i32* [ %B.addr.14, %for.cond4.preheader ], [ %incdec.ptr, %for.body6 ] + %conv = trunc i64 %i.05 to i32 + %add = add nsw i64 %j.03, %k.02 + %add7 = add nsw i64 %i.05, 1 + %sub = sub nsw i64 %j.03, %i.05 + %arrayidx9 = getelementptr inbounds [100 x [100 x i32]]* %A, i64 %sub, i64 %add7, i64 %add + store i32 %conv, i32* %arrayidx9, align 4 + %add10 = add nsw i64 %j.03, %k.02 + %sub11 = sub nsw i64 %j.03, %i.05 + %arrayidx14 = getelementptr inbounds [100 x [100 x i32]]* %A, i64 %sub11, i64 %i.05, i64 %add10 + %0 = load i32* %arrayidx14, align 4 +; CHECK: da analyze - consistent flow [1 1 -1]! + %incdec.ptr = getelementptr inbounds i32* %B.addr.21, i64 1 + store i32 %0, i32* %B.addr.21, align 4 + %inc = add nsw i64 %k.02, 1 + %cmp5 = icmp slt i64 %inc, 100 + br i1 %cmp5, label %for.body6, label %for.inc15 + +for.inc15: ; preds = %for.body6 + %inc16 = add nsw i64 %j.03, 1 + %cmp2 = icmp slt i64 %inc16, 100 + br i1 %cmp2, label %for.cond4.preheader, label %for.inc18 + +for.inc18: ; preds = %for.inc15 + %inc19 = add nsw i64 %i.05, 1 + %cmp = icmp slt i64 %inc19, 100 + br i1 %cmp, label %for.cond1.preheader, label %for.end20 + +for.end20: ; preds = %for.inc18 + ret void +} + + +;; for (long int i = 0; i < 100; i++) +;; for (long int j = 0; j < 100; j++) +;; A[i - 1][2*i] = ... +;; ... = A[i][i + j + 110]; + +define void @prop2([100 x i32]* %A, i32* %B, i32 %n) nounwind uwtable ssp { +entry: + br label %for.cond1.preheader + +for.cond1.preheader: ; preds = %for.inc8, %entry + %B.addr.04 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.inc8 ] + %i.03 = phi i64 [ 0, %entry ], [ %inc9, %for.inc8 ] + br label %for.body3 + +for.body3: ; preds = %for.body3, %for.cond1.preheader + %j.02 = phi i64 [ 0, %for.cond1.preheader ], [ %inc, %for.body3 ] + %B.addr.11 = phi i32* [ %B.addr.04, %for.cond1.preheader ], [ %incdec.ptr, %for.body3 ] + %conv = trunc i64 %i.03 to i32 + %mul = shl nsw i64 %i.03, 1 + %sub = add nsw i64 %i.03, -1 + %arrayidx4 = getelementptr inbounds [100 x i32]* %A, i64 %sub, i64 %mul + store i32 %conv, i32* %arrayidx4, align 4 + %add = add nsw i64 %i.03, %j.02 + %add5 = add nsw i64 %add, 110 + %arrayidx7 = getelementptr inbounds [100 x i32]* %A, i64 %i.03, i64 %add5 + %0 = load i32* %arrayidx7, align 4 +; CHECK: da analyze - none! + %incdec.ptr = getelementptr inbounds i32* %B.addr.11, i64 1 + store i32 %0, i32* %B.addr.11, align 4 + %inc = add nsw i64 %j.02, 1 + %cmp2 = icmp slt i64 %inc, 100 + br i1 %cmp2, label %for.body3, label %for.inc8 + +for.inc8: ; preds = %for.body3 + %inc9 = add nsw i64 %i.03, 1 + %cmp = icmp slt i64 %inc9, 100 + br i1 %cmp, label %for.cond1.preheader, label %for.end10 + +for.end10: ; preds = %for.inc8 + ret void +} + + +;; for (long int i = 0; i < 100; i++) +;; for (long int j = 0; j < 100; j++) +;; A[i][2*j + i] = ... +;; ... 
= A[i][2*j - i + 5]; + +define void @prop3([100 x i32]* %A, i32* %B, i32 %n) nounwind uwtable ssp { +entry: + br label %for.cond1.preheader + +for.cond1.preheader: ; preds = %for.inc9, %entry + %B.addr.04 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.inc9 ] + %i.03 = phi i64 [ 0, %entry ], [ %inc10, %for.inc9 ] + br label %for.body3 + +for.body3: ; preds = %for.body3, %for.cond1.preheader + %j.02 = phi i64 [ 0, %for.cond1.preheader ], [ %inc, %for.body3 ] + %B.addr.11 = phi i32* [ %B.addr.04, %for.cond1.preheader ], [ %incdec.ptr, %for.body3 ] + %conv = trunc i64 %i.03 to i32 + %mul = shl nsw i64 %j.02, 1 + %add = add nsw i64 %mul, %i.03 + %arrayidx4 = getelementptr inbounds [100 x i32]* %A, i64 %i.03, i64 %add + store i32 %conv, i32* %arrayidx4, align 4 + %mul5 = shl nsw i64 %j.02, 1 + %sub = sub nsw i64 %mul5, %i.03 + %add6 = add nsw i64 %sub, 5 + %arrayidx8 = getelementptr inbounds [100 x i32]* %A, i64 %i.03, i64 %add6 + %0 = load i32* %arrayidx8, align 4 +; CHECK: da analyze - none! + %incdec.ptr = getelementptr inbounds i32* %B.addr.11, i64 1 + store i32 %0, i32* %B.addr.11, align 4 + %inc = add nsw i64 %j.02, 1 + %cmp2 = icmp slt i64 %inc, 100 + br i1 %cmp2, label %for.body3, label %for.inc9 + +for.inc9: ; preds = %for.body3 + %inc10 = add nsw i64 %i.03, 1 + %cmp = icmp slt i64 %inc10, 100 + br i1 %cmp, label %for.cond1.preheader, label %for.end11 + +for.end11: ; preds = %for.inc9 + ret void +} + + +;; propagate Distance +;; for (long int i = 0; i < 100; i++) +;; for (long int j = 0; j < 100; j++) +;; A[i + 2][2*i + j + 1] = ... +;; ... = A[i][2*i + j]; + +define void @prop4([100 x i32]* %A, i32* %B, i32 %n) nounwind uwtable ssp { +entry: + br label %for.cond1.preheader + +for.cond1.preheader: ; preds = %for.inc11, %entry + %B.addr.04 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.inc11 ] + %i.03 = phi i64 [ 0, %entry ], [ %inc12, %for.inc11 ] + br label %for.body3 + +for.body3: ; preds = %for.body3, %for.cond1.preheader + %j.02 = phi i64 [ 0, %for.cond1.preheader ], [ %inc, %for.body3 ] + %B.addr.11 = phi i32* [ %B.addr.04, %for.cond1.preheader ], [ %incdec.ptr, %for.body3 ] + %conv = trunc i64 %i.03 to i32 + %mul = shl nsw i64 %i.03, 1 + %add = add nsw i64 %mul, %j.02 + %add4 = add nsw i64 %add, 1 + %add5 = add nsw i64 %i.03, 2 + %arrayidx6 = getelementptr inbounds [100 x i32]* %A, i64 %add5, i64 %add4 + store i32 %conv, i32* %arrayidx6, align 4 + %mul7 = shl nsw i64 %i.03, 1 + %add8 = add nsw i64 %mul7, %j.02 + %arrayidx10 = getelementptr inbounds [100 x i32]* %A, i64 %i.03, i64 %add8 + %0 = load i32* %arrayidx10, align 4 +; CHECK: da analyze - consistent flow [2 -3]! + %incdec.ptr = getelementptr inbounds i32* %B.addr.11, i64 1 + store i32 %0, i32* %B.addr.11, align 4 + %inc = add nsw i64 %j.02, 1 + %cmp2 = icmp slt i64 %inc, 100 + br i1 %cmp2, label %for.body3, label %for.inc11 + +for.inc11: ; preds = %for.body3 + %inc12 = add nsw i64 %i.03, 1 + %cmp = icmp slt i64 %inc12, 100 + br i1 %cmp, label %for.cond1.preheader, label %for.end13 + +for.end13: ; preds = %for.inc11 + ret void +} + + +;; propagate Point +;; for (long int i = 0; i < 100; i++) +;; for (long int j = 0; j < 100; j++) +;; A[3*i - 18][22 - i][2*i + j] = ... +;; ... 
= A[i][i][3*i + j]; + +define void @prop5([100 x [100 x i32]]* %A, i32* %B, i32 %n) nounwind uwtable ssp { +entry: + br label %for.cond1.preheader + +for.cond1.preheader: ; preds = %for.inc13, %entry + %B.addr.04 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.inc13 ] + %i.03 = phi i64 [ 0, %entry ], [ %inc14, %for.inc13 ] + br label %for.body3 + +for.body3: ; preds = %for.body3, %for.cond1.preheader + %j.02 = phi i64 [ 0, %for.cond1.preheader ], [ %inc, %for.body3 ] + %B.addr.11 = phi i32* [ %B.addr.04, %for.cond1.preheader ], [ %incdec.ptr, %for.body3 ] + %conv = trunc i64 %i.03 to i32 + %mul = shl nsw i64 %i.03, 1 + %add = add nsw i64 %mul, %j.02 + %sub = sub nsw i64 22, %i.03 + %mul4 = mul nsw i64 %i.03, 3 + %sub5 = add nsw i64 %mul4, -18 + %arrayidx7 = getelementptr inbounds [100 x [100 x i32]]* %A, i64 %sub5, i64 %sub, i64 %add + store i32 %conv, i32* %arrayidx7, align 4 + %mul8 = mul nsw i64 %i.03, 3 + %add9 = add nsw i64 %mul8, %j.02 + %arrayidx12 = getelementptr inbounds [100 x [100 x i32]]* %A, i64 %i.03, i64 %i.03, i64 %add9 + %0 = load i32* %arrayidx12, align 4 +; CHECK: da analyze - flow [< -16] splitable! +; CHECK: da analyze - split level = 1, iteration = 11! + %incdec.ptr = getelementptr inbounds i32* %B.addr.11, i64 1 + store i32 %0, i32* %B.addr.11, align 4 + %inc = add nsw i64 %j.02, 1 + %cmp2 = icmp slt i64 %inc, 100 + br i1 %cmp2, label %for.body3, label %for.inc13 + +for.inc13: ; preds = %for.body3 + %inc14 = add nsw i64 %i.03, 1 + %cmp = icmp slt i64 %inc14, 100 + br i1 %cmp, label %for.cond1.preheader, label %for.end15 + +for.end15: ; preds = %for.inc13 + ret void +} + + +;; propagate Line +;; for (long int i = 0; i < 100; i++) +;; for (long int j = 0; j < 100; j++) +;; A[i + 1][4*i + j + 2] = ... +;; ... = A[2*i][8*i + j]; + +define void @prop6([100 x i32]* %A, i32* %B, i32 %n) nounwind uwtable ssp { +entry: + br label %for.cond1.preheader + +for.cond1.preheader: ; preds = %for.inc12, %entry + %B.addr.04 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.inc12 ] + %i.03 = phi i64 [ 0, %entry ], [ %inc13, %for.inc12 ] + br label %for.body3 + +for.body3: ; preds = %for.body3, %for.cond1.preheader + %j.02 = phi i64 [ 0, %for.cond1.preheader ], [ %inc, %for.body3 ] + %B.addr.11 = phi i32* [ %B.addr.04, %for.cond1.preheader ], [ %incdec.ptr, %for.body3 ] + %conv = trunc i64 %i.03 to i32 + %mul = shl nsw i64 %i.03, 2 + %add = add nsw i64 %mul, %j.02 + %add4 = add nsw i64 %add, 2 + %add5 = add nsw i64 %i.03, 1 + %arrayidx6 = getelementptr inbounds [100 x i32]* %A, i64 %add5, i64 %add4 + store i32 %conv, i32* %arrayidx6, align 4 + %mul7 = shl nsw i64 %i.03, 3 + %add8 = add nsw i64 %mul7, %j.02 + %mul9 = shl nsw i64 %i.03, 1 + %arrayidx11 = getelementptr inbounds [100 x i32]* %A, i64 %mul9, i64 %add8 + %0 = load i32* %arrayidx11, align 4 +; CHECK: da analyze - flow [=> -2]! + %incdec.ptr = getelementptr inbounds i32* %B.addr.11, i64 1 + store i32 %0, i32* %B.addr.11, align 4 + %inc = add nsw i64 %j.02, 1 + %cmp2 = icmp slt i64 %inc, 100 + br i1 %cmp2, label %for.body3, label %for.inc12 + +for.inc12: ; preds = %for.body3 + %inc13 = add nsw i64 %i.03, 1 + %cmp = icmp slt i64 %inc13, 100 + br i1 %cmp, label %for.cond1.preheader, label %for.end14 + +for.end14: ; preds = %for.inc12 + ret void +} + + +;; for (long int i = 0; i < 100; i++) +;; for (long int j = 0; j < 100; j++) +;; A[2*i + 4][-5*i + j + 2] = ... +;; ... 
= A[-2*i + 20][5*i + j]; + +define void @prop7([100 x i32]* %A, i32* %B, i32 %n) nounwind uwtable ssp { +entry: + br label %for.cond1.preheader + +for.cond1.preheader: ; preds = %for.inc14, %entry + %B.addr.04 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.inc14 ] + %i.03 = phi i64 [ 0, %entry ], [ %inc15, %for.inc14 ] + br label %for.body3 + +for.body3: ; preds = %for.body3, %for.cond1.preheader + %j.02 = phi i64 [ 0, %for.cond1.preheader ], [ %inc, %for.body3 ] + %B.addr.11 = phi i32* [ %B.addr.04, %for.cond1.preheader ], [ %incdec.ptr, %for.body3 ] + %conv = trunc i64 %i.03 to i32 + %mul = mul nsw i64 %i.03, -5 + %add = add nsw i64 %mul, %j.02 + %add4 = add nsw i64 %add, 2 + %mul5 = shl nsw i64 %i.03, 1 + %add6 = add nsw i64 %mul5, 4 + %arrayidx7 = getelementptr inbounds [100 x i32]* %A, i64 %add6, i64 %add4 + store i32 %conv, i32* %arrayidx7, align 4 + %mul8 = mul nsw i64 %i.03, 5 + %add9 = add nsw i64 %mul8, %j.02 + %mul10 = mul nsw i64 %i.03, -2 + %add11 = add nsw i64 %mul10, 20 + %arrayidx13 = getelementptr inbounds [100 x i32]* %A, i64 %add11, i64 %add9 + %0 = load i32* %arrayidx13, align 4 +; CHECK: da analyze - flow [* -38] splitable! +; CHECK: da analyze - split level = 1, iteration = 4! + %incdec.ptr = getelementptr inbounds i32* %B.addr.11, i64 1 + store i32 %0, i32* %B.addr.11, align 4 + %inc = add nsw i64 %j.02, 1 + %cmp2 = icmp slt i64 %inc, 100 + br i1 %cmp2, label %for.body3, label %for.inc14 + +for.inc14: ; preds = %for.body3 + %inc15 = add nsw i64 %i.03, 1 + %cmp = icmp slt i64 %inc15, 100 + br i1 %cmp, label %for.cond1.preheader, label %for.end16 + +for.end16: ; preds = %for.inc14 + ret void +} + + +;; for (long int i = 0; i < 100; i++) +;; for (long int j = 0; j < 100; j++) +;; A[4][j + 2] = ... +;; ... = A[-2*i + 4][5*i + j]; + +define void @prop8([100 x i32]* %A, i32* %B, i32 %n) nounwind uwtable ssp { +entry: + br label %for.cond1.preheader + +for.cond1.preheader: ; preds = %for.inc10, %entry + %B.addr.04 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.inc10 ] + %i.03 = phi i64 [ 0, %entry ], [ %inc11, %for.inc10 ] + br label %for.body3 + +for.body3: ; preds = %for.body3, %for.cond1.preheader + %j.02 = phi i64 [ 0, %for.cond1.preheader ], [ %inc, %for.body3 ] + %B.addr.11 = phi i32* [ %B.addr.04, %for.cond1.preheader ], [ %incdec.ptr, %for.body3 ] + %conv = trunc i64 %i.03 to i32 + %add = add nsw i64 %j.02, 2 + %arrayidx4 = getelementptr inbounds [100 x i32]* %A, i64 4, i64 %add + store i32 %conv, i32* %arrayidx4, align 4 + %mul = mul nsw i64 %i.03, 5 + %add5 = add nsw i64 %mul, %j.02 + %mul6 = mul nsw i64 %i.03, -2 + %add7 = add nsw i64 %mul6, 4 + %arrayidx9 = getelementptr inbounds [100 x i32]* %A, i64 %add7, i64 %add5 + %0 = load i32* %arrayidx9, align 4 +; CHECK: da analyze - flow [p<= 2]! + %incdec.ptr = getelementptr inbounds i32* %B.addr.11, i64 1 + store i32 %0, i32* %B.addr.11, align 4 + %inc = add nsw i64 %j.02, 1 + %cmp2 = icmp slt i64 %inc, 100 + br i1 %cmp2, label %for.body3, label %for.inc10 + +for.inc10: ; preds = %for.body3 + %inc11 = add nsw i64 %i.03, 1 + %cmp = icmp slt i64 %inc11, 100 + br i1 %cmp, label %for.cond1.preheader, label %for.end12 + +for.end12: ; preds = %for.inc10 + ret void +} + + +;; for (long int i = 0; i < 100; i++) +;; for (long int j = 0; j < 100; j++) +;; A[2*i + 4][5*i + j + 2] = ... +;; ... 
= A[4][j]; + +define void @prop9([100 x i32]* %A, i32* %B, i32 %n) nounwind uwtable ssp { +entry: + br label %for.cond1.preheader + +for.cond1.preheader: ; preds = %for.inc10, %entry + %B.addr.04 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.inc10 ] + %i.03 = phi i64 [ 0, %entry ], [ %inc11, %for.inc10 ] + br label %for.body3 + +for.body3: ; preds = %for.body3, %for.cond1.preheader + %j.02 = phi i64 [ 0, %for.cond1.preheader ], [ %inc, %for.body3 ] + %B.addr.11 = phi i32* [ %B.addr.04, %for.cond1.preheader ], [ %incdec.ptr, %for.body3 ] + %conv = trunc i64 %i.03 to i32 + %mul = mul nsw i64 %i.03, 5 + %add = add nsw i64 %mul, %j.02 + %add4 = add nsw i64 %add, 2 + %mul5 = shl nsw i64 %i.03, 1 + %add6 = add nsw i64 %mul5, 4 + %arrayidx7 = getelementptr inbounds [100 x i32]* %A, i64 %add6, i64 %add4 + store i32 %conv, i32* %arrayidx7, align 4 + %arrayidx9 = getelementptr inbounds [100 x i32]* %A, i64 4, i64 %j.02 + %0 = load i32* %arrayidx9, align 4 +; CHECK: da analyze - flow [p<= 2]! + %incdec.ptr = getelementptr inbounds i32* %B.addr.11, i64 1 + store i32 %0, i32* %B.addr.11, align 4 + %inc = add nsw i64 %j.02, 1 + %cmp2 = icmp slt i64 %inc, 100 + br i1 %cmp2, label %for.body3, label %for.inc10 + +for.inc10: ; preds = %for.body3 + %inc11 = add nsw i64 %i.03, 1 + %cmp = icmp slt i64 %inc11, 100 + br i1 %cmp, label %for.cond1.preheader, label %for.end12 + +for.end12: ; preds = %for.inc10 + ret void +} diff --git a/llvm/test/Analysis/DependenceAnalysis/Separability.ll b/llvm/test/Analysis/DependenceAnalysis/Separability.ll new file mode 100644 index 0000000..d42d3cd --- /dev/null +++ b/llvm/test/Analysis/DependenceAnalysis/Separability.ll @@ -0,0 +1,267 @@ +; RUN: opt < %s -analyze -basicaa -da | FileCheck %s + +; ModuleID = 'Separability.bc' +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.6.0" + + +;; for (long int i = 0; i < 50; i++) +;; for (long int j = 0; j < 50; j++) +;; for (long int k = 0; k < 50; k++) +;; for (long int l = 0; l < 50; l++) +;; A[n][i][j + k] = ... +;; ... 
= A[10][i + 10][2*j - l]; + +define void @sep0([100 x [100 x i32]]* %A, i32* %B, i32 %n) nounwind uwtable ssp { +entry: + br label %for.cond1.preheader + +for.cond1.preheader: ; preds = %for.inc22, %entry + %B.addr.08 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.inc22 ] + %i.07 = phi i64 [ 0, %entry ], [ %inc23, %for.inc22 ] + br label %for.cond4.preheader + +for.cond4.preheader: ; preds = %for.inc19, %for.cond1.preheader + %B.addr.16 = phi i32* [ %B.addr.08, %for.cond1.preheader ], [ %incdec.ptr, %for.inc19 ] + %j.05 = phi i64 [ 0, %for.cond1.preheader ], [ %inc20, %for.inc19 ] + br label %for.cond7.preheader + +for.cond7.preheader: ; preds = %for.inc16, %for.cond4.preheader + %B.addr.24 = phi i32* [ %B.addr.16, %for.cond4.preheader ], [ %incdec.ptr, %for.inc16 ] + %k.03 = phi i64 [ 0, %for.cond4.preheader ], [ %inc17, %for.inc16 ] + br label %for.body9 + +for.body9: ; preds = %for.body9, %for.cond7.preheader + %l.02 = phi i64 [ 0, %for.cond7.preheader ], [ %inc, %for.body9 ] + %B.addr.31 = phi i32* [ %B.addr.24, %for.cond7.preheader ], [ %incdec.ptr, %for.body9 ] + %conv = trunc i64 %i.07 to i32 + %add = add nsw i64 %j.05, %k.03 + %idxprom = sext i32 %n to i64 + %arrayidx11 = getelementptr inbounds [100 x [100 x i32]]* %A, i64 %idxprom, i64 %i.07, i64 %add + store i32 %conv, i32* %arrayidx11, align 4 + %mul = shl nsw i64 %j.05, 1 + %sub = sub nsw i64 %mul, %l.02 + %add12 = add nsw i64 %i.07, 10 + %arrayidx15 = getelementptr inbounds [100 x [100 x i32]]* %A, i64 10, i64 %add12, i64 %sub + %0 = load i32* %arrayidx15, align 4 +; CHECK: da analyze - flow [-10 * * *]! + %incdec.ptr = getelementptr inbounds i32* %B.addr.31, i64 1 + store i32 %0, i32* %B.addr.31, align 4 + %inc = add nsw i64 %l.02, 1 + %cmp8 = icmp slt i64 %inc, 50 + br i1 %cmp8, label %for.body9, label %for.inc16 + +for.inc16: ; preds = %for.body9 + %inc17 = add nsw i64 %k.03, 1 + %cmp5 = icmp slt i64 %inc17, 50 + br i1 %cmp5, label %for.cond7.preheader, label %for.inc19 + +for.inc19: ; preds = %for.inc16 + %inc20 = add nsw i64 %j.05, 1 + %cmp2 = icmp slt i64 %inc20, 50 + br i1 %cmp2, label %for.cond4.preheader, label %for.inc22 + +for.inc22: ; preds = %for.inc19 + %inc23 = add nsw i64 %i.07, 1 + %cmp = icmp slt i64 %inc23, 50 + br i1 %cmp, label %for.cond1.preheader, label %for.end24 + +for.end24: ; preds = %for.inc22 + ret void +} + + +;; for (long int i = 0; i < 50; i++) +;; for (long int j = 0; j < 50; j++) +;; for (long int k = 0; k < 50; k++) +;; for (long int l = 0; l < 50; l++) +;; A[i][i][j + k] = ... +;; ... 
= A[10][i + 10][2*j - l]; + +define void @sep1([100 x [100 x i32]]* %A, i32* %B, i32 %n) nounwind uwtable ssp { +entry: + br label %for.cond1.preheader + +for.cond1.preheader: ; preds = %for.inc22, %entry + %B.addr.08 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.inc22 ] + %i.07 = phi i64 [ 0, %entry ], [ %inc23, %for.inc22 ] + br label %for.cond4.preheader + +for.cond4.preheader: ; preds = %for.inc19, %for.cond1.preheader + %B.addr.16 = phi i32* [ %B.addr.08, %for.cond1.preheader ], [ %incdec.ptr, %for.inc19 ] + %j.05 = phi i64 [ 0, %for.cond1.preheader ], [ %inc20, %for.inc19 ] + br label %for.cond7.preheader + +for.cond7.preheader: ; preds = %for.inc16, %for.cond4.preheader + %B.addr.24 = phi i32* [ %B.addr.16, %for.cond4.preheader ], [ %incdec.ptr, %for.inc16 ] + %k.03 = phi i64 [ 0, %for.cond4.preheader ], [ %inc17, %for.inc16 ] + br label %for.body9 + +for.body9: ; preds = %for.body9, %for.cond7.preheader + %l.02 = phi i64 [ 0, %for.cond7.preheader ], [ %inc, %for.body9 ] + %B.addr.31 = phi i32* [ %B.addr.24, %for.cond7.preheader ], [ %incdec.ptr, %for.body9 ] + %conv = trunc i64 %i.07 to i32 + %add = add nsw i64 %j.05, %k.03 + %arrayidx11 = getelementptr inbounds [100 x [100 x i32]]* %A, i64 %i.07, i64 %i.07, i64 %add + store i32 %conv, i32* %arrayidx11, align 4 + %mul = shl nsw i64 %j.05, 1 + %sub = sub nsw i64 %mul, %l.02 + %add12 = add nsw i64 %i.07, 10 + %arrayidx15 = getelementptr inbounds [100 x [100 x i32]]* %A, i64 10, i64 %add12, i64 %sub + %0 = load i32* %arrayidx15, align 4 +; CHECK: da analyze - flow [> * * *]! + %incdec.ptr = getelementptr inbounds i32* %B.addr.31, i64 1 + store i32 %0, i32* %B.addr.31, align 4 + %inc = add nsw i64 %l.02, 1 + %cmp8 = icmp slt i64 %inc, 50 + br i1 %cmp8, label %for.body9, label %for.inc16 + +for.inc16: ; preds = %for.body9 + %inc17 = add nsw i64 %k.03, 1 + %cmp5 = icmp slt i64 %inc17, 50 + br i1 %cmp5, label %for.cond7.preheader, label %for.inc19 + +for.inc19: ; preds = %for.inc16 + %inc20 = add nsw i64 %j.05, 1 + %cmp2 = icmp slt i64 %inc20, 50 + br i1 %cmp2, label %for.cond4.preheader, label %for.inc22 + +for.inc22: ; preds = %for.inc19 + %inc23 = add nsw i64 %i.07, 1 + %cmp = icmp slt i64 %inc23, 50 + br i1 %cmp, label %for.cond1.preheader, label %for.end24 + +for.end24: ; preds = %for.inc22 + ret void +} + + +;; for (long int i = 0; i < 50; i++) +;; for (long int j = 0; j < 50; j++) +;; for (long int k = 0; k < 50; k++) +;; for (long int l = 0; l < 50; l++) +;; A[i][i][i + k][l] = ... +;; ... 
= A[10][i + 10][j + k][l + 10]; + +define void @sep2([100 x [100 x [100 x i32]]]* %A, i32* %B, i32 %n) nounwind uwtable ssp { +entry: + br label %for.cond1.preheader + +for.cond1.preheader: ; preds = %for.inc26, %entry + %B.addr.08 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.inc26 ] + %i.07 = phi i64 [ 0, %entry ], [ %inc27, %for.inc26 ] + br label %for.cond4.preheader + +for.cond4.preheader: ; preds = %for.inc23, %for.cond1.preheader + %B.addr.16 = phi i32* [ %B.addr.08, %for.cond1.preheader ], [ %incdec.ptr, %for.inc23 ] + %j.05 = phi i64 [ 0, %for.cond1.preheader ], [ %inc24, %for.inc23 ] + br label %for.cond7.preheader + +for.cond7.preheader: ; preds = %for.inc20, %for.cond4.preheader + %B.addr.24 = phi i32* [ %B.addr.16, %for.cond4.preheader ], [ %incdec.ptr, %for.inc20 ] + %k.03 = phi i64 [ 0, %for.cond4.preheader ], [ %inc21, %for.inc20 ] + br label %for.body9 + +for.body9: ; preds = %for.body9, %for.cond7.preheader + %l.02 = phi i64 [ 0, %for.cond7.preheader ], [ %inc, %for.body9 ] + %B.addr.31 = phi i32* [ %B.addr.24, %for.cond7.preheader ], [ %incdec.ptr, %for.body9 ] + %conv = trunc i64 %i.07 to i32 + %add = add nsw i64 %i.07, %k.03 + %arrayidx12 = getelementptr inbounds [100 x [100 x [100 x i32]]]* %A, i64 %i.07, i64 %i.07, i64 %add, i64 %l.02 + store i32 %conv, i32* %arrayidx12, align 4 + %add13 = add nsw i64 %l.02, 10 + %add14 = add nsw i64 %j.05, %k.03 + %add15 = add nsw i64 %i.07, 10 + %arrayidx19 = getelementptr inbounds [100 x [100 x [100 x i32]]]* %A, i64 10, i64 %add15, i64 %add14, i64 %add13 + %0 = load i32* %arrayidx19, align 4 +; CHECK: da analyze - flow [> * * -10]! + %incdec.ptr = getelementptr inbounds i32* %B.addr.31, i64 1 + store i32 %0, i32* %B.addr.31, align 4 + %inc = add nsw i64 %l.02, 1 + %cmp8 = icmp slt i64 %inc, 50 + br i1 %cmp8, label %for.body9, label %for.inc20 + +for.inc20: ; preds = %for.body9 + %inc21 = add nsw i64 %k.03, 1 + %cmp5 = icmp slt i64 %inc21, 50 + br i1 %cmp5, label %for.cond7.preheader, label %for.inc23 + +for.inc23: ; preds = %for.inc20 + %inc24 = add nsw i64 %j.05, 1 + %cmp2 = icmp slt i64 %inc24, 50 + br i1 %cmp2, label %for.cond4.preheader, label %for.inc26 + +for.inc26: ; preds = %for.inc23 + %inc27 = add nsw i64 %i.07, 1 + %cmp = icmp slt i64 %inc27, 50 + br i1 %cmp, label %for.cond1.preheader, label %for.end28 + +for.end28: ; preds = %for.inc26 + ret void +} + + +;; for (long int i = 0; i < 50; i++) +;; for (long int j = 0; j < 50; j++) +;; for (long int k = 0; k < 50; k++) +;; for (long int l = 0; l < 50; l++) +;; A[i][i][i + k][l + k] = ... +;; ... 
= A[10][i + 10][j + k][l + 10]; + +define void @sep3([100 x [100 x [100 x i32]]]* %A, i32* %B, i32 %n) nounwind uwtable ssp { +entry: + br label %for.cond1.preheader + +for.cond1.preheader: ; preds = %for.inc27, %entry + %B.addr.08 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.inc27 ] + %i.07 = phi i64 [ 0, %entry ], [ %inc28, %for.inc27 ] + br label %for.cond4.preheader + +for.cond4.preheader: ; preds = %for.inc24, %for.cond1.preheader + %B.addr.16 = phi i32* [ %B.addr.08, %for.cond1.preheader ], [ %incdec.ptr, %for.inc24 ] + %j.05 = phi i64 [ 0, %for.cond1.preheader ], [ %inc25, %for.inc24 ] + br label %for.cond7.preheader + +for.cond7.preheader: ; preds = %for.inc21, %for.cond4.preheader + %B.addr.24 = phi i32* [ %B.addr.16, %for.cond4.preheader ], [ %incdec.ptr, %for.inc21 ] + %k.03 = phi i64 [ 0, %for.cond4.preheader ], [ %inc22, %for.inc21 ] + br label %for.body9 + +for.body9: ; preds = %for.body9, %for.cond7.preheader + %l.02 = phi i64 [ 0, %for.cond7.preheader ], [ %inc, %for.body9 ] + %B.addr.31 = phi i32* [ %B.addr.24, %for.cond7.preheader ], [ %incdec.ptr, %for.body9 ] + %conv = trunc i64 %i.07 to i32 + %add = add nsw i64 %l.02, %k.03 + %add10 = add nsw i64 %i.07, %k.03 + %arrayidx13 = getelementptr inbounds [100 x [100 x [100 x i32]]]* %A, i64 %i.07, i64 %i.07, i64 %add10, i64 %add + store i32 %conv, i32* %arrayidx13, align 4 + %add14 = add nsw i64 %l.02, 10 + %add15 = add nsw i64 %j.05, %k.03 + %add16 = add nsw i64 %i.07, 10 + %arrayidx20 = getelementptr inbounds [100 x [100 x [100 x i32]]]* %A, i64 10, i64 %add16, i64 %add15, i64 %add14 + %0 = load i32* %arrayidx20, align 4 +; CHECK: da analyze - flow [> * * *]! + %incdec.ptr = getelementptr inbounds i32* %B.addr.31, i64 1 + store i32 %0, i32* %B.addr.31, align 4 + %inc = add nsw i64 %l.02, 1 + %cmp8 = icmp slt i64 %inc, 50 + br i1 %cmp8, label %for.body9, label %for.inc21 + +for.inc21: ; preds = %for.body9 + %inc22 = add nsw i64 %k.03, 1 + %cmp5 = icmp slt i64 %inc22, 50 + br i1 %cmp5, label %for.cond7.preheader, label %for.inc24 + +for.inc24: ; preds = %for.inc21 + %inc25 = add nsw i64 %j.05, 1 + %cmp2 = icmp slt i64 %inc25, 50 + br i1 %cmp2, label %for.cond4.preheader, label %for.inc27 + +for.inc27: ; preds = %for.inc24 + %inc28 = add nsw i64 %i.07, 1 + %cmp = icmp slt i64 %inc28, 50 + br i1 %cmp, label %for.cond1.preheader, label %for.end29 + +for.end29: ; preds = %for.inc27 + ret void +} diff --git a/llvm/test/Analysis/DependenceAnalysis/StrongSIV.ll b/llvm/test/Analysis/DependenceAnalysis/StrongSIV.ll new file mode 100644 index 0000000..be336c3 --- /dev/null +++ b/llvm/test/Analysis/DependenceAnalysis/StrongSIV.ll @@ -0,0 +1,342 @@ +; RUN: opt < %s -analyze -basicaa -indvars -da | FileCheck %s + +; ModuleID = 'StrongSIV.bc' +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.6.0" + + +;; for (int i = 0; i < n; i++) +;; A[i + 2] = ... +;; ... 
= A[i]; + +define void @strong0(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp { +entry: + %cmp1 = icmp sgt i64 %n, 0 + br i1 %cmp1, label %for.body, label %for.end + +for.body: ; preds = %for.body, %entry + %i.03 = phi i32 [ %inc, %for.body ], [ 0, %entry ] + %B.addr.02 = phi i32* [ %incdec.ptr, %for.body ], [ %B, %entry ] + %add = add nsw i32 %i.03, 2 + %idxprom = sext i32 %add to i64 + %arrayidx = getelementptr inbounds i32* %A, i64 %idxprom + store i32 %i.03, i32* %arrayidx, align 4 + %idxprom2 = sext i32 %i.03 to i64 + %arrayidx3 = getelementptr inbounds i32* %A, i64 %idxprom2 + %0 = load i32* %arrayidx3, align 4 +; CHECK: da analyze - consistent flow [2]! + %incdec.ptr = getelementptr inbounds i32* %B.addr.02, i64 1 + store i32 %0, i32* %B.addr.02, align 4 + %inc = add nsw i32 %i.03, 1 + %conv = sext i32 %inc to i64 + %cmp = icmp slt i64 %conv, %n + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + + +;; for (long int i = 0; i < n; i++) +;; A[i + 2] = ... +;; ... = A[i]; + +define void @strong1(i32* %A, i32* %B, i32 %n) nounwind uwtable ssp { +entry: + %conv = sext i32 %n to i64 + %cmp1 = icmp sgt i32 %n, 0 + br i1 %cmp1, label %for.body, label %for.end + +for.body: ; preds = %for.body, %entry + %i.03 = phi i64 [ %inc, %for.body ], [ 0, %entry ] + %B.addr.02 = phi i32* [ %incdec.ptr, %for.body ], [ %B, %entry ] + %conv2 = trunc i64 %i.03 to i32 + %add = add nsw i64 %i.03, 2 + %arrayidx = getelementptr inbounds i32* %A, i64 %add + store i32 %conv2, i32* %arrayidx, align 4 + %arrayidx3 = getelementptr inbounds i32* %A, i64 %i.03 + %0 = load i32* %arrayidx3, align 4 +; CHECK: da analyze - consistent flow [2]! + %incdec.ptr = getelementptr inbounds i32* %B.addr.02, i64 1 + store i32 %0, i32* %B.addr.02, align 4 + %inc = add nsw i64 %i.03, 1 + %cmp = icmp slt i64 %inc, %conv + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + + +;; for (long unsigned i = 0; i < n; i++) +;; A[i + 2] = ... +;; ... = A[i]; + +define void @strong2(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp { +entry: + %cmp1 = icmp eq i64 %n, 0 + br i1 %cmp1, label %for.end, label %for.body + +for.body: ; preds = %for.body, %entry + %i.03 = phi i64 [ %inc, %for.body ], [ 0, %entry ] + %B.addr.02 = phi i32* [ %incdec.ptr, %for.body ], [ %B, %entry ] + %conv = trunc i64 %i.03 to i32 + %add = add i64 %i.03, 2 + %arrayidx = getelementptr inbounds i32* %A, i64 %add + store i32 %conv, i32* %arrayidx, align 4 + %arrayidx1 = getelementptr inbounds i32* %A, i64 %i.03 + %0 = load i32* %arrayidx1, align 4 +; CHECK: da analyze - consistent flow [2]! + %incdec.ptr = getelementptr inbounds i32* %B.addr.02, i64 1 + store i32 %0, i32* %B.addr.02, align 4 + %inc = add i64 %i.03, 1 + %cmp = icmp ult i64 %inc, %n + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + + +;; for (int i = 0; i < n; i++) +;; A[i + 2] = ... +;; ... 
= A[i]; + +define void @strong3(i32* %A, i32* %B, i32 %n) nounwind uwtable ssp { +entry: + %cmp1 = icmp sgt i32 %n, 0 + br i1 %cmp1, label %for.body, label %for.end + +for.body: ; preds = %for.body, %entry + %i.03 = phi i32 [ %inc, %for.body ], [ 0, %entry ] + %B.addr.02 = phi i32* [ %incdec.ptr, %for.body ], [ %B, %entry ] + %add = add nsw i32 %i.03, 2 + %idxprom = sext i32 %add to i64 + %arrayidx = getelementptr inbounds i32* %A, i64 %idxprom + store i32 %i.03, i32* %arrayidx, align 4 + %idxprom1 = sext i32 %i.03 to i64 + %arrayidx2 = getelementptr inbounds i32* %A, i64 %idxprom1 + %0 = load i32* %arrayidx2, align 4 +; CHECK: da analyze - consistent flow [2]! + %incdec.ptr = getelementptr inbounds i32* %B.addr.02, i64 1 + store i32 %0, i32* %B.addr.02, align 4 + %inc = add nsw i32 %i.03, 1 + %cmp = icmp slt i32 %inc, %n + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + + +;; for (long unsigned i = 0; i < 19; i++) +;; A[i + 19] = ... +;; ... = A[i]; + +define void @strong4(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ] + %B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ] + %conv = trunc i64 %i.02 to i32 + %add = add i64 %i.02, 19 + %arrayidx = getelementptr inbounds i32* %A, i64 %add + store i32 %conv, i32* %arrayidx, align 4 + %arrayidx1 = getelementptr inbounds i32* %A, i64 %i.02 + %0 = load i32* %arrayidx1, align 4 +; CHECK: da analyze - none! + %incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1 + store i32 %0, i32* %B.addr.01, align 4 + %inc = add i64 %i.02, 1 + %cmp = icmp ult i64 %inc, 19 + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body + ret void +} + + +;; for (long unsigned i = 0; i < 20; i++) +;; A[i + 19] = ... +;; ... = A[i]; + +define void @strong5(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ] + %B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ] + %conv = trunc i64 %i.02 to i32 + %add = add i64 %i.02, 19 + %arrayidx = getelementptr inbounds i32* %A, i64 %add + store i32 %conv, i32* %arrayidx, align 4 + %arrayidx1 = getelementptr inbounds i32* %A, i64 %i.02 + %0 = load i32* %arrayidx1, align 4 +; CHECK: da analyze - consistent flow [19]! + %incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1 + store i32 %0, i32* %B.addr.01, align 4 + %inc = add i64 %i.02, 1 + %cmp = icmp ult i64 %inc, 20 + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body + ret void +} + + +;; for (long unsigned i = 0; i < 20; i++) +;; A[2*i + 6] = ... +;; ... = A[2*i]; + +define void @strong6(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ] + %B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ] + %conv = trunc i64 %i.02 to i32 + %mul = shl i64 %i.02, 1 + %add = add i64 %mul, 6 + %arrayidx = getelementptr inbounds i32* %A, i64 %add + store i32 %conv, i32* %arrayidx, align 4 + %mul1 = shl i64 %i.02, 1 + %arrayidx2 = getelementptr inbounds i32* %A, i64 %mul1 + %0 = load i32* %arrayidx2, align 4 +; CHECK: da analyze - consistent flow [3]! 
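+; Worked check of the expectation above: with store subscript 2*i + 6 and
+; load subscript 2*i, the strong SIV distance is (6 - 0)/2 = 3 iterations,
+; so every store is read exactly 3 iterations later, i.e. a consistent flow
+; dependence of distance 3.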
+ %incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1 + store i32 %0, i32* %B.addr.01, align 4 + %inc = add i64 %i.02, 1 + %cmp = icmp ult i64 %inc, 20 + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body + ret void +} + + +;; for (long unsigned i = 0; i < 20; i++) +;; A[2*i + 7] = ... +;; ... = A[2*i]; + +define void @strong7(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ] + %B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ] + %conv = trunc i64 %i.02 to i32 + %mul = shl i64 %i.02, 1 + %add = add i64 %mul, 7 + %arrayidx = getelementptr inbounds i32* %A, i64 %add + store i32 %conv, i32* %arrayidx, align 4 + %mul1 = shl i64 %i.02, 1 + %arrayidx2 = getelementptr inbounds i32* %A, i64 %mul1 + %0 = load i32* %arrayidx2, align 4 +; CHECK: da analyze - none! + %incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1 + store i32 %0, i32* %B.addr.01, align 4 + %inc = add i64 %i.02, 1 + %cmp = icmp ult i64 %inc, 20 + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body + ret void +} + + +;; for (long unsigned i = 0; i < 20; i++) +;; A[i + n] = ... +;; ... = A[i]; + +define void @strong8(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ] + %B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ] + %conv = trunc i64 %i.02 to i32 + %add = add i64 %i.02, %n + %arrayidx = getelementptr inbounds i32* %A, i64 %add + store i32 %conv, i32* %arrayidx, align 4 + %arrayidx1 = getelementptr inbounds i32* %A, i64 %i.02 + %0 = load i32* %arrayidx1, align 4 +; CHECK: da analyze - consistent flow [%n|<]! + %incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1 + store i32 %0, i32* %B.addr.01, align 4 + %inc = add i64 %i.02, 1 + %cmp = icmp ult i64 %inc, 20 + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body + ret void +} + + +;; for (long unsigned i = 0; i < n; i++) +;; A[i + n] = ... +;; ... = A[i + 2*n]; + +define void @strong9(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp { +entry: + %cmp1 = icmp eq i64 %n, 0 + br i1 %cmp1, label %for.end, label %for.body + +for.body: ; preds = %for.body, %entry + %i.03 = phi i64 [ %inc, %for.body ], [ 0, %entry ] + %B.addr.02 = phi i32* [ %incdec.ptr, %for.body ], [ %B, %entry ] + %conv = trunc i64 %i.03 to i32 + %add = add i64 %i.03, %n + %arrayidx = getelementptr inbounds i32* %A, i64 %add + store i32 %conv, i32* %arrayidx, align 4 + %mul = shl i64 %n, 1 + %add1 = add i64 %i.03, %mul + %arrayidx2 = getelementptr inbounds i32* %A, i64 %add1 + %0 = load i32* %arrayidx2, align 4 +; CHECK: da analyze - none! + %incdec.ptr = getelementptr inbounds i32* %B.addr.02, i64 1 + store i32 %0, i32* %B.addr.02, align 4 + %inc = add i64 %i.03, 1 + %cmp = icmp ult i64 %inc, %n + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + + +;; for (long unsigned i = 0; i < 1000; i++) +;; A[n*i + 5] = ... +;; ... 
= A[n*i + 5]; + +define void @strong10(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ] + %B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ] + %conv = trunc i64 %i.02 to i32 + %mul = mul i64 %i.02, %n + %add = add i64 %mul, 5 + %arrayidx = getelementptr inbounds i32* %A, i64 %add + store i32 %conv, i32* %arrayidx, align 4 + %mul1 = mul i64 %i.02, %n + %add2 = add i64 %mul1, 5 + %arrayidx3 = getelementptr inbounds i32* %A, i64 %add2 + %0 = load i32* %arrayidx3, align 4 +; CHECK: da analyze - consistent flow [0|<]! + %incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1 + store i32 %0, i32* %B.addr.01, align 4 + %inc = add i64 %i.02, 1 + %cmp = icmp ult i64 %inc, 1000 + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body + ret void +} diff --git a/llvm/test/Analysis/DependenceAnalysis/SymbolicRDIV.ll b/llvm/test/Analysis/DependenceAnalysis/SymbolicRDIV.ll new file mode 100644 index 0000000..2a1b4e7 --- /dev/null +++ b/llvm/test/Analysis/DependenceAnalysis/SymbolicRDIV.ll @@ -0,0 +1,312 @@ +; RUN: opt < %s -analyze -basicaa -da | FileCheck %s + +; ModuleID = 'SymbolicRDIV.bc' +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.6.0" + + +;; for (long int i = 0; i < n1; i++) +;; A[2*i + n1] = ... +;; for (long int j = 0; j < n2; j++) +;; ... = A[3*j + 3*n1]; + +define void @symbolicrdiv0(i32* %A, i32* %B, i64 %n1, i64 %n2) nounwind uwtable ssp { +entry: + %cmp4 = icmp eq i64 %n1, 0 + br i1 %cmp4, label %for.cond1.preheader, label %for.body + +for.cond1.preheader: ; preds = %for.body, %entry + %cmp21 = icmp eq i64 %n2, 0 + br i1 %cmp21, label %for.end11, label %for.body4 + +for.body: ; preds = %for.body, %entry + %i.05 = phi i64 [ %inc, %for.body ], [ 0, %entry ] + %conv = trunc i64 %i.05 to i32 + %mul = shl nsw i64 %i.05, 1 + %add = add i64 %mul, %n1 + %arrayidx = getelementptr inbounds i32* %A, i64 %add + store i32 %conv, i32* %arrayidx, align 4 + %inc = add nsw i64 %i.05, 1 + %cmp = icmp ult i64 %inc, %n1 + br i1 %cmp, label %for.body, label %for.cond1.preheader + +for.body4: ; preds = %for.body4, %for.cond1.preheader + %j.03 = phi i64 [ %inc10, %for.body4 ], [ 0, %for.cond1.preheader ] + %B.addr.02 = phi i32* [ %incdec.ptr, %for.body4 ], [ %B, %for.cond1.preheader ] + %mul56 = add i64 %j.03, %n1 + %add7 = mul i64 %mul56, 3 + %arrayidx8 = getelementptr inbounds i32* %A, i64 %add7 + %0 = load i32* %arrayidx8, align 4 +; CHECK: da analyze - none! + %incdec.ptr = getelementptr inbounds i32* %B.addr.02, i64 1 + store i32 %0, i32* %B.addr.02, align 4 + %inc10 = add nsw i64 %j.03, 1 + %cmp2 = icmp ult i64 %inc10, %n2 + br i1 %cmp2, label %for.body4, label %for.end11 + +for.end11: ; preds = %for.body4, %for.cond1.preheader + ret void +} + + +;; for (long int i = 0; i < n1; i++) +;; A[2*i + 5*n2] = ... +;; for (long int j = 0; j < n2; j++) +;; ... 
= A[3*j + 2*n2]; + +define void @symbolicrdiv1(i32* %A, i32* %B, i64 %n1, i64 %n2) nounwind uwtable ssp { +entry: + %cmp4 = icmp eq i64 %n1, 0 + br i1 %cmp4, label %for.cond2.preheader, label %for.body + +for.cond2.preheader: ; preds = %for.body, %entry + %cmp31 = icmp eq i64 %n2, 0 + br i1 %cmp31, label %for.end12, label %for.body5 + +for.body: ; preds = %for.body, %entry + %i.05 = phi i64 [ %inc, %for.body ], [ 0, %entry ] + %conv = trunc i64 %i.05 to i32 + %mul = shl nsw i64 %i.05, 1 + %mul1 = mul i64 %n2, 5 + %add = add i64 %mul, %mul1 + %arrayidx = getelementptr inbounds i32* %A, i64 %add + store i32 %conv, i32* %arrayidx, align 4 + %inc = add nsw i64 %i.05, 1 + %cmp = icmp ult i64 %inc, %n1 + br i1 %cmp, label %for.body, label %for.cond2.preheader + +for.body5: ; preds = %for.body5, %for.cond2.preheader + %j.03 = phi i64 [ %inc11, %for.body5 ], [ 0, %for.cond2.preheader ] + %B.addr.02 = phi i32* [ %incdec.ptr, %for.body5 ], [ %B, %for.cond2.preheader ] + %mul6 = mul nsw i64 %j.03, 3 + %mul7 = shl i64 %n2, 1 + %add8 = add i64 %mul6, %mul7 + %arrayidx9 = getelementptr inbounds i32* %A, i64 %add8 + %0 = load i32* %arrayidx9, align 4 +; CHECK: da analyze - none! + %incdec.ptr = getelementptr inbounds i32* %B.addr.02, i64 1 + store i32 %0, i32* %B.addr.02, align 4 + %inc11 = add nsw i64 %j.03, 1 + %cmp3 = icmp ult i64 %inc11, %n2 + br i1 %cmp3, label %for.body5, label %for.end12 + +for.end12: ; preds = %for.body5, %for.cond2.preheader + ret void +} + + +;; for (long int i = 0; i < n1; i++) +;; A[2*i - n2] = ... +;; for (long int j = 0; j < n2; j++) +;; ... = A[-j + 2*n1]; + +define void @symbolicrdiv2(i32* %A, i32* %B, i64 %n1, i64 %n2) nounwind uwtable ssp { +entry: + %cmp4 = icmp eq i64 %n1, 0 + br i1 %cmp4, label %for.cond1.preheader, label %for.body + +for.cond1.preheader: ; preds = %for.body, %entry + %cmp21 = icmp eq i64 %n2, 0 + br i1 %cmp21, label %for.end10, label %for.body4 + +for.body: ; preds = %for.body, %entry + %i.05 = phi i64 [ %inc, %for.body ], [ 0, %entry ] + %conv = trunc i64 %i.05 to i32 + %mul = shl nsw i64 %i.05, 1 + %sub = sub i64 %mul, %n2 + %arrayidx = getelementptr inbounds i32* %A, i64 %sub + store i32 %conv, i32* %arrayidx, align 4 + %inc = add nsw i64 %i.05, 1 + %cmp = icmp ult i64 %inc, %n1 + br i1 %cmp, label %for.body, label %for.cond1.preheader + +for.body4: ; preds = %for.body4, %for.cond1.preheader + %j.03 = phi i64 [ %inc9, %for.body4 ], [ 0, %for.cond1.preheader ] + %B.addr.02 = phi i32* [ %incdec.ptr, %for.body4 ], [ %B, %for.cond1.preheader ] + %mul6 = shl i64 %n1, 1 + %add = sub i64 %mul6, %j.03 + %arrayidx7 = getelementptr inbounds i32* %A, i64 %add + %0 = load i32* %arrayidx7, align 4 +; CHECK: da analyze - none! + %incdec.ptr = getelementptr inbounds i32* %B.addr.02, i64 1 + store i32 %0, i32* %B.addr.02, align 4 + %inc9 = add nsw i64 %j.03, 1 + %cmp2 = icmp ult i64 %inc9, %n2 + br i1 %cmp2, label %for.body4, label %for.end10 + +for.end10: ; preds = %for.body4, %for.cond1.preheader + ret void +} + + +;; for (long int i = 0; i < n1; i++) +;; A[-i + n2] = ... +;; for (long int j = 0; j < n2; j++) +;; ... 
= A[j - n1]; + +define void @symbolicrdiv3(i32* %A, i32* %B, i64 %n1, i64 %n2) nounwind uwtable ssp { +entry: + %cmp4 = icmp eq i64 %n1, 0 + br i1 %cmp4, label %for.cond1.preheader, label %for.body + +for.cond1.preheader: ; preds = %for.body, %entry + %cmp21 = icmp eq i64 %n2, 0 + br i1 %cmp21, label %for.end9, label %for.body4 + +for.body: ; preds = %for.body, %entry + %i.05 = phi i64 [ %inc, %for.body ], [ 0, %entry ] + %conv = trunc i64 %i.05 to i32 + %add = sub i64 %n2, %i.05 + %arrayidx = getelementptr inbounds i32* %A, i64 %add + store i32 %conv, i32* %arrayidx, align 4 + %inc = add nsw i64 %i.05, 1 + %cmp = icmp ult i64 %inc, %n1 + br i1 %cmp, label %for.body, label %for.cond1.preheader + +for.body4: ; preds = %for.body4, %for.cond1.preheader + %j.03 = phi i64 [ %inc8, %for.body4 ], [ 0, %for.cond1.preheader ] + %B.addr.02 = phi i32* [ %incdec.ptr, %for.body4 ], [ %B, %for.cond1.preheader ] + %sub5 = sub i64 %j.03, %n1 + %arrayidx6 = getelementptr inbounds i32* %A, i64 %sub5 + %0 = load i32* %arrayidx6, align 4 +; CHECK: da analyze - none! + %incdec.ptr = getelementptr inbounds i32* %B.addr.02, i64 1 + store i32 %0, i32* %B.addr.02, align 4 + %inc8 = add nsw i64 %j.03, 1 + %cmp2 = icmp ult i64 %inc8, %n2 + br i1 %cmp2, label %for.body4, label %for.end9 + +for.end9: ; preds = %for.body4, %for.cond1.preheader + ret void +} + + +;; for (long int i = 0; i < n1; i++) +;; A[-i + 2*n1] = ... +;; for (long int j = 0; j < n2; j++) +;; ... = A[-j + n1]; + +define void @symbolicrdiv4(i32* %A, i32* %B, i64 %n1, i64 %n2) nounwind uwtable ssp { +entry: + %cmp4 = icmp eq i64 %n1, 0 + br i1 %cmp4, label %for.cond1.preheader, label %for.body + +for.cond1.preheader: ; preds = %for.body, %entry + %cmp21 = icmp eq i64 %n2, 0 + br i1 %cmp21, label %for.end10, label %for.body4 + +for.body: ; preds = %for.body, %entry + %i.05 = phi i64 [ %inc, %for.body ], [ 0, %entry ] + %conv = trunc i64 %i.05 to i32 + %mul = shl i64 %n1, 1 + %add = sub i64 %mul, %i.05 + %arrayidx = getelementptr inbounds i32* %A, i64 %add + store i32 %conv, i32* %arrayidx, align 4 + %inc = add nsw i64 %i.05, 1 + %cmp = icmp ult i64 %inc, %n1 + br i1 %cmp, label %for.body, label %for.cond1.preheader + +for.body4: ; preds = %for.body4, %for.cond1.preheader + %j.03 = phi i64 [ %inc9, %for.body4 ], [ 0, %for.cond1.preheader ] + %B.addr.02 = phi i32* [ %incdec.ptr, %for.body4 ], [ %B, %for.cond1.preheader ] + %add6 = sub i64 %n1, %j.03 + %arrayidx7 = getelementptr inbounds i32* %A, i64 %add6 + %0 = load i32* %arrayidx7, align 4 +; CHECK: da analyze - none! + %incdec.ptr = getelementptr inbounds i32* %B.addr.02, i64 1 + store i32 %0, i32* %B.addr.02, align 4 + %inc9 = add nsw i64 %j.03, 1 + %cmp2 = icmp ult i64 %inc9, %n2 + br i1 %cmp2, label %for.body4, label %for.end10 + +for.end10: ; preds = %for.body4, %for.cond1.preheader + ret void +} + + +;; for (long int i = 0; i < n1; i++) +;; A[-i + n2] = ... +;; for (long int j = 0; j < n2; j++) +;; ... 
= A[-j + 2*n2]; + +define void @symbolicrdiv5(i32* %A, i32* %B, i64 %n1, i64 %n2) nounwind uwtable ssp { +entry: + %cmp4 = icmp eq i64 %n1, 0 + br i1 %cmp4, label %for.cond1.preheader, label %for.body + +for.cond1.preheader: ; preds = %for.body, %entry + %cmp21 = icmp eq i64 %n2, 0 + br i1 %cmp21, label %for.end10, label %for.body4 + +for.body: ; preds = %for.body, %entry + %i.05 = phi i64 [ %inc, %for.body ], [ 0, %entry ] + %conv = trunc i64 %i.05 to i32 + %add = sub i64 %n2, %i.05 + %arrayidx = getelementptr inbounds i32* %A, i64 %add + store i32 %conv, i32* %arrayidx, align 4 + %inc = add nsw i64 %i.05, 1 + %cmp = icmp ult i64 %inc, %n1 + br i1 %cmp, label %for.body, label %for.cond1.preheader + +for.body4: ; preds = %for.body4, %for.cond1.preheader + %j.03 = phi i64 [ %inc9, %for.body4 ], [ 0, %for.cond1.preheader ] + %B.addr.02 = phi i32* [ %incdec.ptr, %for.body4 ], [ %B, %for.cond1.preheader ] + %mul = shl i64 %n2, 1 + %add6 = sub i64 %mul, %j.03 + %arrayidx7 = getelementptr inbounds i32* %A, i64 %add6 + %0 = load i32* %arrayidx7, align 4 +; CHECK: da analyze - none! + %incdec.ptr = getelementptr inbounds i32* %B.addr.02, i64 1 + store i32 %0, i32* %B.addr.02, align 4 + %inc9 = add nsw i64 %j.03, 1 + %cmp2 = icmp ult i64 %inc9, %n2 + br i1 %cmp2, label %for.body4, label %for.end10 + +for.end10: ; preds = %for.body4, %for.cond1.preheader + ret void +} + + +;; for (long int i = 0; i < n1; i++) +;; for (long int j = 0; j < n2; j++) +;; A[j -i + n2] = ... +;; ... = A[2*n2]; + +define void @symbolicrdiv6(i32* %A, i32* %B, i64 %n1, i64 %n2) nounwind uwtable ssp { +entry: + %cmp4 = icmp eq i64 %n1, 0 + br i1 %cmp4, label %for.end7, label %for.cond1.preheader + +for.cond1.preheader: ; preds = %for.inc5, %entry + %B.addr.06 = phi i32* [ %B.addr.1.lcssa, %for.inc5 ], [ %B, %entry ] + %i.05 = phi i64 [ %inc6, %for.inc5 ], [ 0, %entry ] + %cmp21 = icmp eq i64 %n2, 0 + br i1 %cmp21, label %for.inc5, label %for.body3 + +for.body3: ; preds = %for.body3, %for.cond1.preheader + %j.03 = phi i64 [ %inc, %for.body3 ], [ 0, %for.cond1.preheader ] + %B.addr.12 = phi i32* [ %incdec.ptr, %for.body3 ], [ %B.addr.06, %for.cond1.preheader ] + %conv = trunc i64 %i.05 to i32 + %sub = sub nsw i64 %j.03, %i.05 + %add = add i64 %sub, %n2 + %arrayidx = getelementptr inbounds i32* %A, i64 %add + store i32 %conv, i32* %arrayidx, align 4 + %mul = shl i64 %n2, 1 + %arrayidx4 = getelementptr inbounds i32* %A, i64 %mul + %0 = load i32* %arrayidx4, align 4 +; CHECK: da analyze - none! 
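+; Why independence is expected here: the accesses overlap only if
+; j - i + n2 == 2*n2, i.e. j - i == n2; since 0 <= j < n2 and i >= 0,
+; j - i is always strictly less than n2, so no such pair of iterations
+; exists.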
+ %incdec.ptr = getelementptr inbounds i32* %B.addr.12, i64 1 + store i32 %0, i32* %B.addr.12, align 4 + %inc = add nsw i64 %j.03, 1 + %cmp2 = icmp ult i64 %inc, %n2 + br i1 %cmp2, label %for.body3, label %for.inc5 + +for.inc5: ; preds = %for.body3, %for.cond1.preheader + %B.addr.1.lcssa = phi i32* [ %B.addr.06, %for.cond1.preheader ], [ %incdec.ptr, %for.body3 ] + %inc6 = add nsw i64 %i.05, 1 + %cmp = icmp ult i64 %inc6, %n1 + br i1 %cmp, label %for.cond1.preheader, label %for.end7 + +for.end7: ; preds = %for.inc5, %entry + ret void +} diff --git a/llvm/test/Analysis/DependenceAnalysis/SymbolicSIV.ll b/llvm/test/Analysis/DependenceAnalysis/SymbolicSIV.ll new file mode 100644 index 0000000..ee2343f --- /dev/null +++ b/llvm/test/Analysis/DependenceAnalysis/SymbolicSIV.ll @@ -0,0 +1,330 @@ +; RUN: opt < %s -analyze -basicaa -da | FileCheck %s + +; ModuleID = 'SymbolicSIV.bc' +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.6.0" + + +;; for (long int i = 0; i < n; i++) +;; A[2*i + n] = ... +;; ... = A[3*i + 3*n]; + +define void @symbolicsiv0(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp { +entry: + %cmp1 = icmp eq i64 %n, 0 + br i1 %cmp1, label %for.end, label %for.body + +for.body: ; preds = %for.body, %entry + %i.03 = phi i64 [ %inc, %for.body ], [ 0, %entry ] + %B.addr.02 = phi i32* [ %incdec.ptr, %for.body ], [ %B, %entry ] + %conv = trunc i64 %i.03 to i32 + %mul = shl nsw i64 %i.03, 1 + %add = add i64 %mul, %n + %arrayidx = getelementptr inbounds i32* %A, i64 %add + store i32 %conv, i32* %arrayidx, align 4 + %mul14 = add i64 %i.03, %n + %add3 = mul i64 %mul14, 3 + %arrayidx4 = getelementptr inbounds i32* %A, i64 %add3 + %0 = load i32* %arrayidx4, align 4 +; CHECK: da analyze - none! + %incdec.ptr = getelementptr inbounds i32* %B.addr.02, i64 1 + store i32 %0, i32* %B.addr.02, align 4 + %inc = add nsw i64 %i.03, 1 + %cmp = icmp ult i64 %inc, %n + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + + +;; for (long int i = 0; i < n; i++) +;; A[2*i + 5*n] = ... +;; ... = A[3*i + 2*n]; + +define void @symbolicsiv1(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp { +entry: + %cmp1 = icmp eq i64 %n, 0 + br i1 %cmp1, label %for.end, label %for.body + +for.body: ; preds = %for.body, %entry + %i.03 = phi i64 [ %inc, %for.body ], [ 0, %entry ] + %B.addr.02 = phi i32* [ %incdec.ptr, %for.body ], [ %B, %entry ] + %conv = trunc i64 %i.03 to i32 + %mul = shl nsw i64 %i.03, 1 + %mul1 = mul i64 %n, 5 + %add = add i64 %mul, %mul1 + %arrayidx = getelementptr inbounds i32* %A, i64 %add + store i32 %conv, i32* %arrayidx, align 4 + %mul2 = mul nsw i64 %i.03, 3 + %mul3 = shl i64 %n, 1 + %add4 = add i64 %mul2, %mul3 + %arrayidx5 = getelementptr inbounds i32* %A, i64 %add4 + %0 = load i32* %arrayidx5, align 4 +; CHECK: da analyze - none! + %incdec.ptr = getelementptr inbounds i32* %B.addr.02, i64 1 + store i32 %0, i32* %B.addr.02, align 4 + %inc = add nsw i64 %i.03, 1 + %cmp = icmp ult i64 %inc, %n + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + + +;; for (long int i = 0; i < n; i++) +;; A[2*i - n] = ... +;; ... 
= A[-i + 2*n]; + +define void @symbolicsiv2(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp { +entry: + %cmp1 = icmp eq i64 %n, 0 + br i1 %cmp1, label %for.end, label %for.body + +for.body: ; preds = %for.body, %entry + %i.03 = phi i64 [ %inc, %for.body ], [ 0, %entry ] + %B.addr.02 = phi i32* [ %incdec.ptr, %for.body ], [ %B, %entry ] + %conv = trunc i64 %i.03 to i32 + %mul = shl nsw i64 %i.03, 1 + %sub = sub i64 %mul, %n + %arrayidx = getelementptr inbounds i32* %A, i64 %sub + store i32 %conv, i32* %arrayidx, align 4 + %mul2 = shl i64 %n, 1 + %add = sub i64 %mul2, %i.03 + %arrayidx3 = getelementptr inbounds i32* %A, i64 %add + %0 = load i32* %arrayidx3, align 4 +; CHECK: da analyze - none! + %incdec.ptr = getelementptr inbounds i32* %B.addr.02, i64 1 + store i32 %0, i32* %B.addr.02, align 4 + %inc = add nsw i64 %i.03, 1 + %cmp = icmp ult i64 %inc, %n + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + + +;; for (long int i = 0; i < n; i++) +;; A[-2*i + n + 1] = ... +;; ... = A[i - 2*n]; + +define void @symbolicsiv3(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp { +entry: + %cmp1 = icmp eq i64 %n, 0 + br i1 %cmp1, label %for.end, label %for.body + +for.body: ; preds = %for.body, %entry + %i.03 = phi i64 [ %inc, %for.body ], [ 0, %entry ] + %B.addr.02 = phi i32* [ %incdec.ptr, %for.body ], [ %B, %entry ] + %conv = trunc i64 %i.03 to i32 + %mul = mul nsw i64 %i.03, -2 + %add = add i64 %mul, %n + %add1 = add i64 %add, 1 + %arrayidx = getelementptr inbounds i32* %A, i64 %add1 + store i32 %conv, i32* %arrayidx, align 4 + %mul2 = shl i64 %n, 1 + %sub = sub i64 %i.03, %mul2 + %arrayidx3 = getelementptr inbounds i32* %A, i64 %sub + %0 = load i32* %arrayidx3, align 4 +; CHECK: da analyze - none! + %incdec.ptr = getelementptr inbounds i32* %B.addr.02, i64 1 + store i32 %0, i32* %B.addr.02, align 4 + %inc = add nsw i64 %i.03, 1 + %cmp = icmp ult i64 %inc, %n + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + + +;; for (long int i = 0; i < n; i++) +;; A[-2*i + 3*n] = ... +;; ... = A[-i + n]; + +define void @symbolicsiv4(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp { +entry: + %cmp1 = icmp eq i64 %n, 0 + br i1 %cmp1, label %for.end, label %for.body + +for.body: ; preds = %for.body, %entry + %i.03 = phi i64 [ %inc, %for.body ], [ 0, %entry ] + %B.addr.02 = phi i32* [ %incdec.ptr, %for.body ], [ %B, %entry ] + %conv = trunc i64 %i.03 to i32 + %mul = mul nsw i64 %i.03, -2 + %mul1 = mul i64 %n, 3 + %add = add i64 %mul, %mul1 + %arrayidx = getelementptr inbounds i32* %A, i64 %add + store i32 %conv, i32* %arrayidx, align 4 + %add2 = sub i64 %n, %i.03 + %arrayidx3 = getelementptr inbounds i32* %A, i64 %add2 + %0 = load i32* %arrayidx3, align 4 +; CHECK: da analyze - none! + %incdec.ptr = getelementptr inbounds i32* %B.addr.02, i64 1 + store i32 %0, i32* %B.addr.02, align 4 + %inc = add nsw i64 %i.03, 1 + %cmp = icmp ult i64 %inc, %n + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + + +;; for (long int i = 0; i < n; i++) +;; A[-2*i - 2*n] = ... +;; ... 
= A[-i - n]; + +define void @symbolicsiv5(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp { +entry: + %cmp1 = icmp eq i64 %n, 0 + br i1 %cmp1, label %for.end, label %for.body + +for.body: ; preds = %for.body, %entry + %i.03 = phi i64 [ %inc, %for.body ], [ 0, %entry ] + %B.addr.02 = phi i32* [ %incdec.ptr, %for.body ], [ %B, %entry ] + %conv = trunc i64 %i.03 to i32 + %mul = mul nsw i64 %i.03, -2 + %mul1 = shl i64 %n, 1 + %sub = sub i64 %mul, %mul1 + %arrayidx = getelementptr inbounds i32* %A, i64 %sub + store i32 %conv, i32* %arrayidx, align 4 + %sub2 = sub nsw i64 0, %i.03 + %sub3 = sub i64 %sub2, %n + %arrayidx4 = getelementptr inbounds i32* %A, i64 %sub3 + %0 = load i32* %arrayidx4, align 4 +; CHECK: da analyze - none! + %incdec.ptr = getelementptr inbounds i32* %B.addr.02, i64 1 + store i32 %0, i32* %B.addr.02, align 4 + %inc = add nsw i64 %i.03, 1 + %cmp = icmp ult i64 %inc, %n + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + + +;; why doesn't SCEV package understand that n >= 0? +;;void weaktest(int *A, int *B, long unsigned n) +;; for (long unsigned i = 0; i < n; i++) +;; A[i + n + 1] = ... +;; ... = A[-i]; + +define void @weaktest(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp { +entry: + %cmp1 = icmp eq i64 %n, 0 + br i1 %cmp1, label %for.end, label %for.body + +for.body: ; preds = %for.body, %entry + %i.03 = phi i64 [ %inc, %for.body ], [ 0, %entry ] + %B.addr.02 = phi i32* [ %incdec.ptr, %for.body ], [ %B, %entry ] + %conv = trunc i64 %i.03 to i32 + %add = add i64 %i.03, %n + %add1 = add i64 %add, 1 + %arrayidx = getelementptr inbounds i32* %A, i64 %add1 + store i32 %conv, i32* %arrayidx, align 4 + %sub = sub i64 0, %i.03 + %arrayidx2 = getelementptr inbounds i32* %A, i64 %sub + %0 = load i32* %arrayidx2, align 4 +; CHECK: da analyze - flow [*|<] splitable! +; CHECK: da analyze - split level = 1, iteration = ((0 smax (-1 + (-1 * %n))) /u 2)! + %incdec.ptr = getelementptr inbounds i32* %B.addr.02, i64 1 + store i32 %0, i32* %B.addr.02, align 4 + %inc = add i64 %i.03, 1 + %cmp = icmp ult i64 %inc, %n + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + + +;; void symbolicsiv6(int *A, int *B, long unsigned n, long unsigned N, long unsigned M) { +;; for (long int i = 0; i < n; i++) { +;; A[4*N*i + M] = i; +;; *B++ = A[4*N*i + 3*M + 1]; + +define void @symbolicsiv6(i32* %A, i32* %B, i64 %n, i64 %N, i64 %M) nounwind uwtable ssp { +entry: + %cmp1 = icmp eq i64 %n, 0 + br i1 %cmp1, label %for.end, label %for.body.preheader + +for.body.preheader: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.preheader, %for.body + %i.03 = phi i64 [ %inc, %for.body ], [ 0, %for.body.preheader ] + %B.addr.02 = phi i32* [ %incdec.ptr, %for.body ], [ %B, %for.body.preheader ] + %conv = trunc i64 %i.03 to i32 + %mul = shl i64 %N, 2 + %mul1 = mul i64 %mul, %i.03 + %add = add i64 %mul1, %M + %arrayidx = getelementptr inbounds i32* %A, i64 %add + store i32 %conv, i32* %arrayidx, align 4 + %mul2 = shl i64 %N, 2 + %mul3 = mul i64 %mul2, %i.03 + %mul4 = mul i64 %M, 3 + %add5 = add i64 %mul3, %mul4 + %add6 = add i64 %add5, 1 + %arrayidx7 = getelementptr inbounds i32* %A, i64 %add6 + %0 = load i32* %arrayidx7, align 4 + %incdec.ptr = getelementptr inbounds i32* %B.addr.02, i64 1 +; CHECK: da analyze - none! 
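+; Why independence is expected here: equal addresses would require
+; 4*N*(i - i') == 2*M + 1 for some iterations i and i'; the left-hand side
+; is always even and the right-hand side is always odd, so the equation has
+; no integer solution.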
+ store i32 %0, i32* %B.addr.02, align 4 + %inc = add nsw i64 %i.03, 1 + %exitcond = icmp ne i64 %inc, %n + br i1 %exitcond, label %for.body, label %for.end.loopexit + +for.end.loopexit: ; preds = %for.body + br label %for.end + +for.end: ; preds = %for.end.loopexit, %entry + ret void +} + + +;; void symbolicsiv7(int *A, int *B, long unsigned n, long unsigned N, long unsigned M) { +;; for (long int i = 0; i < n; i++) { +;; A[2*N*i + M] = i; +;; *B++ = A[2*N*i - 3*M + 2]; + +define void @symbolicsiv7(i32* %A, i32* %B, i64 %n, i64 %N, i64 %M) nounwind uwtable ssp { +entry: + %cmp1 = icmp eq i64 %n, 0 + br i1 %cmp1, label %for.end, label %for.body.preheader + +for.body.preheader: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.preheader, %for.body + %i.03 = phi i64 [ %inc, %for.body ], [ 0, %for.body.preheader ] + %B.addr.02 = phi i32* [ %incdec.ptr, %for.body ], [ %B, %for.body.preheader ] + %conv = trunc i64 %i.03 to i32 + %mul = shl i64 %N, 1 + %mul1 = mul i64 %mul, %i.03 + %add = add i64 %mul1, %M + %arrayidx = getelementptr inbounds i32* %A, i64 %add + store i32 %conv, i32* %arrayidx, align 4 + %mul2 = shl i64 %N, 1 + %mul3 = mul i64 %mul2, %i.03 + %0 = mul i64 %M, -3 + %sub = add i64 %mul3, %0 + %add5 = add i64 %sub, 2 + %arrayidx6 = getelementptr inbounds i32* %A, i64 %add5 + %1 = load i32* %arrayidx6, align 4 + %incdec.ptr = getelementptr inbounds i32* %B.addr.02, i64 1 +; CHECK: da analyze - flow [<>]! + store i32 %1, i32* %B.addr.02, align 4 + %inc = add nsw i64 %i.03, 1 + %exitcond = icmp ne i64 %inc, %n + br i1 %exitcond, label %for.body, label %for.end.loopexit + +for.end.loopexit: ; preds = %for.body + br label %for.end + +for.end: ; preds = %for.end.loopexit, %entry + ret void +} diff --git a/llvm/test/Analysis/DependenceAnalysis/WeakCrossingSIV.ll b/llvm/test/Analysis/DependenceAnalysis/WeakCrossingSIV.ll new file mode 100644 index 0000000..343e8f4 --- /dev/null +++ b/llvm/test/Analysis/DependenceAnalysis/WeakCrossingSIV.ll @@ -0,0 +1,220 @@ +; RUN: opt < %s -analyze -basicaa -da | FileCheck %s + +; ModuleID = 'WeakCrossingSIV.bc' +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.6.0" + + +;; for (long unsigned i = 0; i < n; i++) +;; A[1 + n*i] = ... +;; ... = A[1 - n*i]; + +define void @weakcrossing0(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp { +entry: + %cmp1 = icmp eq i64 %n, 0 + br i1 %cmp1, label %for.end, label %for.body + +for.body: ; preds = %for.body, %entry + %i.03 = phi i64 [ %inc, %for.body ], [ 0, %entry ] + %B.addr.02 = phi i32* [ %incdec.ptr, %for.body ], [ %B, %entry ] + %conv = trunc i64 %i.03 to i32 + %mul = mul i64 %i.03, %n + %add = add i64 %mul, 1 + %arrayidx = getelementptr inbounds i32* %A, i64 %add + store i32 %conv, i32* %arrayidx, align 4 + %mul1 = mul i64 %i.03, %n + %sub = sub i64 1, %mul1 + %arrayidx2 = getelementptr inbounds i32* %A, i64 %sub + %0 = load i32* %arrayidx2, align 4 +; CHECK: da analyze - flow [0|<]! + %incdec.ptr = getelementptr inbounds i32* %B.addr.02, i64 1 + store i32 %0, i32* %B.addr.02, align 4 + %inc = add i64 %i.03, 1 + %cmp = icmp ult i64 %inc, %n + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + + +;; for (long unsigned i = 0; i < n; i++) +;; A[n + i] = ... +;; ... 
= A[1 + n - i]; + +define void @weakcrossing1(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp { +entry: + %cmp1 = icmp eq i64 %n, 0 + br i1 %cmp1, label %for.end, label %for.body + +for.body: ; preds = %for.body, %entry + %i.03 = phi i64 [ %inc, %for.body ], [ 0, %entry ] + %B.addr.02 = phi i32* [ %incdec.ptr, %for.body ], [ %B, %entry ] + %conv = trunc i64 %i.03 to i32 + %add = add i64 %i.03, %n + %arrayidx = getelementptr inbounds i32* %A, i64 %add + store i32 %conv, i32* %arrayidx, align 4 + %add1 = add i64 %n, 1 + %sub = sub i64 %add1, %i.03 + %arrayidx2 = getelementptr inbounds i32* %A, i64 %sub + %0 = load i32* %arrayidx2, align 4 +; CHECK: da analyze - flow [<>] splitable! +; CHECK: da analyze - split level = 1, iteration = 0! + %incdec.ptr = getelementptr inbounds i32* %B.addr.02, i64 1 + store i32 %0, i32* %B.addr.02, align 4 + %inc = add i64 %i.03, 1 + %cmp = icmp ult i64 %inc, %n + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + + +;; for (long unsigned i = 0; i < 3; i++) +;; A[i] = ... +;; ... = A[6 - i]; + +define void @weakcrossing2(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ] + %B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ] + %conv = trunc i64 %i.02 to i32 + %arrayidx = getelementptr inbounds i32* %A, i64 %i.02 + store i32 %conv, i32* %arrayidx, align 4 + %sub = sub i64 6, %i.02 + %arrayidx1 = getelementptr inbounds i32* %A, i64 %sub + %0 = load i32* %arrayidx1, align 4 +; CHECK: da analyze - none! + %incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1 + store i32 %0, i32* %B.addr.01, align 4 + %inc = add i64 %i.02, 1 + %cmp = icmp ult i64 %inc, 3 + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body + ret void +} + + +;; for (long unsigned i = 0; i < 4; i++) +;; A[i] = ... +;; ... = A[6 - i]; + +define void @weakcrossing3(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ] + %B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ] + %conv = trunc i64 %i.02 to i32 + %arrayidx = getelementptr inbounds i32* %A, i64 %i.02 + store i32 %conv, i32* %arrayidx, align 4 + %sub = sub i64 6, %i.02 + %arrayidx1 = getelementptr inbounds i32* %A, i64 %sub + %0 = load i32* %arrayidx1, align 4 +; CHECK: da analyze - flow [0|<]! + %incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1 + store i32 %0, i32* %B.addr.01, align 4 + %inc = add i64 %i.02, 1 + %cmp = icmp ult i64 %inc, 4 + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body + ret void +} + + +;; for (long unsigned i = 0; i < 10; i++) +;; A[i] = ... +;; ... = A[-6 - i]; + +define void @weakcrossing4(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ] + %B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ] + %conv = trunc i64 %i.02 to i32 + %arrayidx = getelementptr inbounds i32* %A, i64 %i.02 + store i32 %conv, i32* %arrayidx, align 4 + %sub = sub i64 -6, %i.02 + %arrayidx1 = getelementptr inbounds i32* %A, i64 %sub + %0 = load i32* %arrayidx1, align 4 +; CHECK: da analyze - none! 
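+; Why independence is expected here: a crossing would need iw == -6 - ir
+; (where iw and ir denote the store and load iterations), i.e.
+; iw + ir == -6, which is impossible for non-negative loop indices, so the
+; weak-crossing SIV test proves there is no dependence.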
+ %incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1 + store i32 %0, i32* %B.addr.01, align 4 + %inc = add i64 %i.02, 1 + %cmp = icmp ult i64 %inc, 10 + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body + ret void +} + + +;; for (long unsigned i = 0; i < n; i++) +;; A[3*i] = ... +;; ... = A[5 - 3*i]; + +define void @weakcrossing5(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp { +entry: + %cmp1 = icmp eq i64 %n, 0 + br i1 %cmp1, label %for.end, label %for.body + +for.body: ; preds = %for.body, %entry + %i.03 = phi i64 [ %inc, %for.body ], [ 0, %entry ] + %B.addr.02 = phi i32* [ %incdec.ptr, %for.body ], [ %B, %entry ] + %conv = trunc i64 %i.03 to i32 + %mul = mul i64 %i.03, 3 + %arrayidx = getelementptr inbounds i32* %A, i64 %mul + store i32 %conv, i32* %arrayidx, align 4 + %0 = mul i64 %i.03, -3 + %sub = add i64 %0, 5 + %arrayidx2 = getelementptr inbounds i32* %A, i64 %sub + %1 = load i32* %arrayidx2, align 4 +; CHECK: da analyze - none! + %incdec.ptr = getelementptr inbounds i32* %B.addr.02, i64 1 + store i32 %1, i32* %B.addr.02, align 4 + %inc = add i64 %i.03, 1 + %cmp = icmp ult i64 %inc, %n + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + + +;; for (long unsigned i = 0; i < 4; i++) +;; A[i] = ... +;; ... = A[5 - i]; + +define void @weakcrossing6(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ] + %B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ] + %conv = trunc i64 %i.02 to i32 + %arrayidx = getelementptr inbounds i32* %A, i64 %i.02 + store i32 %conv, i32* %arrayidx, align 4 + %sub = sub i64 5, %i.02 + %arrayidx1 = getelementptr inbounds i32* %A, i64 %sub + %0 = load i32* %arrayidx1, align 4 +; CHECK: da analyze - flow [<>] splitable! +; CHECK: da analyze - split level = 1, iteration = 2! + %incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1 + store i32 %0, i32* %B.addr.01, align 4 + %inc = add i64 %i.02, 1 + %cmp = icmp ult i64 %inc, 4 + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body + ret void +} diff --git a/llvm/test/Analysis/DependenceAnalysis/WeakZeroDstSIV.ll b/llvm/test/Analysis/DependenceAnalysis/WeakZeroDstSIV.ll new file mode 100644 index 0000000..a598716 --- /dev/null +++ b/llvm/test/Analysis/DependenceAnalysis/WeakZeroDstSIV.ll @@ -0,0 +1,212 @@ +; RUN: opt < %s -analyze -basicaa -da | FileCheck %s + +; ModuleID = 'WeakZeroDstSIV.bc' +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.6.0" + + +;; for (long unsigned i = 0; i < 30; i++) +;; A[2*i + 10] = ... +;; ... = A[10]; + +define void @weakzerodst0(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ] + %B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ] + %conv = trunc i64 %i.02 to i32 + %mul = shl i64 %i.02, 1 + %add = add i64 %mul, 10 + %arrayidx = getelementptr inbounds i32* %A, i64 %add + store i32 %conv, i32* %arrayidx, align 4 + %arrayidx1 = getelementptr inbounds i32* %A, i64 10 + %0 = load i32* %arrayidx1, align 4 +; CHECK: da analyze - flow [p<=|<]! 
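+; Reading of the expectation above: the store touches A[10] only in
+; iteration i == 0 (2*0 + 10), while the load reads A[10] in every
+; iteration, so all dependences run forward from the first iteration
+; (direction <=); peeling the first iteration breaks them (the "p" marker),
+; and the same-iteration pair at i == 0 supplies the loop-independent
+; component ("|<").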
+ %incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1 + store i32 %0, i32* %B.addr.01, align 4 + %inc = add i64 %i.02, 1 + %cmp = icmp ult i64 %inc, 30 + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body + ret void +} + + +;; for (long unsigned i = 0; i < n; i++) +;; A[n*i + 10] = ... +;; ... = A[10]; + +define void @weakzerodst1(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp { +entry: + %cmp1 = icmp eq i64 %n, 0 + br i1 %cmp1, label %for.end, label %for.body + +for.body: ; preds = %for.body, %entry + %i.03 = phi i64 [ %inc, %for.body ], [ 0, %entry ] + %B.addr.02 = phi i32* [ %incdec.ptr, %for.body ], [ %B, %entry ] + %conv = trunc i64 %i.03 to i32 + %mul = mul i64 %i.03, %n + %add = add i64 %mul, 10 + %arrayidx = getelementptr inbounds i32* %A, i64 %add + store i32 %conv, i32* %arrayidx, align 4 + %arrayidx1 = getelementptr inbounds i32* %A, i64 10 + %0 = load i32* %arrayidx1, align 4 +; CHECK: da analyze - flow [p<=|<]! + %incdec.ptr = getelementptr inbounds i32* %B.addr.02, i64 1 + store i32 %0, i32* %B.addr.02, align 4 + %inc = add i64 %i.03, 1 + %cmp = icmp ult i64 %inc, %n + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + + +;; for (long unsigned i = 0; i < 5; i++) +;; A[2*i] = ... +;; ... = A[10]; + +define void @weakzerodst2(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ] + %B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ] + %conv = trunc i64 %i.02 to i32 + %mul = shl i64 %i.02, 1 + %arrayidx = getelementptr inbounds i32* %A, i64 %mul + store i32 %conv, i32* %arrayidx, align 4 + %arrayidx1 = getelementptr inbounds i32* %A, i64 10 + %0 = load i32* %arrayidx1, align 4 +; CHECK: da analyze - none! + %incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1 + store i32 %0, i32* %B.addr.01, align 4 + %inc = add i64 %i.02, 1 + %cmp = icmp ult i64 %inc, 5 + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body + ret void +} + + +;; for (long unsigned i = 0; i < 6; i++) +;; A[2*i] = ... +;; ... = A[10]; + +define void @weakzerodst3(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ] + %B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ] + %conv = trunc i64 %i.02 to i32 + %mul = shl i64 %i.02, 1 + %arrayidx = getelementptr inbounds i32* %A, i64 %mul + store i32 %conv, i32* %arrayidx, align 4 + %arrayidx1 = getelementptr inbounds i32* %A, i64 10 + %0 = load i32* %arrayidx1, align 4 +; CHECK: da analyze - flow [=>p|<]! + %incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1 + store i32 %0, i32* %B.addr.01, align 4 + %inc = add i64 %i.02, 1 + %cmp = icmp ult i64 %inc, 6 + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body + ret void +} + + +;; for (long unsigned i = 0; i < 7; i++) +;; A[2*i] = ... +;; ... 
= A[10]; + +define void @weakzerodst4(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ] + %B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ] + %conv = trunc i64 %i.02 to i32 + %mul = shl i64 %i.02, 1 + %arrayidx = getelementptr inbounds i32* %A, i64 %mul + store i32 %conv, i32* %arrayidx, align 4 + %arrayidx1 = getelementptr inbounds i32* %A, i64 10 + %0 = load i32* %arrayidx1, align 4 +; CHECK: da analyze - flow [*|<]! + %incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1 + store i32 %0, i32* %B.addr.01, align 4 + %inc = add i64 %i.02, 1 + %cmp = icmp ult i64 %inc, 7 + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body + ret void +} + + +;; for (long unsigned i = 0; i < 7; i++) +;; A[2*i] = ... +;; ... = A[-10]; + +define void @weakzerodst5(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ] + %B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ] + %conv = trunc i64 %i.02 to i32 + %mul = shl i64 %i.02, 1 + %arrayidx = getelementptr inbounds i32* %A, i64 %mul + store i32 %conv, i32* %arrayidx, align 4 + %arrayidx1 = getelementptr inbounds i32* %A, i64 -10 + %0 = load i32* %arrayidx1, align 4 +; CHECK: da analyze - none! + %incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1 + store i32 %0, i32* %B.addr.01, align 4 + %inc = add i64 %i.02, 1 + %cmp = icmp ult i64 %inc, 7 + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body + ret void +} + + +;; for (long unsigned i = 0; i < n; i++) +;; A[3*i] = ... +;; ... = A[10]; + +define void @weakzerodst6(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp { +entry: + %cmp1 = icmp eq i64 %n, 0 + br i1 %cmp1, label %for.end, label %for.body + +for.body: ; preds = %for.body, %entry + %i.03 = phi i64 [ %inc, %for.body ], [ 0, %entry ] + %B.addr.02 = phi i32* [ %incdec.ptr, %for.body ], [ %B, %entry ] + %conv = trunc i64 %i.03 to i32 + %mul = mul i64 %i.03, 3 + %arrayidx = getelementptr inbounds i32* %A, i64 %mul + store i32 %conv, i32* %arrayidx, align 4 + %arrayidx1 = getelementptr inbounds i32* %A, i64 10 + %0 = load i32* %arrayidx1, align 4 +; CHECK: da analyze - none! + %incdec.ptr = getelementptr inbounds i32* %B.addr.02, i64 1 + store i32 %0, i32* %B.addr.02, align 4 + %inc = add i64 %i.03, 1 + %cmp = icmp ult i64 %inc, %n + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} diff --git a/llvm/test/Analysis/DependenceAnalysis/WeakZeroSrcSIV.ll b/llvm/test/Analysis/DependenceAnalysis/WeakZeroSrcSIV.ll new file mode 100644 index 0000000..fd4f462 --- /dev/null +++ b/llvm/test/Analysis/DependenceAnalysis/WeakZeroSrcSIV.ll @@ -0,0 +1,212 @@ +; RUN: opt < %s -analyze -basicaa -da | FileCheck %s + +; ModuleID = 'WeakZeroSrcSIV.bc' +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.6.0" + + +;; for (long unsigned i = 0; i < 30; i++) +;; A[10] = ... +;; ... 
= A[2*i + 10]; + +define void @weakzerosrc0(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ] + %B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ] + %conv = trunc i64 %i.02 to i32 + %arrayidx = getelementptr inbounds i32* %A, i64 10 + store i32 %conv, i32* %arrayidx, align 4 + %mul = shl i64 %i.02, 1 + %add = add i64 %mul, 10 + %arrayidx1 = getelementptr inbounds i32* %A, i64 %add + %0 = load i32* %arrayidx1, align 4 +; CHECK: da analyze - flow [p<=|<]! + %incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1 + store i32 %0, i32* %B.addr.01, align 4 + %inc = add i64 %i.02, 1 + %cmp = icmp ult i64 %inc, 30 + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body + ret void +} + + +;; for (long unsigned i = 0; i < n; i++) +;; A[10] = ... +;; ... = A[n*i + 10]; + +define void @weakzerosrc1(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp { +entry: + %cmp1 = icmp eq i64 %n, 0 + br i1 %cmp1, label %for.end, label %for.body + +for.body: ; preds = %for.body, %entry + %i.03 = phi i64 [ %inc, %for.body ], [ 0, %entry ] + %B.addr.02 = phi i32* [ %incdec.ptr, %for.body ], [ %B, %entry ] + %conv = trunc i64 %i.03 to i32 + %arrayidx = getelementptr inbounds i32* %A, i64 10 + store i32 %conv, i32* %arrayidx, align 4 + %mul = mul i64 %i.03, %n + %add = add i64 %mul, 10 + %arrayidx1 = getelementptr inbounds i32* %A, i64 %add + %0 = load i32* %arrayidx1, align 4 +; CHECK: da analyze - flow [p<=|<]! + %incdec.ptr = getelementptr inbounds i32* %B.addr.02, i64 1 + store i32 %0, i32* %B.addr.02, align 4 + %inc = add i64 %i.03, 1 + %cmp = icmp ult i64 %inc, %n + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + + +;; for (long unsigned i = 0; i < 5; i++) +;; A[10] = ... +;; ... = A[2*i]; + +define void @weakzerosrc2(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ] + %B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ] + %conv = trunc i64 %i.02 to i32 + %arrayidx = getelementptr inbounds i32* %A, i64 10 + store i32 %conv, i32* %arrayidx, align 4 + %mul = shl i64 %i.02, 1 + %arrayidx1 = getelementptr inbounds i32* %A, i64 %mul + %0 = load i32* %arrayidx1, align 4 +; CHECK: da analyze - none! + %incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1 + store i32 %0, i32* %B.addr.01, align 4 + %inc = add i64 %i.02, 1 + %cmp = icmp ult i64 %inc, 5 + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body + ret void +} + + +;; for (long unsigned i = 0; i < 6; i++) +;; A[10] = ... +;; ... = A[2*i]; + +define void @weakzerosrc3(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ] + %B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ] + %conv = trunc i64 %i.02 to i32 + %arrayidx = getelementptr inbounds i32* %A, i64 10 + store i32 %conv, i32* %arrayidx, align 4 + %mul = shl i64 %i.02, 1 + %arrayidx1 = getelementptr inbounds i32* %A, i64 %mul + %0 = load i32* %arrayidx1, align 4 +; CHECK: da analyze - flow [=>p|<]! 
+ %incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1 + store i32 %0, i32* %B.addr.01, align 4 + %inc = add i64 %i.02, 1 + %cmp = icmp ult i64 %inc, 6 + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body + ret void +} + + +;; for (long unsigned i = 0; i < 7; i++) +;; A[10] = ... +;; ... = A[2*i]; + +define void @weakzerosrc4(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ] + %B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ] + %conv = trunc i64 %i.02 to i32 + %arrayidx = getelementptr inbounds i32* %A, i64 10 + store i32 %conv, i32* %arrayidx, align 4 + %mul = shl i64 %i.02, 1 + %arrayidx1 = getelementptr inbounds i32* %A, i64 %mul + %0 = load i32* %arrayidx1, align 4 +; CHECK: da analyze - flow [*|<]! + %incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1 + store i32 %0, i32* %B.addr.01, align 4 + %inc = add i64 %i.02, 1 + %cmp = icmp ult i64 %inc, 7 + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body + ret void +} + + +;; for (long unsigned i = 0; i < 7; i++) +;; A[-10] = ... +;; ... = A[2*i]; + +define void @weakzerosrc5(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ] + %B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ] + %conv = trunc i64 %i.02 to i32 + %arrayidx = getelementptr inbounds i32* %A, i64 -10 + store i32 %conv, i32* %arrayidx, align 4 + %mul = shl i64 %i.02, 1 + %arrayidx1 = getelementptr inbounds i32* %A, i64 %mul + %0 = load i32* %arrayidx1, align 4 +; CHECK: da analyze - none! + %incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1 + store i32 %0, i32* %B.addr.01, align 4 + %inc = add i64 %i.02, 1 + %cmp = icmp ult i64 %inc, 7 + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body + ret void +} + + +;; for (long unsigned i = 0; i < n; i++) +;; A[10] = ... +;; ... = A[3*i]; + +define void @weakzerosrc6(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp { +entry: + %cmp1 = icmp eq i64 %n, 0 + br i1 %cmp1, label %for.end, label %for.body + +for.body: ; preds = %for.body, %entry + %i.03 = phi i64 [ %inc, %for.body ], [ 0, %entry ] + %B.addr.02 = phi i32* [ %incdec.ptr, %for.body ], [ %B, %entry ] + %conv = trunc i64 %i.03 to i32 + %arrayidx = getelementptr inbounds i32* %A, i64 10 + store i32 %conv, i32* %arrayidx, align 4 + %mul = mul i64 %i.03, 3 + %arrayidx1 = getelementptr inbounds i32* %A, i64 %mul + %0 = load i32* %arrayidx1, align 4 +; CHECK: da analyze - none! + %incdec.ptr = getelementptr inbounds i32* %B.addr.02, i64 1 + store i32 %0, i32* %B.addr.02, align 4 + %inc = add i64 %i.03, 1 + %cmp = icmp ult i64 %inc, %n + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} diff --git a/llvm/test/Analysis/DependenceAnalysis/ZIV.ll b/llvm/test/Analysis/DependenceAnalysis/ZIV.ll new file mode 100644 index 0000000..42b2389 --- /dev/null +++ b/llvm/test/Analysis/DependenceAnalysis/ZIV.ll @@ -0,0 +1,53 @@ +; RUN: opt < %s -analyze -basicaa -da | FileCheck %s + +; ModuleID = 'ZIV.bc' +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.6.0" + + +;; A[n + 1] = ... +;; ... 
= A[1 + n]; + +define void @z0(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp { +entry: + %add = add i64 %n, 1 + %arrayidx = getelementptr inbounds i32* %A, i64 %add + store i32 0, i32* %arrayidx, align 4 + %add1 = add i64 %n, 1 + %arrayidx2 = getelementptr inbounds i32* %A, i64 %add1 + %0 = load i32* %arrayidx2, align 4 +; CHECK: da analyze - consistent flow! + store i32 %0, i32* %B, align 4 + ret void +} + + +;; A[n] = ... +;; ... = A[n + 1]; + +define void @z1(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp { +entry: + %arrayidx = getelementptr inbounds i32* %A, i64 %n + store i32 0, i32* %arrayidx, align 4 + %add = add i64 %n, 1 + %arrayidx1 = getelementptr inbounds i32* %A, i64 %add + %0 = load i32* %arrayidx1, align 4 +; CHECK: da analyze - none! + store i32 %0, i32* %B, align 4 + ret void +} + + +;; A[n] = ... +;; ... = A[m]; + +define void @z2(i32* %A, i32* %B, i64 %n, i64 %m) nounwind uwtable ssp { +entry: + %arrayidx = getelementptr inbounds i32* %A, i64 %n + store i32 0, i32* %arrayidx, align 4 + %arrayidx1 = getelementptr inbounds i32* %A, i64 %m + %0 = load i32* %arrayidx1, align 4 +; CHECK: da analyze - flow! + store i32 %0, i32* %B, align 4 + ret void +} diff --git a/llvm/test/Analysis/DependenceAnalysis/lit.local.cfg b/llvm/test/Analysis/DependenceAnalysis/lit.local.cfg new file mode 100644 index 0000000..c6106e4 --- /dev/null +++ b/llvm/test/Analysis/DependenceAnalysis/lit.local.cfg @@ -0,0 +1 @@ +config.suffixes = ['.ll'] -- 2.7.4