SLPVectorizer: limit the number of alias checks to reduce the runtime.
authorErik Eckstein <eeckstein@apple.com>
Mon, 19 Jan 2015 09:33:38 +0000 (09:33 +0000)
committerErik Eckstein <eeckstein@apple.com>
Mon, 19 Jan 2015 09:33:38 +0000 (09:33 +0000)
In case of blocks with many memory-accessing instructions, alias checking can take lot of time
(because calculating the memory dependencies has quadratic complexity).
I chose a limit which resulted in no changes when running the benchmarks.

llvm-svn: 226439

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

index fa6f6cd..3524d84 100644 (file)
@@ -75,6 +75,10 @@ static const unsigned MinVecRegSize = 128;
 
 static const unsigned RecursionMaxDepth = 12;
 
+// Limit the number of alias checks. The limit is chosen so that
+// it has no negative effect on the llvm benchmarks.
+static const unsigned AliasedCheckLimit = 10;
+
 /// \returns the parent basic block if all of the instructions in \p VL
 /// are in the same block or null otherwise.
 static BasicBlock *getSameBlock(ArrayRef<Value *> VL) {
@@ -2754,11 +2758,22 @@ void BoUpSLP::BlockScheduling::calculateDependencies(ScheduleData *SD,
           Instruction *SrcInst = BundleMember->Inst;
           AliasAnalysis::Location SrcLoc = getLocation(SrcInst, SLP->AA);
           bool SrcMayWrite = BundleMember->Inst->mayWriteToMemory();
+          unsigned numAliased = 0;
 
           while (DepDest) {
             assert(isInSchedulingRegion(DepDest));
             if (SrcMayWrite || DepDest->Inst->mayWriteToMemory()) {
-              if (SLP->isAliased(SrcLoc, SrcInst, DepDest->Inst)) {
+
+              // Limit the number of alias checks, becaus SLP->isAliased() is
+              // the expensive part in the following loop.
+              if (numAliased >= AliasedCheckLimit
+                  || SLP->isAliased(SrcLoc, SrcInst, DepDest->Inst)) {
+
+                // We increment the counter only if the locations are aliased
+                // (instead of counting all alias checks). This gives a better
+                // balance between reduced runtime accurate dependencies.
+                numAliased++;
+
                 DepDest->MemoryDependencies.push_back(BundleMember);
                 BundleMember->Dependencies++;
                 ScheduleData *DestBundle = DepDest->FirstInBundle;