From: Sebastian Pop Date: Tue, 3 Jun 2014 18:16:31 +0000 (+0000) Subject: record delinearization result and reuse it in polyhedral translation X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=422e33f36334fbac32304ccd7c0f2186cb0b7395;p=platform%2Fupstream%2Fllvm.git record delinearization result and reuse it in polyhedral translation Without this patch, the testcase would fail on the delinearization of the second array: ; void foo(long n, long m, long o, double A[n][m][o]) { ; for (long i = 0; i < n; i++) ; for (long j = 0; j < m; j++) ; for (long k = 0; k < o; k++) { ; A[i+3][j-4][k+7] = 1.0; ; A[i][0][k] = 2.0; ; } ; } ; CHECK: [n, m, o] -> { Stmt_for_body6[i0, i1, i2] -> MemRef_A[3 + i0, -4 + i1, 7 + i2] }; ; CHECK: [n, m, o] -> { Stmt_for_body6[i0, i1, i2] -> MemRef_A[i0, 0, i2] }; Here is the output of FileCheck on the testcase without this patch: ; CHECK: [n, m, o] -> { Stmt_for_body6[i0, i1, i2] -> MemRef_A[i0, 0, i2] }; ^ <stdin>:26:2: note: possible intended match here [n, m, o] -> { Stmt_for_body6[i0, i1, i2] -> MemRef_A[o0] }; ^ It is possible to find a good delinearization for A[i][0][k] only in the context of the delinearization of both array accesses. There are two ways to delinearize together all array subscripts touching the same base address: either duplicate the code from scop detection to first gather all array references and then run the delinearization; or as implemented in this patch, use the same delinearization info that we computed during scop detection. llvm-svn: 210117 --- diff --git a/polly/include/polly/ScopDetection.h b/polly/include/polly/ScopDetection.h index e4f8f2b..d72c5aa 100644 --- a/polly/include/polly/ScopDetection.h +++ b/polly/include/polly/ScopDetection.h @@ -75,7 +75,33 @@ class Value; namespace polly { typedef std::set ParamSetType; -typedef std::vector AFs; +// Description of the shape of an array. +struct ArrayShape { + // Base pointer identifying all accesses to this array. 
+ const SCEVUnknown *BasePointer; + + // Sizes of each delinearized dimension. + SmallVector DelinearizedSizes; + + ArrayShape(const SCEVUnknown *B) : BasePointer(B), DelinearizedSizes() {} +}; + +struct MemAcc { + const Instruction *Insn; + + // A pointer to the shape description of the array. + ArrayShape *Shape; + + // Subscripts computed by delinearization. + SmallVector DelinearizedSubscripts; + + MemAcc(const Instruction *I, ArrayShape *S) + : Insn(I), Shape(S), DelinearizedSubscripts() {} +}; + +typedef std::map MapInsnToMemAcc; +typedef std::pair PairInsnAddRec; +typedef std::vector AFs; typedef std::map BaseToAFs; typedef std::map BaseToElSize; diff --git a/polly/lib/Analysis/ScopDetection.cpp b/polly/lib/Analysis/ScopDetection.cpp index 9d895c8..16332ea 100644 --- a/polly/lib/Analysis/ScopDetection.cpp +++ b/polly/lib/Analysis/ScopDetection.cpp @@ -347,36 +347,45 @@ bool ScopDetection::isInvariant(const Value &Val, const Region &Reg) const { return true; } +MapInsnToMemAcc InsnToMemAcc; + bool ScopDetection::hasAffineMemoryAccesses(DetectionContext &Context) const { for (auto P : Context.NonAffineAccesses) { const SCEVUnknown *BasePointer = P.first; Value *BaseValue = BasePointer->getValue(); + ArrayShape *Shape = new ArrayShape(BasePointer); // First step: collect parametric terms in all array references. SmallVector Terms; - for (const SCEVAddRecExpr *AF : Context.NonAffineAccesses[BasePointer]) - AF->collectParametricTerms(*SE, Terms); + for (PairInsnAddRec PIAF : Context.NonAffineAccesses[BasePointer]) + PIAF.second->collectParametricTerms(*SE, Terms); // Also collect terms from the affine memory accesses. - for (const SCEVAddRecExpr *AF : Context.AffineAccesses[BasePointer]) - AF->collectParametricTerms(*SE, Terms); + for (PairInsnAddRec PIAF : Context.AffineAccesses[BasePointer]) + PIAF.second->collectParametricTerms(*SE, Terms); // Second step: find array shape. 
- SmallVector Sizes; - SE->findArrayDimensions(Terms, Sizes, Context.ElementSize[BasePointer]); + SE->findArrayDimensions(Terms, Shape->DelinearizedSizes, + Context.ElementSize[BasePointer]); // Third step: compute the access functions for each subscript. - for (const SCEVAddRecExpr *AF : Context.NonAffineAccesses[BasePointer]) { - if (Sizes.empty()) - return invalid(Context, /*Assert=*/true, AF); - - SmallVector Subscripts; - AF->computeAccessFunctions(*SE, Subscripts, Sizes); - if (Sizes.empty() || Subscripts.empty()) + for (PairInsnAddRec PIAF : Context.NonAffineAccesses[BasePointer]) { + const SCEVAddRecExpr *AF = PIAF.second; + const Instruction *Insn = PIAF.first; + if (Shape->DelinearizedSizes.empty()) + return invalid(Context, /*Assert=*/true, + PIAF.second); + + MemAcc *Acc = new MemAcc(Insn, Shape); + InsnToMemAcc.insert({ Insn, Acc }); + AF->computeAccessFunctions(*SE, Acc->DelinearizedSubscripts, + Shape->DelinearizedSizes); + if (Shape->DelinearizedSizes.empty() || + Acc->DelinearizedSubscripts.empty()) return invalid(Context, /*Assert=*/true, AF); // Check that the delinearized subscripts are affine. - for (const SCEV *S : Subscripts) + for (const SCEV *S : Acc->DelinearizedSubscripts) if (!isAffineExpr(&Context.CurRegion, S, *SE, BaseValue)) return invalid(Context, /*Assert=*/true, AF); } @@ -430,12 +439,12 @@ bool ScopDetection::isValidMemoryAccess(Instruction &Inst, // accesses to the same array in a unique step. 
if (Context.NonAffineAccesses[BasePointer].size() == 0) Context.NonAffineAccesses[BasePointer] = AFs(); - Context.NonAffineAccesses[BasePointer].push_back(AF); + Context.NonAffineAccesses[BasePointer].push_back({ &Inst, AF }); } else if (const SCEVAddRecExpr *AF = dyn_cast(AccessFunction)) { if (Context.AffineAccesses[BasePointer].size() == 0) Context.AffineAccesses[BasePointer] = AFs(); - Context.AffineAccesses[BasePointer].push_back(AF); + Context.AffineAccesses[BasePointer].push_back({ &Inst, AF }); } // FIXME: Alias Analysis thinks IntToPtrInst aliases with alloca instructions diff --git a/polly/lib/Analysis/ScopInfo.cpp b/polly/lib/Analysis/ScopInfo.cpp index 185aba4..66f6c29 100644 --- a/polly/lib/Analysis/ScopInfo.cpp +++ b/polly/lib/Analysis/ScopInfo.cpp @@ -344,9 +344,9 @@ MemoryAccess::MemoryAccess(const IRAccess &Access, const Instruction *AccInst, isl_pw_aff *Affine = SCEVAffinator::getPwAff(Statement, Access.Subscripts[i]); - if (i == Size - 1) { - // Divide the access function of the last subscript by the size of the - // elements in the array. + if (Size == 1) { + // For the non delinearized arrays, divide the access function of the last + // subscript by the size of the elements in the array. // // A stride one array access in C expressed as A[i] is expressed in // LLVM-IR as something like A[i * elementsize]. 
This hides the fact that diff --git a/polly/lib/Analysis/TempScopInfo.cpp b/polly/lib/Analysis/TempScopInfo.cpp index 02f2da9..2901c93 100644 --- a/polly/lib/Analysis/TempScopInfo.cpp +++ b/polly/lib/Analysis/TempScopInfo.cpp @@ -14,6 +14,7 @@ //===----------------------------------------------------------------------===// #include "polly/TempScopInfo.h" +#include "polly/ScopDetection.h" #include "polly/LinkAllPasses.h" #include "polly/CodeGen/BlockGenerators.h" #include "polly/Support/GICHelper.h" @@ -143,6 +144,8 @@ bool TempScopInfo::buildScalarDependences(Instruction *Inst, Region *R) { return AnyCrossStmtUse; } +extern MapInsnToMemAcc InsnToMemAcc; + IRAccess TempScopInfo::buildIRAccess(Instruction *Inst, Loop *L, Region *R) { unsigned Size; Type *SizeType; @@ -167,34 +170,14 @@ IRAccess TempScopInfo::buildIRAccess(Instruction *Inst, Loop *L, Region *R) { AccessFunction = SE->getMinusSCEV(AccessFunction, BasePointer); SmallVector Subscripts, Sizes; - bool IsAffine = isAffineExpr(R, AccessFunction, *SE, BasePointer->getValue()); - const SCEVAddRecExpr *AF = dyn_cast(AccessFunction); - - if (!IsAffine && PollyDelinearize && AF) { - const SCEV *ElementSize = SE->getElementSize(Inst); - AF->delinearize(*SE, Subscripts, Sizes, ElementSize); - int NSubs = Subscripts.size(); - - if (NSubs > 0) { - // Normalize the last dimension: integrate the size of the "scalar - // dimension" and the remainder of the delinearization. 
- Subscripts[NSubs - 1] = - SE->getMulExpr(Subscripts[NSubs - 1], Sizes[NSubs - 1]); - - IsAffine = true; - for (int i = 0; i < NSubs; ++i) - if (!isAffineExpr(R, Subscripts[i], *SE, BasePointer->getValue())) { - IsAffine = false; - break; - } - } - } - - if (Subscripts.size() == 0) { - Subscripts.push_back(AccessFunction); - Sizes.push_back(SE->getConstant(ZeroOffset->getType(), Size)); - } + MemAcc *Acc = InsnToMemAcc[Inst]; + if (PollyDelinearize && Acc) + return IRAccess(Type, BasePointer->getValue(), AccessFunction, Size, true, + Acc->DelinearizedSubscripts, Acc->Shape->DelinearizedSizes); + bool IsAffine = isAffineExpr(R, AccessFunction, *SE, BasePointer->getValue()); + Subscripts.push_back(AccessFunction); + Sizes.push_back(SE->getConstant(ZeroOffset->getType(), Size)); return IRAccess(Type, BasePointer->getValue(), AccessFunction, Size, IsAffine, Subscripts, Sizes); } diff --git a/polly/test/ScopInfo/delinearize-together-all-data-refs.ll b/polly/test/ScopInfo/delinearize-together-all-data-refs.ll new file mode 100644 index 0000000..464c356 --- /dev/null +++ b/polly/test/ScopInfo/delinearize-together-all-data-refs.ll @@ -0,0 +1,75 @@ +; RUN: opt %loadPolly -polly-scops -analyze -polly-delinearize < %s | FileCheck %s + +; void foo(long n, long m, long o, double A[n][m][o]) { +; for (long i = 0; i < n; i++) +; for (long j = 0; j < m; j++) +; for (long k = 0; k < o; k++) { +; A[i+3][j-4][k+7] = 1.0; +; A[i][0][k] = 2.0; +; } +; } + +; CHECK: [n, m, o] -> { Stmt_for_body6[i0, i1, i2] -> MemRef_A[3 + i0, -4 + i1, 7 + i2] }; +; CHECK: [n, m, o] -> { Stmt_for_body6[i0, i1, i2] -> MemRef_A[i0, 0, i2] }; + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; Function Attrs: nounwind uwtable +define void @foo(i64 %n, i64 %m, i64 %o, double* nocapture %A) { +entry: + %cmp35 = icmp sgt i64 %n, 0 + br i1 %cmp35, label %for.cond1.preheader.lr.ph, label %for.end18 + +for.cond1.preheader.lr.ph: ; preds = %entry + 
%cmp233 = icmp sgt i64 %m, 0 + %cmp531 = icmp sgt i64 %o, 0 + %0 = mul nuw i64 %o, %m + br label %for.cond1.preheader + +for.cond1.preheader: ; preds = %for.inc16, %for.cond1.preheader.lr.ph + %i.036 = phi i64 [ 0, %for.cond1.preheader.lr.ph ], [ %inc17, %for.inc16 ] + br i1 %cmp233, label %for.cond4.preheader.lr.ph, label %for.inc16 + +for.cond4.preheader.lr.ph: ; preds = %for.cond1.preheader + %add7 = add nsw i64 %i.036, 3 + %1 = mul nsw i64 %add7, %0 + %add = add i64 %1, 7 + %2 = mul nsw i64 %i.036, %0 + br label %for.cond4.preheader + +for.cond4.preheader: ; preds = %for.inc13, %for.cond4.preheader.lr.ph + %j.034 = phi i64 [ 0, %for.cond4.preheader.lr.ph ], [ %inc14, %for.inc13 ] + br i1 %cmp531, label %for.body6.lr.ph, label %for.inc13 + +for.body6.lr.ph: ; preds = %for.cond4.preheader + %sub = add nsw i64 %j.034, -4 + %3 = mul nsw i64 %sub, %o + %arrayidx.sum = add i64 %add, %3 + br label %for.body6 + +for.body6: ; preds = %for.body6, %for.body6.lr.ph + %k.032 = phi i64 [ 0, %for.body6.lr.ph ], [ %inc, %for.body6 ] + %arrayidx8.sum = add i64 %arrayidx.sum, %k.032 + %arrayidx9 = getelementptr inbounds double* %A, i64 %arrayidx8.sum + store double 1.000000e+00, double* %arrayidx9, align 8 + %arrayidx10.sum = add i64 %k.032, %2 + %arrayidx12 = getelementptr inbounds double* %A, i64 %arrayidx10.sum + store double 2.000000e+00, double* %arrayidx12, align 8 + %inc = add nsw i64 %k.032, 1 + %exitcond = icmp eq i64 %inc, %o + br i1 %exitcond, label %for.inc13, label %for.body6 + +for.inc13: ; preds = %for.body6, %for.cond4.preheader + %inc14 = add nsw i64 %j.034, 1 + %exitcond37 = icmp eq i64 %inc14, %m + br i1 %exitcond37, label %for.inc16, label %for.cond4.preheader + +for.inc16: ; preds = %for.inc13, %for.cond1.preheader + %inc17 = add nsw i64 %i.036, 1 + %exitcond38 = icmp eq i64 %inc17, %n + br i1 %exitcond38, label %for.end18, label %for.cond1.preheader + +for.end18: ; preds = %for.inc16, %entry + ret void +}