From 75dc40c3bee1ee9addc3106678c541d31040bb00 Mon Sep 17 00:00:00 2001 From: Tobias Grosser Date: Sun, 20 Dec 2015 13:31:48 +0000 Subject: [PATCH] ScopInfo: Bail out in case of complex branch structures Scops that contain many complex branches are likely to result in complex domain conditions that consist of a large (> 100) number of conjucts. Transforming such domains is expensive and unlikely to result in efficient code. To avoid long compile times we detect this case and skip such scops. In the future we may improve this by either using non-affine subregions to hide such complex condition structures or by exploiting in certain cases properties (e.g., dominance) that allow us to construct the domains of a scop in a way that results in a smaller number improving conjuncts. Example of a code that results in complex iteration spaces: loop.header / | \ \ A0 A2 A4 \ \ / \ / \ A1 A3 \ / \ / \ | B0 B2 B4 | \ / \ / | B1 B3 ^ / \ / \ | C0 C2 C4 | \ / \ / / C1 C3 / \ / / loop backedge llvm-svn: 256123 --- polly/include/polly/ScopInfo.h | 1 + polly/lib/Analysis/ScopInfo.cpp | 13 ++ polly/test/ScopInfo/complex-branch-structure.ll | 195 ++++++++++++++++++++++++ 3 files changed, 209 insertions(+) create mode 100644 polly/test/ScopInfo/complex-branch-structure.ll diff --git a/polly/include/polly/ScopInfo.h b/polly/include/polly/ScopInfo.h index 16a5275..d70d13d 100644 --- a/polly/include/polly/ScopInfo.h +++ b/polly/include/polly/ScopInfo.h @@ -77,6 +77,7 @@ enum AssumptionKind { INFINITELOOP, INVARIANTLOAD, DELINEARIZATION, + ERROR_DOMAINCONJUNCTS, }; /// Maps from a loop to the affine function expressing its backedge taken count. diff --git a/polly/lib/Analysis/ScopInfo.cpp b/polly/lib/Analysis/ScopInfo.cpp index a617e1a..6a933f1 100644 --- a/polly/lib/Analysis/ScopInfo.cpp +++ b/polly/lib/Analysis/ScopInfo.cpp @@ -62,6 +62,11 @@ using namespace polly; STATISTIC(ScopFound, "Number of valid Scops"); STATISTIC(RichScopFound, "Number of Scops containing a loop"); +// The maximal number of basic sets we allow during domain construction to +// be created. More complex scops will result in very high compile time and +// are also unlikely to result in good code +static int const MaxConjunctsInDomain = 20; + static cl::opt ModelReadOnlyScalars( "polly-analyze-read-only-scalars", cl::desc("Model read-only scalar values in the scop description"), @@ -2176,6 +2181,12 @@ void Scop::buildDomainsWithBranchConstraints(Region *R) { SuccDomain = isl_set_union(SuccDomain, CondSet); SuccDomain = isl_set_coalesce(SuccDomain); + if (isl_set_n_basic_set(SuccDomain) > MaxConjunctsInDomain) { + auto *Empty = isl_set_empty(isl_set_get_space(SuccDomain)); + isl_set_free(SuccDomain); + SuccDomain = Empty; + invalidate(ERROR_DOMAINCONJUNCTS, DebugLoc()); + } DEBUG(dbgs() << "\tSet SuccBB: " << SuccBB->getName() << " : " << SuccDomain << "\n"); } @@ -3043,6 +3054,8 @@ static std::string toString(AssumptionKind Kind) { return "Invariant load"; case DELINEARIZATION: return "Delinearization"; + case ERROR_DOMAINCONJUNCTS: + return "Low number of domain conjuncts"; } llvm_unreachable("Unknown AssumptionKind!"); } diff --git a/polly/test/ScopInfo/complex-branch-structure.ll b/polly/test/ScopInfo/complex-branch-structure.ll new file mode 100644 index 0000000..201a410 --- /dev/null +++ b/polly/test/ScopInfo/complex-branch-structure.ll @@ -0,0 +1,195 @@ +; RUN: opt %loadPolly -pass-remarks-analysis="polly-scops" -polly-scops \ +; RUN: < %s 2>&1 | FileCheck %s + +; We build a scop of the following form to check that the domain construction +; does not take a huge amount of time, but that we instead just bail out. +; +; loop.header +; / | \ \ +; A0 A2 A4 \ +; \ / \ / \ +; A1 A3 \ +; / \ / \ | +; B0 B2 B4 | +; \ / \ / | +; B1 B3 ^ +; / \ / \ | +; C0 C2 C4 | +; \ / \ / / +; C1 C3 / +; \ / / +; loop backedge + +; CHECK: Low number of domain conjuncts assumption: { : 1 = 0 } + +define void @foo(float* %A, float* %B, float* %C, float* %D, float* %E, + i64 %A1.p, i64 %A2.p, i64 %A3.p, + i64 %B1.p, i64 %B2.p, i64 %B3.p, + i64 %C1.p, i64 %C2.p, i64 %C3.p, + i64 %D1.p, i64 %D2.p, i64 %D3.p, + i64 %E1.p, i64 %E2.p, i64 %E3.p) { +entry: + br label %loop.header + +loop.header: + %indvar = phi i64 [0, %entry], [%indvar.next, %loop.backedge] + switch i2 0, label %A0 [i2 1, label %A2 i2 2, label %A4] + +A0: + %val.A0 = load float, float* %A + store float %val.A0, float* %A + br label %A1 + +A2: + %val.A2 = load float, float* %A + store float %val.A2, float* %A + %A2.cmp = icmp eq i64 %A2.p, 0 + br i1 %A2.cmp, label %A1, label %A3 + +A4: + %val.A4 = load float, float* %A + store float %val.A4, float* %A + br label %A3 + +A1: + %val.A1 = load float, float* %A + store float %val.A1, float* %A + %A1.cmp = icmp eq i64 %A1.p, 0 + br i1 %A1.cmp, label %B0, label %B2 + +A3: + %val.A3 = load float, float* %A + store float %val.A3, float* %A + %A3.cmp = icmp eq i64 %A3.p, 0 + br i1 %A3.cmp, label %B2, label %B4 + +B0: + %val.B0 = load float, float* %B + store float %val.B0, float* %B + br label %B1 + +B2: + %val.B2 = load float, float* %B + store float %val.B2, float* %B + %B2.cmp = icmp eq i64 %B2.p, 0 + br i1 %B2.cmp, label %B1, label %B3 + +B4: + %val.B4 = load float, float* %B + store float %val.B4, float* %B + br label %B3 + +B1: + %val.B1 = load float, float* %B + store float %val.B1, float* %B + %B1.cmp = icmp eq i64 %B1.p, 0 + br i1 %B1.cmp, label %C0, label %C2 + +B3: + %val.B3 = load float, float* %A + store float %val.B3, float* %A + %B3.cmp = icmp eq i64 %A3.p, 0 + br i1 %B3.cmp, label %C2, label %C4 + +C0: + %val.C0 = load float, float* %C + store float %val.C0, float* %C + br label %C1 + +C2: + %val.C2 = load float, float* %C + store float %val.C2, float* %C + %C2.cmp = icmp eq i64 %C2.p, 0 + br i1 %C2.cmp, label %C1, label %C3 + +C4: + %val.C4 = load float, float* %C + store float %val.C4, float* %C + br label %C3 + +C1: + %val.C1 = load float, float* %C + store float %val.C1, float* %C + %C1.cmp = icmp eq i64 %C1.p, 0 + br i1 %C1.cmp, label %D0, label %D2 + +C3: + %val.C3 = load float, float* %C + store float %val.C3, float* %C + %C3.cmp = icmp eq i64 %C3.p, 0 + br i1 %C3.cmp, label %D2, label %D4 + +D0: + %val.D0 = load float, float* %D + store float %val.D0, float* %D + br label %D1 + +D2: + %val.D2 = load float, float* %D + store float %val.D2, float* %D + %D2.cmp = icmp eq i64 %D2.p, 0 + br i1 %D2.cmp, label %D1, label %D3 + +D4: + %val.D4 = load float, float* %D + store float %val.D4, float* %D + br label %D3 + +D1: + %val.D1 = load float, float* %D + store float %val.D1, float* %D + %D1.cmp = icmp eq i64 %D1.p, 0 + br i1 %D1.cmp, label %E0, label %E2 + +D3: + %val.D3 = load float, float* %D + store float %val.D3, float* %D + %D3.cmp = icmp eq i64 %D3.p, 0 + br i1 %D3.cmp, label %E2, label %E4 + +E0: + %val.E0 = load float, float* %E + store float %val.E0, float* %E + br label %E1 + +E2: + %val.E2 = load float, float* %E + store float %val.E2, float* %E + %E2.cmp = icmp eq i64 %E2.p, 0 + br i1 %E2.cmp, label %E1, label %E3 + +E4: + %val.E4 = load float, float* %E + store float %val.E4, float* %E + br label %E3 + +E1: + %val.E1 = load float, float* %E + store float %val.E1, float* %E + %E1.cmp = icmp eq i64 %E1.p, 0 + br i1 %E1.cmp, label %F0, label %F2 + +E3: + %val.E3 = load float, float* %E + store float %val.E3, float* %E + %E3.cmp = icmp eq i64 %E3.p, 0 + br i1 %E3.cmp, label %F2, label %F4 + +F0: + br label %loop.backedge + +F2: + br label %loop.backedge + +F4: + br label %loop.backedge + +loop.backedge: + %indvar.next = add i64 %indvar, 1 + %cmp = icmp ne i64 %indvar, 1000 + br i1 %cmp, label %loop.header, label %exit + +exit: + ret void + +} -- 2.7.4