From e53c924b0f06b993febc53c876ed57b6c436a73f Mon Sep 17 00:00:00 2001 From: Siddharth Bhat Date: Sun, 6 Aug 2017 02:39:05 +0000 Subject: [PATCH] [Polly] [PPCGCodeGeneration] Deal with loops outside the Scop correctly in PPCGCodeGeneration. A Scop with a loop outside it is not handled currently by PPCGCodeGeneration. The test case is such that the Scop has only one inner loop that is detected. This currently breaks codegen. The fix is to reuse the existing mechanism in `IslNodeBuilder` within `GPUNodeBuilder. Differential Revision: https://reviews.llvm.org/D36290 llvm-svn: 310193 --- polly/lib/CodeGen/IslNodeBuilder.cpp | 1 + polly/lib/CodeGen/PPCGCodeGeneration.cpp | 40 +++++++++++++------ polly/test/GPGPU/loops-outside-scop.ll | 67 ++++++++++++++++++++++++++++++++ 3 files changed, 97 insertions(+), 11 deletions(-) create mode 100644 polly/test/GPGPU/loops-outside-scop.ll diff --git a/polly/lib/CodeGen/IslNodeBuilder.cpp b/polly/lib/CodeGen/IslNodeBuilder.cpp index 6995fc55..0619b85 100644 --- a/polly/lib/CodeGen/IslNodeBuilder.cpp +++ b/polly/lib/CodeGen/IslNodeBuilder.cpp @@ -296,6 +296,7 @@ void IslNodeBuilder::getReferencesInSubtree(__isl_keep isl_ast_node *For, for (const auto &I : IDToValue) Values.insert(I.second); + // NOTE: this is populated in IslNodeBuilder::addParameters for (const auto &I : OutsideLoopIterations) Values.insert(cast(I.second)->getValue()); diff --git a/polly/lib/CodeGen/PPCGCodeGeneration.cpp b/polly/lib/CodeGen/PPCGCodeGeneration.cpp index 42626e0..8daa562 100644 --- a/polly/lib/CodeGen/PPCGCodeGeneration.cpp +++ b/polly/lib/CodeGen/PPCGCodeGeneration.cpp @@ -420,11 +420,18 @@ private: /// /// @param Kernel The kernel to scan for llvm::Values /// - /// @returns A pair, whose first element contains the set of values - /// referenced by the kernel, and whose second element contains the - /// set of functions referenced by the kernel. All functions in the - /// second set satisfy isValidFunctionInKernel. - std::pair, SetVector> + /// @returns A tuple, whose: + /// - First element contains the set of values referenced by the + /// kernel + /// - Second element contains the set of functions referenced by the + /// kernel. All functions in the set satisfy + /// `isValidFunctionInKernel`. + /// - Third element contains loops that have induction variables + /// which are used in the kernel, *and* these loops are *neither* + /// in the scop, nor do they immediately surroung the Scop. + /// See [Code generation of induction variables of loops outside + /// Scops] + std::tuple, SetVector, SetVector> getReferencesInKernel(ppcg_kernel *Kernel); /// Compute the sizes of the execution grid for a given kernel. @@ -1396,7 +1403,7 @@ getFunctionsFromRawSubtreeValues(SetVector RawSubtreeValues, return SubtreeFunctions; } -std::pair, SetVector> +std::tuple, SetVector, SetVector> GPUNodeBuilder::getReferencesInKernel(ppcg_kernel *Kernel) { SetVector SubtreeValues; SetVector SCEVs; @@ -1407,11 +1414,22 @@ GPUNodeBuilder::getReferencesInKernel(ppcg_kernel *Kernel) { for (const auto &I : IDToValue) SubtreeValues.insert(I.second); + // NOTE: this is populated in IslNodeBuilder::addParameters + // See [Code generation of induction variables of loops outside Scops]. + for (const auto &I : OutsideLoopIterations) + SubtreeValues.insert(cast(I.second)->getValue()); + isl_ast_node_foreach_descendant_top_down( Kernel->tree, collectReferencesInGPUStmt, &References); - for (const SCEV *Expr : SCEVs) + for (const SCEV *Expr : SCEVs) { findValues(Expr, SE, SubtreeValues); + findLoops(Expr, Loops); + } + + Loops.remove_if([this](const Loop *L) { + return S.contains(L) || L->contains(S.getEntry()); + }); for (auto &SAI : S.arrays()) SubtreeValues.remove(SAI->getBasePtr()); @@ -1458,7 +1476,7 @@ GPUNodeBuilder::getReferencesInKernel(ppcg_kernel *Kernel) { else ReplacedValues.insert(It->second); } - return std::make_pair(ReplacedValues, ValidSubtreeFunctions); + return std::make_tuple(ReplacedValues, ValidSubtreeFunctions, Loops); } void GPUNodeBuilder::clearDominators(Function *F) { @@ -1702,7 +1720,9 @@ void GPUNodeBuilder::createKernel(__isl_take isl_ast_node *KernelStmt) { SetVector SubtreeValues; SetVector SubtreeFunctions; - std::tie(SubtreeValues, SubtreeFunctions) = getReferencesInKernel(Kernel); + SetVector Loops; + std::tie(SubtreeValues, SubtreeFunctions, Loops) = + getReferencesInKernel(Kernel); assert(Kernel->tree && "Device AST of kernel node is empty"); @@ -1712,8 +1732,6 @@ void GPUNodeBuilder::createKernel(__isl_take isl_ast_node *KernelStmt) { BlockGenerator::AllocaMapTy HostScalarMap = ScalarMap; ScalarMap.clear(); - SetVector Loops; - // Create for all loops we depend on values that contain the current loop // iteration. These values are necessary to generate code for SCEVs that // depend on such loops. As a result we need to pass them to the subfunction. diff --git a/polly/test/GPGPU/loops-outside-scop.ll b/polly/test/GPGPU/loops-outside-scop.ll new file mode 100644 index 0000000..2159485 --- /dev/null +++ b/polly/test/GPGPU/loops-outside-scop.ll @@ -0,0 +1,67 @@ +; RUN: opt %loadPolly -analyze -polly-scops < %s | FileCheck %s -check-prefix=SCOP + +; There is no FileCheck because we want to make sure that this doesn't crash. +; RUN: opt %loadPolly -polly-codegen-ppcg -polly-acc-fail-on-verify-module-failure \ +; RUN: -disable-output < %s + +; REQUIRES: pollyacc + +; Due to the existence of the `fence` call, We can only detect the inner loop +; and not the outer loop. PPCGCodeGeneration had not implemented this case. +; The fix was to pull the implementation from `IslNodeBuilder. + +; Make sure that we only capture the inner loop +; SCOP: Function: f +; SCOP-NEXT: Region: %for2.body---%for2.body.fence +; SCOP-NEXT: Max Loop Depth: 1 + +target datalayout = "e-p:64:64:64-S128-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f16:16:16-f32:32:32-f64:64:64-f128:128:128-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-unknown-linux-gnu" + +declare void @fn_to_fence(i32 *%val) + +; void f(int *arr, bool shouldcont) { +; for(int i = 0; ; i++) { +; for(int j = 0; j < 10; j++) { +; arr[j] = i; +; } +; fence(arr); +; if (!shouldcont) break; +; } +; } + + +; Function Attrs: nounwind uwtable +define void @f(i32 *%arr, i1 %shouldcont) #1 { +entry: + br label %for.init + +for.init: ; preds = %for.end, %entry.split + %i = phi i32 [ %i.next, %for.end ], [ 0, %entry ] + br label %for2.body + +for2.body: ; preds = %"65", %"64" + %j = phi i32 [ %j.next, %for2.body ], [ 0, %for.init ] + %j.sext = sext i32 %j to i64 + %arr.slot = getelementptr i32, i32* %arr, i64 %j.sext + store i32 %i, i32* %arr.slot, align 4 + %exitcond = icmp eq i32 %j, 10 + %j.next = add i32 %j, 1 + br i1 %exitcond, label %for2.body.fence, label %for2.body + +for2.body.fence: ; preds = %"65" + call void @fn_to_fence(i32* %arr) #2 + br i1 %shouldcont, label %for.end, label %exit +for.end: ; preds = %"69" + %i.next = add i32 %i, 1 + br label %for.init + +exit: ; preds = %"69" + ret void + +} + + +attributes #0 = { argmemonly nounwind } +attributes #1 = { nounwind uwtable } +attributes #2 = { nounwind } -- 2.7.4