///
/// @param Kernel The kernel to scan for llvm::Values
///
- /// @returns A pair, whose first element contains the set of values
- /// referenced by the kernel, and whose second element contains the
- /// set of functions referenced by the kernel. All functions in the
- /// second set satisfy isValidFunctionInKernel.
- std::pair<SetVector<Value *>, SetVector<Function *>>
+ /// @returns A tuple, whose:
+ /// - First element contains the set of values referenced by the
+ /// kernel
+ /// - Second element contains the set of functions referenced by the
+ /// kernel. All functions in the set satisfy
+ /// `isValidFunctionInKernel`.
+ /// - Third element contains loops that have induction variables
+ /// which are used in the kernel, *and* these loops are *neither*
+ /// in the scop, nor do they immediately surroung the Scop.
+ /// See [Code generation of induction variables of loops outside
+ /// Scops]
+ std::tuple<SetVector<Value *>, SetVector<Function *>, SetVector<const Loop *>>
getReferencesInKernel(ppcg_kernel *Kernel);
/// Compute the sizes of the execution grid for a given kernel.
return SubtreeFunctions;
}
-std::pair<SetVector<Value *>, SetVector<Function *>>
+std::tuple<SetVector<Value *>, SetVector<Function *>, SetVector<const Loop *>>
GPUNodeBuilder::getReferencesInKernel(ppcg_kernel *Kernel) {
SetVector<Value *> SubtreeValues;
SetVector<const SCEV *> SCEVs;
for (const auto &I : IDToValue)
SubtreeValues.insert(I.second);
+ // NOTE: this is populated in IslNodeBuilder::addParameters
+ // See [Code generation of induction variables of loops outside Scops].
+ for (const auto &I : OutsideLoopIterations)
+ SubtreeValues.insert(cast<SCEVUnknown>(I.second)->getValue());
+
isl_ast_node_foreach_descendant_top_down(
Kernel->tree, collectReferencesInGPUStmt, &References);
- for (const SCEV *Expr : SCEVs)
+ for (const SCEV *Expr : SCEVs) {
findValues(Expr, SE, SubtreeValues);
+ findLoops(Expr, Loops);
+ }
+
+ Loops.remove_if([this](const Loop *L) {
+ return S.contains(L) || L->contains(S.getEntry());
+ });
for (auto &SAI : S.arrays())
SubtreeValues.remove(SAI->getBasePtr());
else
ReplacedValues.insert(It->second);
}
- return std::make_pair(ReplacedValues, ValidSubtreeFunctions);
+ return std::make_tuple(ReplacedValues, ValidSubtreeFunctions, Loops);
}
void GPUNodeBuilder::clearDominators(Function *F) {
SetVector<Value *> SubtreeValues;
SetVector<Function *> SubtreeFunctions;
- std::tie(SubtreeValues, SubtreeFunctions) = getReferencesInKernel(Kernel);
+ SetVector<const Loop *> Loops;
+ std::tie(SubtreeValues, SubtreeFunctions, Loops) =
+ getReferencesInKernel(Kernel);
assert(Kernel->tree && "Device AST of kernel node is empty");
BlockGenerator::AllocaMapTy HostScalarMap = ScalarMap;
ScalarMap.clear();
- SetVector<const Loop *> Loops;
-
// Create for all loops we depend on values that contain the current loop
// iteration. These values are necessary to generate code for SCEVs that
// depend on such loops. As a result we need to pass them to the subfunction.
--- /dev/null
+; RUN: opt %loadPolly -analyze -polly-scops < %s | FileCheck %s -check-prefix=SCOP
+
+; There is no FileCheck because we want to make sure that this doesn't crash.
+; RUN: opt %loadPolly -polly-codegen-ppcg -polly-acc-fail-on-verify-module-failure \
+; RUN: -disable-output < %s
+
+; REQUIRES: pollyacc
+
+; Due to the existence of the `fence` call, We can only detect the inner loop
+; and not the outer loop. PPCGCodeGeneration had not implemented this case.
+; The fix was to pull the implementation from `IslNodeBuilder.
+
+; Make sure that we only capture the inner loop
+; SCOP: Function: f
+; SCOP-NEXT: Region: %for2.body---%for2.body.fence
+; SCOP-NEXT: Max Loop Depth: 1
+
+target datalayout = "e-p:64:64:64-S128-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f16:16:16-f32:32:32-f64:64:64-f128:128:128-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-unknown-linux-gnu"
+
+declare void @fn_to_fence(i32 *%val)
+
+; void f(int *arr, bool shouldcont) {
+; for(int i = 0; ; i++) {
+; for(int j = 0; j < 10; j++) {
+; arr[j] = i;
+; }
+; fence(arr);
+; if (!shouldcont) break;
+; }
+; }
+
+
+; Function Attrs: nounwind uwtable
+define void @f(i32 *%arr, i1 %shouldcont) #1 {
+entry:
+ br label %for.init
+
+for.init: ; preds = %for.end, %entry.split
+ %i = phi i32 [ %i.next, %for.end ], [ 0, %entry ]
+ br label %for2.body
+
+for2.body: ; preds = %"65", %"64"
+ %j = phi i32 [ %j.next, %for2.body ], [ 0, %for.init ]
+ %j.sext = sext i32 %j to i64
+ %arr.slot = getelementptr i32, i32* %arr, i64 %j.sext
+ store i32 %i, i32* %arr.slot, align 4
+ %exitcond = icmp eq i32 %j, 10
+ %j.next = add i32 %j, 1
+ br i1 %exitcond, label %for2.body.fence, label %for2.body
+
+for2.body.fence: ; preds = %"65"
+ call void @fn_to_fence(i32* %arr) #2
+ br i1 %shouldcont, label %for.end, label %exit
+for.end: ; preds = %"69"
+ %i.next = add i32 %i, 1
+ br label %for.init
+
+exit: ; preds = %"69"
+ ret void
+
+}
+
+
+attributes #0 = { argmemonly nounwind }
+attributes #1 = { nounwind uwtable }
+attributes #2 = { nounwind }