/// - ScopStmt: A computational statement (TODO)
/// - Kernel: A GPU kernel call (TODO)
/// - Data-Transfer: A GPU <-> CPU data-transfer (TODO)
+ /// - In-kernel synchronization
+ /// - In-kernel memory copy statement
///
/// @param UserStmt The ast node to generate code for.
virtual void createUser(__isl_take isl_ast_node *UserStmt);
/// @param Kernel The kernel to generate the intrinsic functions for.
void insertKernelIntrinsics(ppcg_kernel *Kernel);
+ /// Create an in-kernel synchronization call.
+ void createKernelSync();
+
/// Finalize the generation of the kernel function.
///
/// Free the LLVM-IR module corresponding to the kernel and -- if requested --
void finalizeKernelFunction();
};
/// Check if one string is a prefix of another.
///
/// Only the leading Prefix.size() characters of @p String are compared, so a
/// mismatch is detected without searching the remainder of the string.
///
/// @param String The string in which to look for the prefix.
/// @param Prefix The prefix to look for.
///
/// @returns True iff @p String starts with @p Prefix. An empty prefix matches
///          every string; a prefix longer than @p String never matches.
static bool isPrefix(const std::string &String, const std::string &Prefix) {
  // compare() clamps the compared range to the length of String, hence a
  // prefix that is longer than String yields a non-zero result.
  return String.compare(0, Prefix.size(), Prefix) == 0;
}
+
/// Generate code for a user statement node.
///
/// Of the paths visible here, statements whose name starts with "to_device"
/// or "from_device" are treated as memory copies (not implemented yet), and
/// the remaining statements are dispatched on the type of the
/// ppcg_kernel_stmt attached to the node's annotation. Only the
/// synchronization statement emits code so far; domain and copy statements
/// are still TODOs. Every visible path frees Expr and UserStmt before it
/// returns.
///
/// @param UserStmt The ast node to generate code for.
void GPUNodeBuilder::createUser(__isl_take isl_ast_node *UserStmt) {
// Fetch the expression of the user node and its first operand.
isl_ast_expr *Expr = isl_ast_node_user_get_expr(UserStmt);
isl_ast_expr *StmtExpr = isl_ast_expr_get_op_arg(Expr, 0);
return;
}
// NOTE(review): Str is defined outside the visible part of this function --
// presumably the name of the statement; confirm against the full source.
+ if (isPrefix(Str, "to_device") || isPrefix(Str, "from_device")) {
+ // TODO: Insert memory copies
+ isl_ast_expr_free(Expr);
+ isl_ast_node_free(UserStmt);
+ return;
+ }
+
// The user pointer of the node's annotation is read as a ppcg_kernel_stmt;
// the id itself is released right after the lookup.
+ isl_id *Anno = isl_ast_node_get_annotation(UserStmt);
+ struct ppcg_kernel_stmt *KernelStmt =
+ (struct ppcg_kernel_stmt *)isl_id_get_user(Anno);
+ isl_id_free(Anno);
+
// Dispatch on the kind of in-kernel statement. Each case cleans up and
// returns on its own.
+ switch (KernelStmt->type) {
+ case ppcg_kernel_domain:
+ // TODO Create kernel user stmt
+ isl_ast_expr_free(Expr);
+ isl_ast_node_free(UserStmt);
+ return;
+ case ppcg_kernel_copy:
+ // TODO: Create kernel copy stmt
+ isl_ast_expr_free(Expr);
+ isl_ast_node_free(UserStmt);
+ return;
+ case ppcg_kernel_sync:
+ createKernelSync();
+ isl_ast_expr_free(Expr);
+ isl_ast_node_free(UserStmt);
+ return;
+ }
+
// Reached only for a statement type that has no case above.
isl_ast_expr_free(Expr);
isl_ast_node_free(UserStmt);
return;
}
+void GPUNodeBuilder::createKernelSync() {
+ Module *M = Builder.GetInsertBlock()->getParent()->getParent();
+ auto *Sync = Intrinsic::getDeclaration(M, Intrinsic::nvvm_barrier0);
+ Builder.CreateCall(Sync, {});
+}
+
void GPUNodeBuilder::createKernel(__isl_take isl_ast_node *KernelStmt) {
isl_id *Id = isl_ast_node_get_annotation(KernelStmt);
ppcg_kernel *Kernel = (ppcg_kernel *)isl_id_get_user(Id);
; RUN: -disable-output < %s | \
; RUN: FileCheck -check-prefix=CODE %s
+; RUN: opt %loadPolly -polly-codegen-ppcg -polly-acc-dump-kernel-ir \
+; RUN: -disable-output < %s | \
+; RUN: FileCheck -check-prefix=KERNEL-IR %s
+
; REQUIRES: pollyacc
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
; CODE-NEXT: for (int c3 = 0; c3 <= 1; c3 += 1)
; CODE-NEXT: Stmt_for_body62(32 * b0 + t0 + 8192 * c0, 32 * b1 + t1 + 16 * c3);
+; KERNEL-IR: call void @llvm.nvvm.barrier0()
; Function Attrs: nounwind uwtable
define internal void @kernel_gramschmidt(i32 %ni, i32 %nj, [512 x double]* %A, [512 x double]* %R, [512 x double]* %Q) #1 {