return PPCGScop;
}
+ /// Collect the array acesses in a statement.
+ ///
+ /// @param Stmt The statement for which to collect the accesses.
+ ///
+ /// @returns A list of array accesses.
+ gpu_stmt_access *getStmtAccesses(ScopStmt &Stmt) {
+ gpu_stmt_access *Accesses = nullptr;
+
+ for (MemoryAccess *Acc : Stmt) {
+ auto Access = isl_alloc_type(S->getIslCtx(), struct gpu_stmt_access);
+ Access->read = Acc->isRead();
+ Access->write = Acc->isWrite();
+ Access->access = Acc->getAccessRelation();
+ isl_space *Space = isl_map_get_space(Access->access);
+ Space = isl_space_range(Space);
+ Space = isl_space_from_range(Space);
+ isl_map *Universe = isl_map_universe(Space);
+ Access->tagged_access =
+ isl_map_domain_product(Acc->getAccessRelation(), Universe);
+ Access->exact_write = Acc->isWrite();
+ Access->ref_id = Acc->getId();
+ Access->next = Accesses;
+ Accesses = Access;
+ }
+
+ return Accesses;
+ }
+
/// Collect the list of GPU statements.
///
/// Each statement has an id, a pointer to the underlying data structure,
// We use the pet stmt pointer to keep track of the Polly statements.
GPUStmt->stmt = (pet_stmt *)&Stmt;
- GPUStmt->accesses = nullptr;
+ GPUStmt->accesses = getStmtAccesses(Stmt);
i++;
}
return Stmts;
}
+ /// Derive the extent of an array.
+ ///
+ /// The extent of an array is defined by the set of memory locations for
+ /// which a memory access in the iteration domain exists.
+ ///
+ /// @param Array The array to derive the extent for.
+ ///
+ /// @returns An isl_set describing the extent of the array.
+ __isl_give isl_set *getExtent(ScopArrayInfo *Array) {
+ isl_union_map *Accesses = S->getAccesses();
+ Accesses = isl_union_map_intersect_domain(Accesses, S->getDomains());
+ isl_union_set *AccessUSet = isl_union_map_range(Accesses);
+ isl_set *AccessSet =
+ isl_union_set_extract_set(AccessUSet, Array->getSpace());
+ isl_union_set_free(AccessUSet);
+
+ return AccessSet;
+ }
+
+ /// Derive the bounds of an array.
+ ///
+ /// For the first dimension we derive the bound of the array from the extent
+ /// of this dimension. For inner dimensions we obtain their size directly from
+ /// ScopArrayInfo.
+ ///
+ /// @param PPCGArray The array to compute bounds for.
+ /// @param Array The polly array from which to take the information.
+ void setArrayBounds(gpu_array_info &PPCGArray, ScopArrayInfo *Array) {
+ if (PPCGArray.n_index > 0) {
+ isl_set *Dom = isl_set_copy(PPCGArray.extent);
+ Dom = isl_set_project_out(Dom, isl_dim_set, 1, PPCGArray.n_index - 1);
+ isl_pw_aff *Bound = isl_set_dim_max(isl_set_copy(Dom), 0);
+ isl_set_free(Dom);
+ Dom = isl_pw_aff_domain(isl_pw_aff_copy(Bound));
+ isl_local_space *LS = isl_local_space_from_space(isl_set_get_space(Dom));
+ isl_aff *One = isl_aff_zero_on_domain(LS);
+ One = isl_aff_add_constant_si(One, 1);
+ Bound = isl_pw_aff_add(Bound, isl_pw_aff_alloc(Dom, One));
+ Bound = isl_pw_aff_gist(Bound, S->getContext());
+ PPCGArray.bound[0] = Bound;
+ }
+
+ for (unsigned i = 1; i < PPCGArray.n_index; ++i) {
+ isl_pw_aff *Bound = Array->getDimensionSizePw(i);
+ auto LS = isl_pw_aff_get_domain_space(Bound);
+ auto Aff = isl_multi_aff_zero(LS);
+ Bound = isl_pw_aff_pullback_multi_aff(Bound, Aff);
+ PPCGArray.bound[i] = Bound;
+ }
+ }
+
+ /// Create the arrays for @p PPCGProg.
+ ///
+ /// @param PPCGProg The program to compute the arrays for.
+ void createArrays(gpu_prog *PPCGProg) {
+ int i = 0;
+ for (auto &Element : S->arrays()) {
+ ScopArrayInfo *Array = Element.second.get();
+
+ std::string TypeName;
+ raw_string_ostream OS(TypeName);
+
+ OS << *Array->getElementType();
+ TypeName = OS.str();
+
+ gpu_array_info &PPCGArray = PPCGProg->array[i];
+
+ PPCGArray.space = Array->getSpace();
+ PPCGArray.type = strdup(TypeName.c_str());
+ PPCGArray.size = Array->getElementType()->getPrimitiveSizeInBits() / 8;
+ PPCGArray.name = strdup(Array->getName().c_str());
+ PPCGArray.extent = nullptr;
+ PPCGArray.n_index = Array->getNumberOfDimensions();
+ PPCGArray.bound =
+ isl_alloc_array(S->getIslCtx(), isl_pw_aff *, PPCGArray.n_index);
+ PPCGArray.extent = getExtent(Array);
+ PPCGArray.n_ref = 0;
+ PPCGArray.refs = nullptr;
+ PPCGArray.accessed = true;
+ PPCGArray.read_only_scalar = false;
+ PPCGArray.has_compound_element = false;
+ PPCGArray.local = false;
+ PPCGArray.declare_local = false;
+ PPCGArray.global = false;
+ PPCGArray.linearize = false;
+ PPCGArray.dep_order = nullptr;
+
+ setArrayBounds(PPCGArray, Array);
+ }
+ }
+
+ /// Create an identity map between the arrays in the scop.
+ ///
+ /// @returns An identity map between the arrays in the scop.
+ isl_union_map *getArrayIdentity() {
+ isl_union_map *Maps = isl_union_map_empty(S->getParamSpace());
+
+ for (auto &Item : S->arrays()) {
+ ScopArrayInfo *Array = Item.second.get();
+ isl_space *Space = Array->getSpace();
+ Space = isl_space_map_from_set(Space);
+ isl_map *Identity = isl_map_identity(Space);
+ Maps = isl_union_map_add_map(Maps, Identity);
+ }
+
+ return Maps;
+ }
+
/// Create a default-initialized PPCG GPU program.
///
/// @returns A new gpu grogram description.
PPCGProg->ctx = S->getIslCtx();
PPCGProg->scop = PPCGScop;
PPCGProg->context = isl_set_copy(PPCGScop->context);
- PPCGProg->read = nullptr;
- PPCGProg->may_write = nullptr;
- PPCGProg->must_write = nullptr;
- PPCGProg->tagged_must_kill = nullptr;
- PPCGProg->may_persist = nullptr;
- PPCGProg->to_outer = nullptr;
- PPCGProg->to_inner = nullptr;
+ PPCGProg->read = isl_union_map_copy(PPCGScop->reads);
+ PPCGProg->may_write = isl_union_map_copy(PPCGScop->may_writes);
+ PPCGProg->must_write = isl_union_map_copy(PPCGScop->must_writes);
+ PPCGProg->tagged_must_kill =
+ isl_union_map_copy(PPCGScop->tagged_must_kills);
+ PPCGProg->to_inner = getArrayIdentity();
+ PPCGProg->to_outer = getArrayIdentity();
+ PPCGProg->may_persist = compute_may_persist(PPCGProg);
PPCGProg->any_to_outer = nullptr;
PPCGProg->array_order = nullptr;
PPCGProg->n_stmts = std::distance(S->begin(), S->end());
PPCGProg->stmts = getStatements();
- PPCGProg->n_array = 0;
- PPCGProg->array = nullptr;
+ PPCGProg->n_array = std::distance(S->array_begin(), S->array_end());
+ PPCGProg->array = isl_calloc_array(S->getIslCtx(), struct gpu_array_info,
+ PPCGProg->n_array);
+
+ createArrays(PPCGProg);
return PPCGProg;
}
; SCHED-NEXT: child:
; SCHED-NEXT: context: "{ [] }"
; SCHED-NEXT: child:
-; SCHED-NEXT: extension: "{ }"
+; SCHED-NEXT: extension: "{ [] -> from_device_MemRef_A[]; [] -> to_device_MemRef_A[] }"
; SCHED-NEXT: child:
; SCHED-NEXT: sequence:
-; SCHED-NEXT: - filter: "{ }"
+; SCHED-NEXT: - filter: "{ to_device_MemRef_A[] }"
+; SCHED-NEXT: child:
+; SCHED-NEXT: set:
+; SCHED-NEXT: - filter: "{ to_device_MemRef_A[] }"
+; SCHED-NEXT: child:
+; SCHED-NEXT: guard: "{ [] }"
; SCHED-NEXT: - filter: "{ Stmt_bb5[i0, i1] }"
; SCHED-NEXT: child:
; SCHED-NEXT: guard: "{ [] }"
; SCHED-NEXT: schedule: "[{ Stmt_bb5[i0, i1] -> [(0)] }, { Stmt_bb5[i0, i1] -> [(floor((i1)/16) - 2*floor((i1)/32))] }]"
; SCHED-NEXT: permutable: 1
; SCHED-NEXT: coincident: [ 1, 1 ]
-; SCHED-NEXT: - filter: "{ }"
+; SCHED-NEXT: - filter: "{ from_device_MemRef_A[] }"
+; SCHED-NEXT: child:
+; SCHED-NEXT: set:
+; SCHED-NEXT: - filter: "{ from_device_MemRef_A[] }"
+; SCHED-NEXT: child:
+; SCHED-NEXT: guard: "{ [] }"
; CODE: Code
; CODE-NEXT: ====
; CODE-NEXT: # host
; CODE-NEXT: {
-; CODE-NEXT: dim3 k0_dimBlock(16, 32);
-; CODE-NEXT: dim3 k0_dimGrid(32, 32);
-; CODE-NEXT: kernel0 <<<k0_dimGrid, k0_dimBlock>>> ();
-; CODE-NEXT: cudaCheckKernel();
+; CODE-NEXT: cudaCheckReturn(cudaMemcpy(dev_MemRef_A, MemRef_A, (1024) * (1024) * sizeof(float), cudaMemcpyHostToDevice));
+; CODE-NEXT: {
+; CODE-NEXT: dim3 k0_dimBlock(16, 32);
+; CODE-NEXT: dim3 k0_dimGrid(32, 32);
+; CODE-NEXT: kernel0 <<<k0_dimGrid, k0_dimBlock>>> ();
+; CODE-NEXT: cudaCheckKernel();
+; CODE-NEXT: }
+
+; CODE: cudaCheckReturn(cudaMemcpy(MemRef_A, dev_MemRef_A, (1024) * (1024) * sizeof(float), cudaMemcpyDeviceToHost));
; CODE-NEXT: }
; CODE: # kernel0
; CODE-NEXT: Stmt_bb5(32 * b0 + t0, 32 * b1 + t1 + 16 * c3);
+
; void double_parallel_loop(float A[][1024]) {
; for (long i = 0; i < 1024; i++)
; for (long j = 0; j < 1024; j++)