From 60f63b49f26f1c9908a4828b7e7ac4441b17efaa Mon Sep 17 00:00:00 2001 From: Tobias Grosser Date: Fri, 15 Jul 2016 07:05:54 +0000 Subject: [PATCH] GPGPU: Model array access information This allows us to derive host-device and device-host data-transfers. llvm-svn: 275535 --- polly/lib/CodeGen/PPCGCodeGeneration.cpp | 160 +++++++++++++++++++++++++++++-- polly/lib/External/ppcg/gpu.c | 2 +- polly/lib/External/ppcg/gpu.h | 2 + polly/test/GPGPU/double-parallel-loop.ll | 30 ++++-- 4 files changed, 176 insertions(+), 18 deletions(-) diff --git a/polly/lib/CodeGen/PPCGCodeGeneration.cpp b/polly/lib/CodeGen/PPCGCodeGeneration.cpp index 24fe61b..c3d1f02 100644 --- a/polly/lib/CodeGen/PPCGCodeGeneration.cpp +++ b/polly/lib/CodeGen/PPCGCodeGeneration.cpp @@ -265,6 +265,34 @@ public: return PPCGScop; } + /// Collect the array acesses in a statement. + /// + /// @param Stmt The statement for which to collect the accesses. + /// + /// @returns A list of array accesses. + gpu_stmt_access *getStmtAccesses(ScopStmt &Stmt) { + gpu_stmt_access *Accesses = nullptr; + + for (MemoryAccess *Acc : Stmt) { + auto Access = isl_alloc_type(S->getIslCtx(), struct gpu_stmt_access); + Access->read = Acc->isRead(); + Access->write = Acc->isWrite(); + Access->access = Acc->getAccessRelation(); + isl_space *Space = isl_map_get_space(Access->access); + Space = isl_space_range(Space); + Space = isl_space_from_range(Space); + isl_map *Universe = isl_map_universe(Space); + Access->tagged_access = + isl_map_domain_product(Acc->getAccessRelation(), Universe); + Access->exact_write = Acc->isWrite(); + Access->ref_id = Acc->getId(); + Access->next = Accesses; + Accesses = Access; + } + + return Accesses; + } + /// Collect the list of GPU statements. /// /// Each statement has an id, a pointer to the underlying data structure, @@ -285,13 +313,121 @@ public: // We use the pet stmt pointer to keep track of the Polly statements. GPUStmt->stmt = (pet_stmt *)&Stmt; - GPUStmt->accesses = nullptr; + GPUStmt->accesses = getStmtAccesses(Stmt); i++; } return Stmts; } + /// Derive the extent of an array. + /// + /// The extent of an array is defined by the set of memory locations for + /// which a memory access in the iteration domain exists. + /// + /// @param Array The array to derive the extent for. + /// + /// @returns An isl_set describing the extent of the array. + __isl_give isl_set *getExtent(ScopArrayInfo *Array) { + isl_union_map *Accesses = S->getAccesses(); + Accesses = isl_union_map_intersect_domain(Accesses, S->getDomains()); + isl_union_set *AccessUSet = isl_union_map_range(Accesses); + isl_set *AccessSet = + isl_union_set_extract_set(AccessUSet, Array->getSpace()); + isl_union_set_free(AccessUSet); + + return AccessSet; + } + + /// Derive the bounds of an array. + /// + /// For the first dimension we derive the bound of the array from the extent + /// of this dimension. For inner dimensions we obtain their size directly from + /// ScopArrayInfo. + /// + /// @param PPCGArray The array to compute bounds for. + /// @param Array The polly array from which to take the information. + void setArrayBounds(gpu_array_info &PPCGArray, ScopArrayInfo *Array) { + if (PPCGArray.n_index > 0) { + isl_set *Dom = isl_set_copy(PPCGArray.extent); + Dom = isl_set_project_out(Dom, isl_dim_set, 1, PPCGArray.n_index - 1); + isl_pw_aff *Bound = isl_set_dim_max(isl_set_copy(Dom), 0); + isl_set_free(Dom); + Dom = isl_pw_aff_domain(isl_pw_aff_copy(Bound)); + isl_local_space *LS = isl_local_space_from_space(isl_set_get_space(Dom)); + isl_aff *One = isl_aff_zero_on_domain(LS); + One = isl_aff_add_constant_si(One, 1); + Bound = isl_pw_aff_add(Bound, isl_pw_aff_alloc(Dom, One)); + Bound = isl_pw_aff_gist(Bound, S->getContext()); + PPCGArray.bound[0] = Bound; + } + + for (unsigned i = 1; i < PPCGArray.n_index; ++i) { + isl_pw_aff *Bound = Array->getDimensionSizePw(i); + auto LS = isl_pw_aff_get_domain_space(Bound); + auto Aff = isl_multi_aff_zero(LS); + Bound = isl_pw_aff_pullback_multi_aff(Bound, Aff); + PPCGArray.bound[i] = Bound; + } + } + + /// Create the arrays for @p PPCGProg. + /// + /// @param PPCGProg The program to compute the arrays for. + void createArrays(gpu_prog *PPCGProg) { + int i = 0; + for (auto &Element : S->arrays()) { + ScopArrayInfo *Array = Element.second.get(); + + std::string TypeName; + raw_string_ostream OS(TypeName); + + OS << *Array->getElementType(); + TypeName = OS.str(); + + gpu_array_info &PPCGArray = PPCGProg->array[i]; + + PPCGArray.space = Array->getSpace(); + PPCGArray.type = strdup(TypeName.c_str()); + PPCGArray.size = Array->getElementType()->getPrimitiveSizeInBits() / 8; + PPCGArray.name = strdup(Array->getName().c_str()); + PPCGArray.extent = nullptr; + PPCGArray.n_index = Array->getNumberOfDimensions(); + PPCGArray.bound = + isl_alloc_array(S->getIslCtx(), isl_pw_aff *, PPCGArray.n_index); + PPCGArray.extent = getExtent(Array); + PPCGArray.n_ref = 0; + PPCGArray.refs = nullptr; + PPCGArray.accessed = true; + PPCGArray.read_only_scalar = false; + PPCGArray.has_compound_element = false; + PPCGArray.local = false; + PPCGArray.declare_local = false; + PPCGArray.global = false; + PPCGArray.linearize = false; + PPCGArray.dep_order = nullptr; + + setArrayBounds(PPCGArray, Array); + } + } + + /// Create an identity map between the arrays in the scop. + /// + /// @returns An identity map between the arrays in the scop. + isl_union_map *getArrayIdentity() { + isl_union_map *Maps = isl_union_map_empty(S->getParamSpace()); + + for (auto &Item : S->arrays()) { + ScopArrayInfo *Array = Item.second.get(); + isl_space *Space = Array->getSpace(); + Space = isl_space_map_from_set(Space); + isl_map *Identity = isl_map_identity(Space); + Maps = isl_union_map_add_map(Maps, Identity); + } + + return Maps; + } + /// Create a default-initialized PPCG GPU program. /// /// @returns A new gpu grogram description. @@ -305,19 +441,23 @@ public: PPCGProg->ctx = S->getIslCtx(); PPCGProg->scop = PPCGScop; PPCGProg->context = isl_set_copy(PPCGScop->context); - PPCGProg->read = nullptr; - PPCGProg->may_write = nullptr; - PPCGProg->must_write = nullptr; - PPCGProg->tagged_must_kill = nullptr; - PPCGProg->may_persist = nullptr; - PPCGProg->to_outer = nullptr; - PPCGProg->to_inner = nullptr; + PPCGProg->read = isl_union_map_copy(PPCGScop->reads); + PPCGProg->may_write = isl_union_map_copy(PPCGScop->may_writes); + PPCGProg->must_write = isl_union_map_copy(PPCGScop->must_writes); + PPCGProg->tagged_must_kill = + isl_union_map_copy(PPCGScop->tagged_must_kills); + PPCGProg->to_inner = getArrayIdentity(); + PPCGProg->to_outer = getArrayIdentity(); + PPCGProg->may_persist = compute_may_persist(PPCGProg); PPCGProg->any_to_outer = nullptr; PPCGProg->array_order = nullptr; PPCGProg->n_stmts = std::distance(S->begin(), S->end()); PPCGProg->stmts = getStatements(); - PPCGProg->n_array = 0; - PPCGProg->array = nullptr; + PPCGProg->n_array = std::distance(S->array_begin(), S->array_end()); + PPCGProg->array = isl_calloc_array(S->getIslCtx(), struct gpu_array_info, + PPCGProg->n_array); + + createArrays(PPCGProg); return PPCGProg; } diff --git a/polly/lib/External/ppcg/gpu.c b/polly/lib/External/ppcg/gpu.c index 218b918..e76e149 100644 --- a/polly/lib/External/ppcg/gpu.c +++ b/polly/lib/External/ppcg/gpu.c @@ -5309,7 +5309,7 @@ int generate_gpu(isl_ctx *ctx, const char *input, FILE *out, * arrays that are not local to "prog" and remove those elements that * are definitely killed or definitely written by "prog". */ -static __isl_give isl_union_set *compute_may_persist(struct gpu_prog *prog) +__isl_give isl_union_set *compute_may_persist(struct gpu_prog *prog) { int i; isl_union_set *may_persist, *killed; diff --git a/polly/lib/External/ppcg/gpu.h b/polly/lib/External/ppcg/gpu.h index 7d617de..78bccdf 100644 --- a/polly/lib/External/ppcg/gpu.h +++ b/polly/lib/External/ppcg/gpu.h @@ -369,4 +369,6 @@ __isl_give isl_schedule *map_to_device(struct gpu_gen *gen, __isl_take isl_schedule *schedule); __isl_give isl_ast_node *generate_code(struct gpu_gen *gen, __isl_take isl_schedule *schedule); + +__isl_give isl_union_set *compute_may_persist(struct gpu_prog *prog); #endif diff --git a/polly/test/GPGPU/double-parallel-loop.ll b/polly/test/GPGPU/double-parallel-loop.ll index be33a6c..e1e8e23 100644 --- a/polly/test/GPGPU/double-parallel-loop.ll +++ b/polly/test/GPGPU/double-parallel-loop.ll @@ -23,10 +23,15 @@ ; SCHED-NEXT: child: ; SCHED-NEXT: context: "{ [] }" ; SCHED-NEXT: child: -; SCHED-NEXT: extension: "{ }" +; SCHED-NEXT: extension: "{ [] -> from_device_MemRef_A[]; [] -> to_device_MemRef_A[] }" ; SCHED-NEXT: child: ; SCHED-NEXT: sequence: -; SCHED-NEXT: - filter: "{ }" +; SCHED-NEXT: - filter: "{ to_device_MemRef_A[] }" +; SCHED-NEXT: child: +; SCHED-NEXT: set: +; SCHED-NEXT: - filter: "{ to_device_MemRef_A[] }" +; SCHED-NEXT: child: +; SCHED-NEXT: guard: "{ [] }" ; SCHED-NEXT: - filter: "{ Stmt_bb5[i0, i1] }" ; SCHED-NEXT: child: ; SCHED-NEXT: guard: "{ [] }" @@ -46,16 +51,26 @@ ; SCHED-NEXT: schedule: "[{ Stmt_bb5[i0, i1] -> [(0)] }, { Stmt_bb5[i0, i1] -> [(floor((i1)/16) - 2*floor((i1)/32))] }]" ; SCHED-NEXT: permutable: 1 ; SCHED-NEXT: coincident: [ 1, 1 ] -; SCHED-NEXT: - filter: "{ }" +; SCHED-NEXT: - filter: "{ from_device_MemRef_A[] }" +; SCHED-NEXT: child: +; SCHED-NEXT: set: +; SCHED-NEXT: - filter: "{ from_device_MemRef_A[] }" +; SCHED-NEXT: child: +; SCHED-NEXT: guard: "{ [] }" ; CODE: Code ; CODE-NEXT: ==== ; CODE-NEXT: # host ; CODE-NEXT: { -; CODE-NEXT: dim3 k0_dimBlock(16, 32); -; CODE-NEXT: dim3 k0_dimGrid(32, 32); -; CODE-NEXT: kernel0 <<>> (); -; CODE-NEXT: cudaCheckKernel(); +; CODE-NEXT: cudaCheckReturn(cudaMemcpy(dev_MemRef_A, MemRef_A, (1024) * (1024) * sizeof(float), cudaMemcpyHostToDevice)); +; CODE-NEXT: { +; CODE-NEXT: dim3 k0_dimBlock(16, 32); +; CODE-NEXT: dim3 k0_dimGrid(32, 32); +; CODE-NEXT: kernel0 <<>> (); +; CODE-NEXT: cudaCheckKernel(); +; CODE-NEXT: } + +; CODE: cudaCheckReturn(cudaMemcpy(MemRef_A, dev_MemRef_A, (1024) * (1024) * sizeof(float), cudaMemcpyDeviceToHost)); ; CODE-NEXT: } ; CODE: # kernel0 @@ -63,6 +78,7 @@ ; CODE-NEXT: Stmt_bb5(32 * b0 + t0, 32 * b1 + t1 + 16 * c3); + ; void double_parallel_loop(float A[][1024]) { ; for (long i = 0; i < 1024; i++) ; for (long j = 0; j < 1024; j++) -- 2.7.4