From aef5196f75465ad2ee9a0ee3a4b537d069980174 Mon Sep 17 00:00:00 2001 From: Tobias Grosser Date: Thu, 14 Jul 2016 10:51:52 +0000 Subject: [PATCH] GPGPU: Map initial schedule to GPU schedule This change now applies ppcg's GPU mapping on our initial schedule. For this to work, we need to also initialize the set of all names (isl_ids) used in the scop as well as the program context. llvm-svn: 275396 --- polly/lib/CodeGen/PPCGCodeGeneration.cpp | 43 ++++++++++++++++++++++++++++++-- polly/lib/External/ppcg/gpu.c | 4 +-- polly/lib/External/ppcg/gpu.h | 3 +++ polly/test/GPGPU/double-parallel-loop.ll | 30 +++++++++++++++++++--- 4 files changed, 73 insertions(+), 7 deletions(-) diff --git a/polly/lib/CodeGen/PPCGCodeGeneration.cpp b/polly/lib/CodeGen/PPCGCodeGeneration.cpp index cd84d3c..b61fffb 100644 --- a/polly/lib/CodeGen/PPCGCodeGeneration.cpp +++ b/polly/lib/CodeGen/PPCGCodeGeneration.cpp @@ -155,6 +155,38 @@ public: return getTaggedAccesses(MemoryAccess::MUST_WRITE); } + /// Collect parameter and array names as isl_ids. + /// + /// To reason about the different parameters and arrays used, ppcg requires + /// a list of all isl_ids in use. As PPCG traditionally performs + /// source-to-source compilation, each of these isl_ids is mapped to the + /// expression that represents it. As we do not have a corresponding + /// expression in Polly, we just map each id to a 'zero' expression to match + /// the data format that ppcg expects. + /// + /// @returns Return a map from collected ids to 'zero' ast expressions. 
+ __isl_give isl_id_to_ast_expr *getNames() { + auto *Names = isl_id_to_ast_expr_alloc( + S->getIslCtx(), S->getNumParams() + std::distance(S->array_begin(), S->array_end())); + auto *Zero = isl_ast_expr_from_val(isl_val_zero(S->getIslCtx())); + auto *Space = S->getParamSpace(); + + for (int I = 0, E = S->getNumParams(); I < E; ++I) { + isl_id *Id = isl_space_get_dim_id(Space, isl_dim_param, I); + Names = isl_id_to_ast_expr_set(Names, Id, isl_ast_expr_copy(Zero)); + } + + for (auto &Array : S->arrays()) { + auto Id = Array.second->getBasePtrId(); + Names = isl_id_to_ast_expr_set(Names, Id, isl_ast_expr_copy(Zero)); + } + + isl_space_free(Space); + isl_ast_expr_free(Zero); + + return Names; + } + /// Create a new PPCG scop from the current scop. /// /// The PPCG scop is initialized with data from the current polly::Scop. From @@ -194,7 +226,7 @@ public: PPCGScop->tagged_dep_order = nullptr; PPCGScop->schedule = S->getScheduleTree(); - PPCGScop->names = nullptr; + PPCGScop->names = getNames(); PPCGScop->pet = nullptr; @@ -216,7 +248,7 @@ public: PPCGProg->ctx = S->getIslCtx(); PPCGProg->scop = PPCGScop; - PPCGProg->context = nullptr; + PPCGProg->context = isl_set_copy(PPCGScop->context); PPCGProg->read = nullptr; PPCGProg->may_write = nullptr; PPCGProg->must_write = nullptr; @@ -267,6 +299,13 @@ public: isl_schedule *Schedule = get_schedule(PPCGGen); + int has_permutable = has_any_permutable_node(Schedule); + + if (!has_permutable || has_permutable < 0) + Schedule = isl_schedule_free(Schedule); + else + Schedule = map_to_device(PPCGGen, Schedule); + if (DumpSchedule) { isl_printer *P = isl_printer_to_str(S->getIslCtx()); P = isl_printer_set_yaml_style(P, ISL_YAML_STYLE_BLOCK); diff --git a/polly/lib/External/ppcg/gpu.c b/polly/lib/External/ppcg/gpu.c index 05f277c..3eecf45 100644 --- a/polly/lib/External/ppcg/gpu.c +++ b/polly/lib/External/ppcg/gpu.c @@ -2375,7 +2375,7 @@ static isl_bool set_permutable(__isl_keep isl_schedule_node *node, void *user) /* Does 
"schedule" contain any permutable band with at least one coincident * member? */ -static int has_any_permutable_node(__isl_keep isl_schedule *schedule) +int has_any_permutable_node(__isl_keep isl_schedule *schedule) { int any_permutable = 0; @@ -4938,7 +4938,7 @@ static __isl_give isl_schedule_node *add_to_from_device( * are separated from the other children and are not mapped to * the device. */ -static __isl_give isl_schedule *map_to_device(struct gpu_gen *gen, +__isl_give isl_schedule *map_to_device(struct gpu_gen *gen, __isl_take isl_schedule *schedule) { isl_schedule_node *node; diff --git a/polly/lib/External/ppcg/gpu.h b/polly/lib/External/ppcg/gpu.h index d06ddb2..c5009c0 100644 --- a/polly/lib/External/ppcg/gpu.h +++ b/polly/lib/External/ppcg/gpu.h @@ -353,4 +353,7 @@ int generate_gpu(isl_ctx *ctx, const char *input, FILE *out, struct gpu_types *types, void *user), void *user); __isl_give isl_schedule *get_schedule(struct gpu_gen *gen); +int has_any_permutable_node(__isl_keep isl_schedule *schedule); +__isl_give isl_schedule *map_to_device(struct gpu_gen *gen, + __isl_take isl_schedule *schedule); #endif diff --git a/polly/test/GPGPU/double-parallel-loop.ll b/polly/test/GPGPU/double-parallel-loop.ll index 4c1bc95..7ae5010 100644 --- a/polly/test/GPGPU/double-parallel-loop.ll +++ b/polly/test/GPGPU/double-parallel-loop.ll @@ -17,9 +17,33 @@ ; SCHED: domain: "{ Stmt_bb5[i0, i1] : 0 <= i0 <= 1023 and 0 <= i1 <= 1023 }" ; SCHED: child: -; SCHED: schedule: "[{ Stmt_bb5[i0, i1] -> [(i0)] }, { Stmt_bb5[i0, i1] -> [(i1)] }]" -; SCHED: permutable: 1 -; SCHED: coincident: [ 1, 1 ] +; SCHED: context: "{ [] }" +; SCHED: child: +; SCHED: extension: "{ }" +; SCHED: child: +; SCHED: sequence: +; SCHED: - filter: "{ }" +; SCHED: - filter: "{ Stmt_bb5[i0, i1] }" +; SCHED: child: +; SCHED: guard: "{ [] }" +; SCHED: child: +; SCHED: mark: "kernel" +; SCHED: child: +; SCHED: context: "[b0, b1, t0, t1] -> { [] : 0 <= b0 <= 255 and 0 <= b1 <= 255 and 0 <= t0 <= 3 and 0 <= t1 <= 
3 }" +; SCHED: child: +; SCHED: filter: "[b0, b1] -> { Stmt_bb5[i0, i1] : -3 - 4b0 + i0 <= 1024*floor((i0)/1024) <= -4b0 + i0 and -3 - 4b1 + i1 <= 1024*floor((i1)/1024) <= -4b1 + i1 }" +; SCHED: child: +; SCHED: schedule: "[{ Stmt_bb5[i0, i1] -> [(floor((i0)/1024))] }, { Stmt_bb5[i0, i1] -> [(floor((i1)/1024))] }]" +; SCHED: permutable: 1 +; SCHED: coincident: [ 1, 1 ] +; SCHED: child: +; SCHED: filter: "[t0, t1] -> { Stmt_bb5[i0, i1] : 4*floor((-t0 + i0)/4) = -t0 + i0 and 4*floor((-t1 + i1)/4) = -t1 + i1 and 0 <= t0 <= 3 and 0 <= t1 <= 3 }" +; SCHED: child: +; SCHED: schedule: "[{ Stmt_bb5[i0, i1] -> [(0)] }, { Stmt_bb5[i0, i1] -> [(0)] }]" +; SCHED: permutable: 1 +; SCHED: coincident: [ 1, 1 ] +; SCHED: - filter: "{ }" + ; void double_parallel_loop(float A[][1024]) { ; for (long i = 0; i < 1024; i++) -- 2.7.4