From e938517e379160d3033557af5c6fcf8a89a4910f Mon Sep 17 00:00:00 2001 From: Tobias Grosser Date: Thu, 14 Jul 2016 10:22:19 +0000 Subject: [PATCH] GPGPU: create default initialized PPCG scop and gpu program At this stage, we do not yet modify the IR but just generate a default initialized ppcg_scop and gpu_prog and free both immediately. Both will later be filled with data from the polly::Scop and are needed to use PPCG for GPU schedule generation. This commit does not yet perform any GPU code generation, but ensures that the basic infrastructure has been put in place. We also add a simple test case to ensure the new code is run and use this opportunity to verify that GPU_CODEGEN tests are only run if GPU code generation has been enabled in cmake. llvm-svn: 275389 --- polly/CMakeLists.txt | 6 ++ polly/lib/CodeGen/PPCGCodeGeneration.cpp | 138 ++++++++++++++++++++++++++++++- polly/lib/External/ppcg/ppcg.c | 4 +- polly/lib/External/ppcg/ppcg.h | 2 + polly/test/GPGPU/double-parallel-loop.ll | 62 ++++++++++++++ polly/test/lit.site.cfg.in | 5 +- 6 files changed, 214 insertions(+), 3 deletions(-) create mode 100644 polly/test/GPGPU/double-parallel-loop.ll diff --git a/polly/CMakeLists.txt b/polly/CMakeLists.txt index 250f8e5..d4f467a 100644 --- a/polly/CMakeLists.txt +++ b/polly/CMakeLists.txt @@ -121,8 +121,11 @@ if (POLLY_ENABLE_GPGPU_CODEGEN) # a cuda library. FIND_PACKAGE(CUDA) set(GPU_CODEGEN TRUE) +else(POLLY_ENABLE_GPGPU_CODEGEN) + set(GPU_CODEGEN FALSE) endif(POLLY_ENABLE_GPGPU_CODEGEN) + # Support GPGPU code generation if the library is available. 
if (CUDALIB_FOUND) INCLUDE_DIRECTORIES( ${CUDALIB_INCLUDE_DIR} ) @@ -134,6 +137,9 @@ include_directories( ${CMAKE_CURRENT_SOURCE_DIR}/lib/JSON/include ${CMAKE_CURRENT_BINARY_DIR}/lib/External/isl/include ${CMAKE_CURRENT_SOURCE_DIR}/lib/External/isl/include + ${CMAKE_CURRENT_SOURCE_DIR}/lib/External/pet/include + ${CMAKE_CURRENT_BINARY_DIR}/lib/External/ppcg + ${CMAKE_CURRENT_SOURCE_DIR}/lib/External/ppcg ${CMAKE_CURRENT_BINARY_DIR}/include ) diff --git a/polly/lib/CodeGen/PPCGCodeGeneration.cpp b/polly/lib/CodeGen/PPCGCodeGeneration.cpp index 8fa3b7a..995957b 100644 --- a/polly/lib/CodeGen/PPCGCodeGeneration.cpp +++ b/polly/lib/CodeGen/PPCGCodeGeneration.cpp @@ -22,6 +22,11 @@ #include "llvm/Analysis/PostDominators.h" #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" +extern "C" { +#include "gpu.h" +#include "ppcg.h" +} + #include "llvm/Support/Debug.h" using namespace polly; @@ -34,9 +39,140 @@ class PPCGCodeGeneration : public ScopPass { public: static char ID; + /// The scop that is currently processed. + Scop *S; + PPCGCodeGeneration() : ScopPass(ID) {} - bool runOnScop(Scop &S) override { return true; } + /// Construct compilation options for PPCG. + /// + /// @returns The compilation options. 
+ ppcg_options *createPPCGOptions() { + auto DebugOptions = + (ppcg_debug_options *)malloc(sizeof(ppcg_debug_options)); + auto Options = (ppcg_options *)malloc(sizeof(ppcg_options)); + + DebugOptions->dump_schedule_constraints = false; + DebugOptions->dump_schedule = false; + DebugOptions->dump_final_schedule = false; + DebugOptions->dump_sizes = false; + + Options->debug = DebugOptions; + + Options->reschedule = true; + Options->scale_tile_loops = false; + Options->wrap = false; + + Options->non_negative_parameters = false; + Options->ctx = nullptr; + Options->sizes = nullptr; + + Options->use_private_memory = false; + Options->use_shared_memory = false; + Options->max_shared_memory = 0; + + Options->target = PPCG_TARGET_CUDA; + Options->openmp = false; + Options->linearize_device_arrays = true; + Options->live_range_reordering = false; + + Options->opencl_compiler_options = nullptr; + Options->opencl_use_gpu = false; + Options->opencl_n_include_file = 0; + Options->opencl_include_files = nullptr; + Options->opencl_print_kernel_types = false; + Options->opencl_embed_kernel_code = false; + + Options->save_schedule_file = nullptr; + Options->load_schedule_file = nullptr; + + return Options; + } + + /// Create a new PPCG scop from the current scop. + /// + /// For now the created scop is initialized to 'zero' and does not contain + /// any scop-specific information. + /// + /// @returns A new ppcg scop. 
+ ppcg_scop *createPPCGScop() { + auto PPCGScop = (ppcg_scop *)malloc(sizeof(ppcg_scop)); + + PPCGScop->options = createPPCGOptions(); + + PPCGScop->start = 0; + PPCGScop->end = 0; + + PPCGScop->context = nullptr; + PPCGScop->domain = nullptr; + PPCGScop->call = nullptr; + PPCGScop->tagged_reads = nullptr; + PPCGScop->reads = nullptr; + PPCGScop->live_in = nullptr; + PPCGScop->tagged_may_writes = nullptr; + PPCGScop->may_writes = nullptr; + PPCGScop->tagged_must_writes = nullptr; + PPCGScop->must_writes = nullptr; + PPCGScop->live_out = nullptr; + PPCGScop->tagged_must_kills = nullptr; + PPCGScop->tagger = nullptr; + + PPCGScop->independence = nullptr; + PPCGScop->dep_flow = nullptr; + PPCGScop->tagged_dep_flow = nullptr; + PPCGScop->dep_false = nullptr; + PPCGScop->dep_forced = nullptr; + PPCGScop->dep_order = nullptr; + PPCGScop->tagged_dep_order = nullptr; + + PPCGScop->schedule = nullptr; + PPCGScop->names = nullptr; + + PPCGScop->pet = nullptr; + + return PPCGScop; + } + + /// Create a default-initialized PPCG GPU program. + /// + /// @returns A new GPU program description. 
+ gpu_prog *createPPCGProg(ppcg_scop *PPCGScop) { + + if (!PPCGScop) + return nullptr; + + auto PPCGProg = isl_calloc_type(S->getIslCtx(), struct gpu_prog); + + PPCGProg->ctx = S->getIslCtx(); + PPCGProg->scop = PPCGScop; + PPCGProg->context = nullptr; + PPCGProg->read = nullptr; + PPCGProg->may_write = nullptr; + PPCGProg->must_write = nullptr; + PPCGProg->tagged_must_kill = nullptr; + PPCGProg->may_persist = nullptr; + PPCGProg->to_outer = nullptr; + PPCGProg->to_inner = nullptr; + PPCGProg->any_to_outer = nullptr; + PPCGProg->array_order = nullptr; + PPCGProg->n_stmts = 0; + PPCGProg->stmts = nullptr; + PPCGProg->n_array = 0; + PPCGProg->array = nullptr; + + return PPCGProg; + } + + bool runOnScop(Scop &CurrentScop) override { + S = &CurrentScop; + + auto PPCGScop = createPPCGScop(); + auto PPCGProg = createPPCGProg(PPCGScop); + gpu_prog_free(PPCGProg); + ppcg_scop_free(PPCGScop); + + return true; + } void printScop(raw_ostream &, Scop &) const override {} diff --git a/polly/lib/External/ppcg/ppcg.c b/polly/lib/External/ppcg/ppcg.c index 3417902..1c514a6 100644 --- a/polly/lib/External/ppcg/ppcg.c +++ b/polly/lib/External/ppcg/ppcg.c @@ -813,7 +813,7 @@ static __isl_give isl_set *set_intersect_str(__isl_take isl_set *set, return set; } -static void *ppcg_scop_free(struct ppcg_scop *ps) +void *ppcg_scop_free(struct ppcg_scop *ps) { if (!ps) return NULL; @@ -1007,6 +1007,7 @@ static int check_options(isl_ctx *ctx) return 0; } +#if 0 int main(int argc, char **argv) { int r; @@ -1037,3 +1038,4 @@ int main(int argc, char **argv) return r; } +#endif diff --git a/polly/lib/External/ppcg/ppcg.h b/polly/lib/External/ppcg/ppcg.h index d64ea67..c03fc3c 100644 --- a/polly/lib/External/ppcg/ppcg.h +++ b/polly/lib/External/ppcg/ppcg.h @@ -114,4 +114,6 @@ int ppcg_transform(isl_ctx *ctx, const char *input, FILE *out, __isl_give isl_printer *(*fn)(__isl_take isl_printer *p, struct ppcg_scop *scop, void *user), void *user); +void *ppcg_scop_free(struct ppcg_scop *ps); + #endif 
diff --git a/polly/test/GPGPU/double-parallel-loop.ll b/polly/test/GPGPU/double-parallel-loop.ll new file mode 100644 index 0000000..35a1219 --- /dev/null +++ b/polly/test/GPGPU/double-parallel-loop.ll @@ -0,0 +1,62 @@ +; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s +; RUN: opt %loadPolly -polly-codegen-ppcg -S < %s +; REQUIRES: pollyacc + +; CHECK: Stmt_bb5 +; CHECK: Domain := +; CHECK: { Stmt_bb5[i0, i1] : 0 <= i0 <= 1023 and 0 <= i1 <= 1023 }; +; CHECK: Schedule := +; CHECK: { Stmt_bb5[i0, i1] -> [i0, i1] }; +; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK: { Stmt_bb5[i0, i1] -> MemRef_A[i0, i1] }; +; CHECK: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK: { Stmt_bb5[i0, i1] -> MemRef_A[i0, i1] }; +; +; void double_parallel_loop(float A[][1024]) { +; for (long i = 0; i < 1024; i++) +; for (long j = 0; j < 1024; j++) +; A[i][j] += i * j; +; } +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @double_parallel_loop([1024 x float]* %A) { +bb: + br label %bb2 + +bb2: ; preds = %bb13, %bb + %i.0 = phi i64 [ 0, %bb ], [ %tmp14, %bb13 ] + %exitcond1 = icmp ne i64 %i.0, 1024 + br i1 %exitcond1, label %bb3, label %bb15 + +bb3: ; preds = %bb2 + br label %bb4 + +bb4: ; preds = %bb10, %bb3 + %j.0 = phi i64 [ 0, %bb3 ], [ %tmp11, %bb10 ] + %exitcond = icmp ne i64 %j.0, 1024 + br i1 %exitcond, label %bb5, label %bb12 + +bb5: ; preds = %bb4 + %tmp = mul nuw nsw i64 %i.0, %j.0 + %tmp6 = sitofp i64 %tmp to float + %tmp7 = getelementptr inbounds [1024 x float], [1024 x float]* %A, i64 %i.0, i64 %j.0 + %tmp8 = load float, float* %tmp7, align 4 + %tmp9 = fadd float %tmp8, %tmp6 + store float %tmp9, float* %tmp7, align 4 + br label %bb10 + +bb10: ; preds = %bb5 + %tmp11 = add nuw nsw i64 %j.0, 1 + br label %bb4 + +bb12: ; preds = %bb4 + br label %bb13 + +bb13: ; preds = %bb12 + %tmp14 = add nuw nsw i64 %i.0, 1 + br label %bb2 + +bb15: ; preds = %bb2 + ret void +} diff --git a/polly/test/lit.site.cfg.in 
b/polly/test/lit.site.cfg.in index c94e51c..0132268 100644 --- a/polly/test/lit.site.cfg.in +++ b/polly/test/lit.site.cfg.in @@ -7,7 +7,7 @@ config.llvm_libs_dir = "@LLVM_LIBS_DIR@" config.polly_obj_root = "@POLLY_BINARY_DIR@" config.polly_lib_dir = "@POLLY_LIB_DIR@" config.target_triple = "@TARGET_TRIPLE@" -config.enable_gpgpu_codegen = "@CUDALIB_FOUND@" +config.enable_gpgpu_codegen = "@GPU_CODEGEN@" config.link_polly_into_tools = "@LINK_POLLY_INTO_TOOLS@" ## Check the current platform with regex @@ -45,5 +45,8 @@ else: + ' -polly-remarks-minimal ' )) +if config.enable_gpgpu_codegen == 'TRUE' : + config.available_features.add('pollyacc') + # Let the main config do the real work. lit_config.load_config(config, "@POLLY_SOURCE_DIR@/test/lit.cfg") -- 2.7.4