From b9fc860a575df0a7fdea7b08090deb6246995355 Mon Sep 17 00:00:00 2001 From: Tobias Grosser Date: Mon, 18 Jul 2016 15:44:32 +0000 Subject: [PATCH] GPGPU: collect array references Initialize the list of references to a GPU array to ensure that the arrays that need to be passed to kernel calls are computed correctly. Furthermore, the very same information is also necessary to compute synchronization correctly. Since ppcg already implements the collection of these references, all that remains is to export collect_references and call it from PPCGCodeGeneration when the GPU arrays are set up. llvm-svn: 275798 --- polly/lib/CodeGen/PPCGCodeGeneration.cpp | 2 ++ polly/lib/External/ppcg/gpu.c | 2 +- polly/lib/External/ppcg/gpu.h | 1 + polly/test/GPGPU/double-parallel-loop.ll | 2 +- polly/test/GPGPU/host-control-flow.ll | 2 +- polly/test/GPGPU/host-statement.ll | 14 ++++++++------ polly/test/GPGPU/scalar-parameter.ll | 32 ++++++++++++++++---------------- polly/test/GPGPU/scheduler-timeout.ll | 4 ++-- 8 files changed, 32 insertions(+), 27 deletions(-) diff --git a/polly/lib/CodeGen/PPCGCodeGeneration.cpp b/polly/lib/CodeGen/PPCGCodeGeneration.cpp index dc5f56f..33cf8c3 100644 --- a/polly/lib/CodeGen/PPCGCodeGeneration.cpp +++ b/polly/lib/CodeGen/PPCGCodeGeneration.cpp @@ -450,6 +450,8 @@ public: setArrayBounds(PPCGArray, Array); i++; + + collect_references(PPCGProg, &PPCGArray); } } diff --git a/polly/lib/External/ppcg/gpu.c b/polly/lib/External/ppcg/gpu.c index e76e149..8916d2d 100644 --- a/polly/lib/External/ppcg/gpu.c +++ b/polly/lib/External/ppcg/gpu.c @@ -55,7 +55,7 @@ static const char *get_outer_array_name(__isl_keep isl_map *access) /* Collect all references to the given array and store pointers to them * in array->refs. 
*/ -static void collect_references(struct gpu_prog *prog, +void collect_references(struct gpu_prog *prog, struct gpu_array_info *array) { int i; diff --git a/polly/lib/External/ppcg/gpu.h b/polly/lib/External/ppcg/gpu.h index 78bccdf..0fa7f76 100644 --- a/polly/lib/External/ppcg/gpu.h +++ b/polly/lib/External/ppcg/gpu.h @@ -371,4 +371,5 @@ __isl_give isl_ast_node *generate_code(struct gpu_gen *gen, __isl_take isl_schedule *schedule); __isl_give isl_union_set *compute_may_persist(struct gpu_prog *prog); +void collect_references(struct gpu_prog *prog, struct gpu_array_info *array); #endif diff --git a/polly/test/GPGPU/double-parallel-loop.ll b/polly/test/GPGPU/double-parallel-loop.ll index 8cfa5d9..1e977db 100644 --- a/polly/test/GPGPU/double-parallel-loop.ll +++ b/polly/test/GPGPU/double-parallel-loop.ll @@ -69,7 +69,7 @@ ; CODE-NEXT: { ; CODE-NEXT: dim3 k0_dimBlock(16, 32); ; CODE-NEXT: dim3 k0_dimGrid(32, 32); -; CODE-NEXT: kernel0 <<>> (); +; CODE-NEXT: kernel0 <<>> (dev_MemRef_A); ; CODE-NEXT: cudaCheckKernel(); ; CODE-NEXT: } diff --git a/polly/test/GPGPU/host-control-flow.ll b/polly/test/GPGPU/host-control-flow.ll index 0ee9dec..a848e6c 100644 --- a/polly/test/GPGPU/host-control-flow.ll +++ b/polly/test/GPGPU/host-control-flow.ll @@ -18,7 +18,7 @@ ; CODE-NEXT: { ; CODE-NEXT: dim3 k0_dimBlock(32); ; CODE-NEXT: dim3 k0_dimGrid(4); -; CODE-NEXT: kernel0 <<>> (c0); +; CODE-NEXT: kernel0 <<>> (dev_MemRef_A, c0); ; CODE-NEXT: cudaCheckKernel(); ; CODE-NEXT: } diff --git a/polly/test/GPGPU/host-statement.ll b/polly/test/GPGPU/host-statement.ll index cf68f73..9283ca3 100644 --- a/polly/test/GPGPU/host-statement.ll +++ b/polly/test/GPGPU/host-statement.ll @@ -20,7 +20,7 @@ declare void @llvm.lifetime.start(i64, i8* nocapture) #0 ; CODE-NEXT: { ; CODE-NEXT: dim3 k0_dimBlock(32); ; CODE-NEXT: dim3 k0_dimGrid(16); -; CODE-NEXT: kernel0 <<>> (p_0, p_1); +; CODE-NEXT: kernel0 <<>> (dev_MemRef_A, dev_MemRef_Q, p_0, p_1); ; CODE-NEXT: cudaCheckKernel(); ; CODE-NEXT: } @@ 
-28,14 +28,14 @@ declare void @llvm.lifetime.start(i64, i8* nocapture) #0 ; CODE-NEXT: { ; CODE-NEXT: dim3 k1_dimBlock(32); ; CODE-NEXT: dim3 k1_dimGrid(p_1 <= -1048034 ? 32768 : -p_1 + floord(31 * p_1 + 30, 32) + 16); -; CODE-NEXT: kernel1 <<>> (p_0, p_1); +; CODE-NEXT: kernel1 <<>> (dev_MemRef_A, dev_MemRef_R, dev_MemRef_Q, p_0, p_1); ; CODE-NEXT: cudaCheckKernel(); ; CODE-NEXT: } ; CODE: { ; CODE-NEXT: dim3 k2_dimBlock(16, 32); ; CODE-NEXT: dim3 k2_dimGrid(16, p_1 <= -7650 ? 256 : -p_1 + floord(31 * p_1 + 30, 32) + 16); -; CODE-NEXT: kernel2 <<>> (p_0, p_1); +; CODE-NEXT: kernel2 <<>> (dev_MemRef_A, dev_MemRef_R, dev_MemRef_Q, p_0, p_1); ; CODE-NEXT: cudaCheckKernel(); ; CODE-NEXT: } @@ -53,11 +53,13 @@ declare void @llvm.lifetime.start(i64, i8* nocapture) #0 ; CODE: # kernel1 ; CODE-NEXT: for (int c0 = 0; c0 <= (-p_1 - 32 * b0 + 510) / 1048576; c0 += 1) -; CODE-NEXT: if (p_1 + 32 * b0 + t0 + 1048576 * c0 <= 510) { -; CODE-NEXT: Stmt_for_body35(32 * b0 + t0 + 1048576 * c0); -; CODE-NEXT: for (int c1 = 0; c1 <= 15; c1 += 1) +; CODE-NEXT: for (int c1 = 0; c1 <= 15; c1 += 1) { +; CODE-NEXT: if (p_1 + 32 * b0 + t0 + 1048576 * c0 <= 510 && c1 == 0) +; CODE-NEXT: Stmt_for_body35(32 * b0 + t0 + 1048576 * c0); +; CODE-NEXT: if (p_1 + 32 * b0 + t0 + 1048576 * c0 <= 510) ; CODE-NEXT: for (int c3 = 0; c3 <= 31; c3 += 1) ; CODE-NEXT: Stmt_for_body42(32 * b0 + t0 + 1048576 * c0, 32 * c1 + c3); +; CODE-NEXT: sync0(); ; CODE-NEXT: } ; CODE: # kernel2 diff --git a/polly/test/GPGPU/scalar-parameter.ll b/polly/test/GPGPU/scalar-parameter.ll index 9d7883e..871ced1 100644 --- a/polly/test/GPGPU/scalar-parameter.ll +++ b/polly/test/GPGPU/scalar-parameter.ll @@ -13,7 +13,7 @@ ; CODE-NEXT: { ; CODE-NEXT: dim3 k0_dimBlock(32); ; CODE-NEXT: dim3 k0_dimGrid(32); -; CODE-NEXT: kernel0 <<>> (); +; CODE-NEXT: kernel0 <<>> (dev_MemRef_A, dev_MemRef_b); ; CODE-NEXT: cudaCheckKernel(); ; CODE-NEXT: } @@ -63,7 +63,7 @@ bb7: ; preds = %bb1 ; CODE-NEXT: { ; CODE-NEXT: dim3 k0_dimBlock(32); ; 
CODE-NEXT: dim3 k0_dimGrid(32); -; CODE-NEXT: kernel0 <<>> (); +; CODE-NEXT: kernel0 <<>> (dev_MemRef_A, dev_MemRef_b); ; CODE-NEXT: cudaCheckKernel(); ; CODE-NEXT: } @@ -113,7 +113,7 @@ bb7: ; preds = %bb1 ; CODE-NEXT: { ; CODE-NEXT: dim3 k0_dimBlock(32); ; CODE-NEXT: dim3 k0_dimGrid(32); -; CODE-NEXT: kernel0 <<>> (); +; CODE-NEXT: kernel0 <<>> (dev_MemRef_A, dev_MemRef_b); ; CODE-NEXT: cudaCheckKernel(); ; CODE-NEXT: } @@ -163,7 +163,7 @@ bb7: ; preds = %bb1 ; CODE-NEXT: { ; CODE-NEXT: dim3 k0_dimBlock(32); ; CODE-NEXT: dim3 k0_dimGrid(32); -; CODE-NEXT: kernel0 <<>> (); +; CODE-NEXT: kernel0 <<>> (dev_MemRef_A, dev_MemRef_b); ; CODE-NEXT: cudaCheckKernel(); ; CODE-NEXT: } @@ -213,7 +213,7 @@ bb7: ; preds = %bb1 ; CODE-NEXT: { ; CODE-NEXT: dim3 k0_dimBlock(32); ; CODE-NEXT: dim3 k0_dimGrid(32); -; CODE-NEXT: kernel0 <<>> (); +; CODE-NEXT: kernel0 <<>> (dev_MemRef_A, dev_MemRef_b); ; CODE-NEXT: cudaCheckKernel(); ; CODE-NEXT: } @@ -263,7 +263,7 @@ bb7: ; preds = %bb1 ; CODE-NEXT: { ; CODE-NEXT: dim3 k0_dimBlock(32); ; CODE-NEXT: dim3 k0_dimGrid(32); -; CODE-NEXT: kernel0 <<>> (); +; CODE-NEXT: kernel0 <<>> (dev_MemRef_A, dev_MemRef_b); ; CODE-NEXT: cudaCheckKernel(); ; CODE-NEXT: } @@ -312,7 +312,7 @@ bb7: ; preds = %bb1 ; CODE-NEXT: { ; CODE-NEXT: dim3 k0_dimBlock(32); ; CODE-NEXT: dim3 k0_dimGrid(32); -; CODE-NEXT: kernel0 <<>> (); +; CODE-NEXT: kernel0 <<>> (dev_MemRef_A); ; CODE-NEXT: cudaCheckKernel(); ; CODE-NEXT: } @@ -361,7 +361,7 @@ bb7: ; preds = %bb1 ; CODE-NEXT: { ; CODE-NEXT: dim3 k0_dimBlock(32); ; CODE-NEXT: dim3 k0_dimGrid(32); -; CODE-NEXT: kernel0 <<>> (); +; CODE-NEXT: kernel0 <<>> (dev_MemRef_A); ; CODE-NEXT: cudaCheckKernel(); ; CODE-NEXT: } @@ -410,7 +410,7 @@ bb7: ; preds = %bb1 ; CODE-NEXT: { ; CODE-NEXT: dim3 k0_dimBlock(32); ; CODE-NEXT: dim3 k0_dimGrid(32); -; CODE-NEXT: kernel0 <<>> (); +; CODE-NEXT: kernel0 <<>> (dev_MemRef_A); ; CODE-NEXT: cudaCheckKernel(); ; CODE-NEXT: } @@ -459,7 +459,7 @@ bb7: ; preds = %bb1 ; CODE-NEXT: { ; 
CODE-NEXT: dim3 k0_dimBlock(32); ; CODE-NEXT: dim3 k0_dimGrid(32); -; CODE-NEXT: kernel0 <<>> (); +; CODE-NEXT: kernel0 <<>> (dev_MemRef_A); ; CODE-NEXT: cudaCheckKernel(); ; CODE-NEXT: } @@ -508,7 +508,7 @@ bb7: ; preds = %bb1 ; CODE-NEXT: { ; CODE-NEXT: dim3 k0_dimBlock(32); ; CODE-NEXT: dim3 k0_dimGrid(32); -; CODE-NEXT: kernel0 <<>> (); +; CODE-NEXT: kernel0 <<>> (dev_MemRef_A); ; CODE-NEXT: cudaCheckKernel(); ; CODE-NEXT: } @@ -557,7 +557,7 @@ bb7: ; preds = %bb1 ; CODE-NEXT: { ; CODE-NEXT: dim3 k0_dimBlock(32); ; CODE-NEXT: dim3 k0_dimGrid(32); -; CODE-NEXT: kernel0 <<>> (); +; CODE-NEXT: kernel0 <<>> (dev_MemRef_A); ; CODE-NEXT: cudaCheckKernel(); ; CODE-NEXT: } @@ -606,7 +606,7 @@ bb7: ; preds = %bb1 ; CODE-NEXT: { ; CODE-NEXT: dim3 k0_dimBlock(32); ; CODE-NEXT: dim3 k0_dimGrid(32); -; CODE-NEXT: kernel0 <<>> (); +; CODE-NEXT: kernel0 <<>> (dev_MemRef_A); ; CODE-NEXT: cudaCheckKernel(); ; CODE-NEXT: } @@ -655,7 +655,7 @@ bb7: ; preds = %bb1 ; CODE-NEXT: { ; CODE-NEXT: dim3 k0_dimBlock(32); ; CODE-NEXT: dim3 k0_dimGrid(32); -; CODE-NEXT: kernel0 <<>> (); +; CODE-NEXT: kernel0 <<>> (dev_MemRef_A); ; CODE-NEXT: cudaCheckKernel(); ; CODE-NEXT: } @@ -704,7 +704,7 @@ bb7: ; preds = %bb1 ; CODE-NEXT: { ; CODE-NEXT: dim3 k0_dimBlock(32); ; CODE-NEXT: dim3 k0_dimGrid(32); -; CODE-NEXT: kernel0 <<>> (); +; CODE-NEXT: kernel0 <<>> (dev_MemRef_A); ; CODE-NEXT: cudaCheckKernel(); ; CODE-NEXT: } @@ -753,7 +753,7 @@ bb7: ; preds = %bb1 ; CODE-NEXT: { ; CODE-NEXT: dim3 k0_dimBlock(32); ; CODE-NEXT: dim3 k0_dimGrid(32); -; CODE-NEXT: kernel0 <<>> (); +; CODE-NEXT: kernel0 <<>> (dev_MemRef_A); ; CODE-NEXT: cudaCheckKernel(); ; CODE-NEXT: } diff --git a/polly/test/GPGPU/scheduler-timeout.ll b/polly/test/GPGPU/scheduler-timeout.ll index 4c6fed6..727e04a 100644 --- a/polly/test/GPGPU/scheduler-timeout.ll +++ b/polly/test/GPGPU/scheduler-timeout.ll @@ -40,14 +40,14 @@ target triple = "x86_64-unknown-linux-gnu" ; CODE-NEXT: { ; CODE-NEXT: dim3 k0_dimBlock(16, 32); ; CODE-NEXT: 
dim3 k0_dimGrid(128, 128); -; CODE-NEXT: kernel0 <<>> (); +; CODE-NEXT: kernel0 <<>> (dev_MemRef_tmp, dev_MemRef_A, dev_MemRef_alpha, dev_MemRef_B); ; CODE-NEXT: cudaCheckKernel(); ; CODE-NEXT: } ; CODE: { ; CODE-NEXT: dim3 k1_dimBlock(16, 32); ; CODE-NEXT: dim3 k1_dimGrid(128, 128); -; CODE-NEXT: kernel1 <<>> (); +; CODE-NEXT: kernel1 <<>> (dev_MemRef_tmp, dev_MemRef_D, dev_MemRef_beta, dev_MemRef_C); ; CODE-NEXT: cudaCheckKernel(); ; CODE-NEXT: } -- 2.7.4