From 2d010daf854e97f8d799d7316f1f8615211fca56 Mon Sep 17 00:00:00 2001 From: Tobias Grosser Date: Fri, 15 Jul 2016 10:51:14 +0000 Subject: [PATCH] GPGPU: Make sure scops with more than one array work We use this opportunity to add a test case containing a scalar parameter. llvm-svn: 275547 --- polly/lib/CodeGen/PPCGCodeGeneration.cpp | 1 + polly/test/GPGPU/scalar-parameter.ll | 55 ++++++++++++++++++++++++++++++++ 2 files changed, 56 insertions(+) create mode 100644 polly/test/GPGPU/scalar-parameter.ll diff --git a/polly/lib/CodeGen/PPCGCodeGeneration.cpp b/polly/lib/CodeGen/PPCGCodeGeneration.cpp index abc03c40..b3ec5f6 100644 --- a/polly/lib/CodeGen/PPCGCodeGeneration.cpp +++ b/polly/lib/CodeGen/PPCGCodeGeneration.cpp @@ -408,6 +408,7 @@ public: PPCGArray.dep_order = nullptr; setArrayBounds(PPCGArray, Array); + i++; } } diff --git a/polly/test/GPGPU/scalar-parameter.ll b/polly/test/GPGPU/scalar-parameter.ll new file mode 100644 index 0000000..9e76a76 --- /dev/null +++ b/polly/test/GPGPU/scalar-parameter.ll @@ -0,0 +1,55 @@ +; RUN: opt %loadPolly -polly-codegen-ppcg -polly-acc-dump-code \ +; RUN: -disable-output < %s | \ +; RUN: FileCheck -check-prefix=CODE %s + +; REQUIRES: pollyacc +; +; CODE: Code +; CODE-NEXT: ==== +; CODE-NEXT: # host +; CODE-NEXT: { +; CODE-NEXT: cudaCheckReturn(cudaMemcpy(dev_MemRef_A, MemRef_A, (1024) * sizeof(float), cudaMemcpyHostToDevice)); +; CODE-NEXT: cudaCheckReturn(cudaMemcpy(dev_MemRef_b, &MemRef_b, sizeof(float), cudaMemcpyHostToDevice)); +; CODE-NEXT: { +; CODE-NEXT: dim3 k0_dimBlock(32); +; CODE-NEXT: dim3 k0_dimGrid(32); +; CODE-NEXT: kernel0 <<<k0_dimGrid, k0_dimBlock>>> (); +; CODE-NEXT: cudaCheckKernel(); +; CODE-NEXT: } + +; CODE: cudaCheckReturn(cudaMemcpy(MemRef_A, dev_MemRef_A, (1024) * sizeof(float), cudaMemcpyDeviceToHost)); +; CODE-NEXT: } + +; CODE: # kernel0 +; CODE-NEXT: Stmt_bb2(32 * b0 + t0); + +; void foo(float A[], float b) { +; for (long i = 0; i < 1024; i++) +; A[i] += b; +; } +; +target datalayout = 
"e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @foo(float* %A, float %b) { +bb: + br label %bb1 + +bb1: ; preds = %bb5, %bb + %i.0 = phi i64 [ 0, %bb ], [ %tmp6, %bb5 ] + %exitcond = icmp ne i64 %i.0, 1024 + br i1 %exitcond, label %bb2, label %bb7 + +bb2: ; preds = %bb1 + %tmp = getelementptr inbounds float, float* %A, i64 %i.0 + %tmp3 = load float, float* %tmp, align 4 + %tmp4 = fadd float %tmp3, %b + store float %tmp4, float* %tmp, align 4 + br label %bb5 + +bb5: ; preds = %bb2 + %tmp6 = add nuw nsw i64 %i.0, 1 + br label %bb1 + +bb7: ; preds = %bb1 + ret void +} -- 2.7.4