From d277feda9121245ddf7d619a67e5338444e52937 Mon Sep 17 00:00:00 2001 From: Siddharth Bhat Date: Tue, 25 Apr 2017 08:08:29 +0000 Subject: [PATCH] [PPCGCodeGeneration] Update PPCG Code Generation for OpenCL compatibility Added a small change to the way pointer arguments are set in the kernel code generation. The pointer type is now retrieved in a way that explicitly requests the global address space annotation. This is necessary if the IR is to be run through NVPTX to generate OpenCL-compatible PTX. The changes do not affect the PTX strings generated for the CUDA target (nvptx64-nvidia-cuda), but are necessary for OpenCL (nvptx64-nvidia-nvcl). Additionally, the data layout has been updated to what the NVPTX backend requests/recommends. Contributed-by: Philipp Schaad Reviewers: Meinersbur, grosser, bollu Reviewed By: grosser, bollu Subscribers: jlebar, pollydev, llvm-commits, nemanjai, yaxunl, Anastasia Tags: #polly Differential Revision: https://reviews.llvm.org/D32215 llvm-svn: 301299 --- polly/lib/CodeGen/PPCGCodeGeneration.cpp | 18 ++++++++---- polly/test/GPGPU/cuda-annotations.ll | 4 +-- polly/test/GPGPU/host-control-flow.ll | 32 +++++++++++----------- polly/test/GPGPU/kernel-params-only-some-arrays.ll | 8 +++--- polly/test/GPGPU/kernel-params-scop-parameter.ll | 2 +- polly/test/GPGPU/non-read-only-scalars.ll | 4 +-- polly/test/GPGPU/phi-nodes-in-kernel.ll | 4 +-- polly/test/GPGPU/private-memory.ll | 12 ++++---- .../GPGPU/remove-dead-instructions-in-stmt-2.ll | 2 +- .../test/GPGPU/remove-dead-instructions-in-stmt.ll | 2 +- polly/test/GPGPU/shared-memory-two-dimensional.ll | 4 +-- polly/test/GPGPU/shared-memory.ll | 12 ++++---- 12 files changed, 55 insertions(+), 49 deletions(-) diff --git a/polly/lib/CodeGen/PPCGCodeGeneration.cpp b/polly/lib/CodeGen/PPCGCodeGeneration.cpp index aeef7ec..1545ac5 100644 --- a/polly/lib/CodeGen/PPCGCodeGeneration.cpp +++ b/polly/lib/CodeGen/PPCGCodeGeneration.cpp @@ -1273,12 +1273,17 @@ void GPUNodeBuilder::createKernel(__isl_take 
isl_ast_node *KernelStmt) { /// /// @param is64Bit Are we looking for a 64 bit architecture? static std::string computeNVPTXDataLayout(bool is64Bit) { - std::string Ret = "e"; + std::string Ret = ""; - if (!is64Bit) - Ret += "-p:32:32"; - - Ret += "-i64:64-v16:16-v32:32-n16:32:64"; + if (!is64Bit) { + Ret += "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:" + "64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:" + "64-v128:128:128-n16:32:64"; + } else { + Ret += "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:" + "64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:" + "64-v128:128:128-n16:32:64"; + } return Ret; } @@ -1298,7 +1303,8 @@ GPUNodeBuilder::createKernelFunctionDecl(ppcg_kernel *Kernel, const ScopArrayInfo *SAI = ScopArrayInfo::getFromId(Id); Args.push_back(SAI->getElementType()); } else { - Args.push_back(Builder.getInt8PtrTy()); + static const int UseGlobalMemory = 1; + Args.push_back(Builder.getInt8PtrTy(UseGlobalMemory)); } } diff --git a/polly/test/GPGPU/cuda-annotations.ll b/polly/test/GPGPU/cuda-annotations.ll index 0adac1e..ed1fa6a 100644 --- a/polly/test/GPGPU/cuda-annotations.ll +++ b/polly/test/GPGPU/cuda-annotations.ll @@ -4,11 +4,11 @@ ; REQUIRES: pollyacc -; KERNEL: define ptx_kernel void @kernel_0(i8* %MemRef_A, i64 %n) #0 { +; KERNEL: define ptx_kernel void @kernel_0(i8 addrspace(1)* %MemRef_A, i64 %n) #0 { ; KERNEL: !nvvm.annotations = !{!0} -; KERNEL: !0 = !{void (i8*, i64)* @kernel_0, !"maxntidx", i32 32, !"maxntidy", i32 1, !"maxntidz", i32 1} +; KERNEL: !0 = !{void (i8 addrspace(1)*, i64)* @kernel_0, !"maxntidx", i32 32, !"maxntidy", i32 1, !"maxntidz", i32 1} target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/polly/test/GPGPU/host-control-flow.ll b/polly/test/GPGPU/host-control-flow.ll index 7597c1c..9e940aa 100644 --- a/polly/test/GPGPU/host-control-flow.ll +++ b/polly/test/GPGPU/host-control-flow.ll @@ -42,7 +42,7 @@ ; IR-NEXT: %polly.loop_cond = icmp sle i64 %polly.indvar, 98 ; IR-NEXT: br i1 
%polly.loop_cond, label %polly.loop_header, label %polly.loop_exit -; KERNEL-IR: define ptx_kernel void @kernel_0(i8* %MemRef_A, i64 %c0) +; KERNEL-IR: define ptx_kernel void @kernel_0(i8 addrspace(1)* %MemRef_A, i64 %c0) ; KERNEL-IR-LABEL: entry: ; KERNEL-IR-NEXT: %0 = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x() ; KERNEL-IR-NEXT: %b0 = zext i32 %0 to i64 @@ -65,35 +65,35 @@ ; KERNEL-IR-NEXT: br label %polly.stmt.for.body3 ; KERNEL-IR-LABEL: polly.stmt.for.body3: ; preds = %polly.then -; KERNEL-IR-NEXT: %polly.access.cast.MemRef_A = bitcast i8* %MemRef_A to float* +; KERNEL-IR-NEXT: %polly.access.cast.MemRef_A = bitcast i8 addrspace(1)* %MemRef_A to float addrspace(1)* ; KERNEL-IR-NEXT: %pexp.pdiv_r = urem i64 %c0, 2 ; KERNEL-IR-NEXT: %polly.access.mul.MemRef_A = mul nsw i64 %pexp.pdiv_r, 100 ; KERNEL-IR-NEXT: %7 = mul nsw i64 32, %b0 ; KERNEL-IR-NEXT: %8 = add nsw i64 %7, %t0 ; KERNEL-IR-NEXT: %polly.access.add.MemRef_A = add nsw i64 %polly.access.mul.MemRef_A, %8 -; KERNEL-IR-NEXT: %polly.access.MemRef_A = getelementptr float, float* %polly.access.cast.MemRef_A, i64 %polly.access.add.MemRef_A -; KERNEL-IR-NEXT: %tmp_p_scalar_ = load float, float* %polly.access.MemRef_A, align 4 -; KERNEL-IR-NEXT: %polly.access.cast.MemRef_A1 = bitcast i8* %MemRef_A to float* +; KERNEL-IR-NEXT: %polly.access.MemRef_A = getelementptr float, float addrspace(1)* %polly.access.cast.MemRef_A, i64 %polly.access.add.MemRef_A +; KERNEL-IR-NEXT: %tmp_p_scalar_ = load float, float addrspace(1)* %polly.access.MemRef_A, align 4 +; KERNEL-IR-NEXT: %polly.access.cast.MemRef_A1 = bitcast i8 addrspace(1)* %MemRef_A to float addrspace(1)* ; KERNEL-IR-NEXT: %pexp.pdiv_r2 = urem i64 %c0, 2 ; KERNEL-IR-NEXT: %polly.access.mul.MemRef_A3 = mul nsw i64 %pexp.pdiv_r2, 100 ; KERNEL-IR-NEXT: %9 = mul nsw i64 32, %b0 ; KERNEL-IR-NEXT: %10 = add nsw i64 %9, %t0 ; KERNEL-IR-NEXT: %11 = add nsw i64 %10, 1 ; KERNEL-IR-NEXT: %polly.access.add.MemRef_A4 = add nsw i64 %polly.access.mul.MemRef_A3, %11 -; 
KERNEL-IR-NEXT: %polly.access.MemRef_A5 = getelementptr float, float* %polly.access.cast.MemRef_A1, i64 %polly.access.add.MemRef_A4 -; KERNEL-IR-NEXT: %tmp2_p_scalar_ = load float, float* %polly.access.MemRef_A5, align 4 +; KERNEL-IR-NEXT: %polly.access.MemRef_A5 = getelementptr float, float addrspace(1)* %polly.access.cast.MemRef_A1, i64 %polly.access.add.MemRef_A4 +; KERNEL-IR-NEXT: %tmp2_p_scalar_ = load float, float addrspace(1)* %polly.access.MemRef_A5, align 4 ; KERNEL-IR-NEXT: %p_add = fadd float %tmp_p_scalar_, %tmp2_p_scalar_ -; KERNEL-IR-NEXT: %polly.access.cast.MemRef_A6 = bitcast i8* %MemRef_A to float* +; KERNEL-IR-NEXT: %polly.access.cast.MemRef_A6 = bitcast i8 addrspace(1)* %MemRef_A to float addrspace(1)* ; KERNEL-IR-NEXT: %pexp.pdiv_r7 = urem i64 %c0, 2 ; KERNEL-IR-NEXT: %polly.access.mul.MemRef_A8 = mul nsw i64 %pexp.pdiv_r7, 100 ; KERNEL-IR-NEXT: %12 = mul nsw i64 32, %b0 ; KERNEL-IR-NEXT: %13 = add nsw i64 %12, %t0 ; KERNEL-IR-NEXT: %14 = add nsw i64 %13, 2 ; KERNEL-IR-NEXT: %polly.access.add.MemRef_A9 = add nsw i64 %polly.access.mul.MemRef_A8, %14 -; KERNEL-IR-NEXT: %polly.access.MemRef_A10 = getelementptr float, float* %polly.access.cast.MemRef_A6, i64 %polly.access.add.MemRef_A9 -; KERNEL-IR-NEXT: %tmp3_p_scalar_ = load float, float* %polly.access.MemRef_A10, align 4 +; KERNEL-IR-NEXT: %polly.access.MemRef_A10 = getelementptr float, float addrspace(1)* %polly.access.cast.MemRef_A6, i64 %polly.access.add.MemRef_A9 +; KERNEL-IR-NEXT: %tmp3_p_scalar_ = load float, float addrspace(1)* %polly.access.MemRef_A10, align 4 ; KERNEL-IR-NEXT: %p_add12 = fadd float %p_add, %tmp3_p_scalar_ -; KERNEL-IR-NEXT: %polly.access.cast.MemRef_A11 = bitcast i8* %MemRef_A to float* +; KERNEL-IR-NEXT: %polly.access.cast.MemRef_A11 = bitcast i8 addrspace(1)* %MemRef_A to float addrspace(1)* ; KERNEL-IR-NEXT: %15 = add nsw i64 %c0, 1 ; KERNEL-IR-NEXT: %pexp.pdiv_r12 = urem i64 %15, 2 ; KERNEL-IR-NEXT: %polly.access.mul.MemRef_A13 = mul nsw i64 %pexp.pdiv_r12, 100 @@ 
-101,10 +101,10 @@ ; KERNEL-IR-NEXT: %17 = add nsw i64 %16, %t0 ; KERNEL-IR-NEXT: %18 = add nsw i64 %17, 1 ; KERNEL-IR-NEXT: %polly.access.add.MemRef_A14 = add nsw i64 %polly.access.mul.MemRef_A13, %18 -; KERNEL-IR-NEXT: %polly.access.MemRef_A15 = getelementptr float, float* %polly.access.cast.MemRef_A11, i64 %polly.access.add.MemRef_A14 -; KERNEL-IR-NEXT: %tmp4_p_scalar_ = load float, float* %polly.access.MemRef_A15, align 4 +; KERNEL-IR-NEXT: %polly.access.MemRef_A15 = getelementptr float, float addrspace(1)* %polly.access.cast.MemRef_A11, i64 %polly.access.add.MemRef_A14 +; KERNEL-IR-NEXT: %tmp4_p_scalar_ = load float, float addrspace(1)* %polly.access.MemRef_A15, align 4 ; KERNEL-IR-NEXT: %p_add17 = fadd float %tmp4_p_scalar_, %p_add12 -; KERNEL-IR-NEXT: %polly.access.cast.MemRef_A16 = bitcast i8* %MemRef_A to float* +; KERNEL-IR-NEXT: %polly.access.cast.MemRef_A16 = bitcast i8 addrspace(1)* %MemRef_A to float addrspace(1)* ; KERNEL-IR-NEXT: %19 = add nsw i64 %c0, 1 ; KERNEL-IR-NEXT: %pexp.pdiv_r17 = urem i64 %19, 2 ; KERNEL-IR-NEXT: %polly.access.mul.MemRef_A18 = mul nsw i64 %pexp.pdiv_r17, 100 @@ -112,8 +112,8 @@ ; KERNEL-IR-NEXT: %21 = add nsw i64 %20, %t0 ; KERNEL-IR-NEXT: %22 = add nsw i64 %21, 1 ; KERNEL-IR-NEXT: %polly.access.add.MemRef_A19 = add nsw i64 %polly.access.mul.MemRef_A18, %22 -; KERNEL-IR-NEXT: %polly.access.MemRef_A20 = getelementptr float, float* %polly.access.cast.MemRef_A16, i64 %polly.access.add.MemRef_A19 -; KERNEL-IR-NEXT: store float %p_add17, float* %polly.access.MemRef_A20, align 4 +; KERNEL-IR-NEXT: %polly.access.MemRef_A20 = getelementptr float, float addrspace(1)* %polly.access.cast.MemRef_A16, i64 %polly.access.add.MemRef_A19 +; KERNEL-IR-NEXT: store float %p_add17, float addrspace(1)* %polly.access.MemRef_A20, align 4 ; KERNEL-IR-NEXT: br label %polly.merge ; KERNEL-IR-LABEL: polly.else: ; preds = %polly.cond diff --git a/polly/test/GPGPU/kernel-params-only-some-arrays.ll b/polly/test/GPGPU/kernel-params-only-some-arrays.ll 
index 5ed555b..193de95 100644 --- a/polly/test/GPGPU/kernel-params-only-some-arrays.ll +++ b/polly/test/GPGPU/kernel-params-only-some-arrays.ll @@ -18,10 +18,10 @@ ; KERNEL: ; ModuleID = 'kernel_0' ; KERNEL-NEXT: source_filename = "kernel_0" -; KERNEL-NEXT: target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64" +; KERNEL-NEXT: target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" ; KERNEL-NEXT: target triple = "nvptx64-nvidia-cuda" -; KERNEL: define ptx_kernel void @kernel_0(i8* %MemRef_A) +; KERNEL: define ptx_kernel void @kernel_0(i8 addrspace(1)* %MemRef_A) ; KERNEL-NEXT: entry: ; KERNEL-NEXT: %0 = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x() ; KERNEL-NEXT: %b0 = zext i32 %0 to i64 @@ -33,10 +33,10 @@ ; KERNEL: ; ModuleID = 'kernel_1' ; KERNEL-NEXT: source_filename = "kernel_1" -; KERNEL-NEXT: target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64" +; KERNEL-NEXT: target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" ; KERNEL-NEXT: target triple = "nvptx64-nvidia-cuda" -; KERNEL: define ptx_kernel void @kernel_1(i8* %MemRef_B) +; KERNEL: define ptx_kernel void @kernel_1(i8 addrspace(1)* %MemRef_B) ; KERNEL-NEXT: entry: ; KERNEL-NEXT: %0 = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x() ; KERNEL-NEXT: %b0 = zext i32 %0 to i64 diff --git a/polly/test/GPGPU/kernel-params-scop-parameter.ll b/polly/test/GPGPU/kernel-params-scop-parameter.ll index 5ec5d65..7f07f2d 100644 --- a/polly/test/GPGPU/kernel-params-scop-parameter.ll +++ b/polly/test/GPGPU/kernel-params-scop-parameter.ll @@ -9,7 +9,7 @@ ; A[i] += 42; ; } -; KERNEL-IR: define ptx_kernel void @kernel_0(i8* %MemRef_A, i64 %n) +; KERNEL-IR: define ptx_kernel void @kernel_0(i8 addrspace(1)* %MemRef_A, i64 %n) target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/polly/test/GPGPU/non-read-only-scalars.ll 
b/polly/test/GPGPU/non-read-only-scalars.ll index bebf021..ba2674c 100644 --- a/polly/test/GPGPU/non-read-only-scalars.ll +++ b/polly/test/GPGPU/non-read-only-scalars.ll @@ -87,10 +87,10 @@ ; CODE-NEXT: Stmt_bb20(c0); ; KERNEL-IR: store float %p_tmp23, float* %sum.0.phiops -; KERNEL-IR-NEXT: [[REGA:%.+]] = bitcast i8* %MemRef_sum_0__phi to float* +; KERNEL-IR-NEXT: [[REGA:%.+]] = addrspacecast i8 addrspace(1)* %MemRef_sum_0__phi to float* ; KERNEL-IR-NEXT: [[REGB:%.+]] = load float, float* %sum.0.phiops ; KERNEL-IR-NEXT: store float [[REGB]], float* [[REGA]] -; KERNEL-IR-NEXT: [[REGC:%.+]] = bitcast i8* %MemRef_sum_0 to float* +; KERNEL-IR-NEXT: [[REGC:%.+]] = addrspacecast i8 addrspace(1)* %MemRef_sum_0 to float* ; KERNEL-IR-NEXT: [[REGD:%.+]] = load float, float* %sum.0.s2a ; KERNEL-IR-NEXT: store float [[REGD]], float* [[REGC]] ; KERNEL-IR-NEXT: ret void diff --git a/polly/test/GPGPU/phi-nodes-in-kernel.ll b/polly/test/GPGPU/phi-nodes-in-kernel.ll index f367096..e2780cb 100644 --- a/polly/test/GPGPU/phi-nodes-in-kernel.ll +++ b/polly/test/GPGPU/phi-nodes-in-kernel.ll @@ -49,10 +49,10 @@ target triple = "x86_64-unknown-linux-gnu" ; KERNEL-IR: entry: ; KERNEL-IR-NEXT: %out_l.055.s2a = alloca i32 ; KERNEL-IR-NEXT: %out_l.055.phiops = alloca i32 -; KERNEL-IR-NEXT: %1 = bitcast i8* %MemRef_out_l_055__phi to i32* +; KERNEL-IR-NEXT: %1 = addrspacecast i8 addrspace(1)* %MemRef_out_l_055__phi to i32* ; KERNEL-IR-NEXT: %2 = load i32, i32* %1 ; KERNEL-IR-NEXT: store i32 %2, i32* %out_l.055.phiops -; KERNEL-IR-NEXT: %3 = bitcast i8* %MemRef_out_l_055 to i32* +; KERNEL-IR-NEXT: %3 = addrspacecast i8 addrspace(1)* %MemRef_out_l_055 to i32* ; KERNEL-IR-NEXT: %4 = load i32, i32* %3 ; KERNEL-IR-NEXT: store i32 %4, i32* %out_l.055.s2a diff --git a/polly/test/GPGPU/private-memory.ll b/polly/test/GPGPU/private-memory.ll index 882f420..2a34a1f 100644 --- a/polly/test/GPGPU/private-memory.ll +++ b/polly/test/GPGPU/private-memory.ll @@ -28,17 +28,17 @@ ; KERNEL: 
%polly.access.cast.private_array = bitcast [1 x float]* %private_array to float* ; KERNEL-NEXT: %polly.access.private_array = getelementptr float, float* %polly.access.cast.private_array, i64 0 -; KERNEL-NEXT: %polly.access.cast.MemRef_A = bitcast i8* %MemRef_A to float* -; KERNEL-NEXT: %polly.access.MemRef_A = getelementptr float, float* %polly.access.cast.MemRef_A, i64 %t0 -; KERNEL-NEXT: %shared.read = load float, float* %polly.access.MemRef_A +; KERNEL-NEXT: %polly.access.cast.MemRef_A = bitcast i8 addrspace(1)* %MemRef_A to float addrspace(1)* +; KERNEL-NEXT: %polly.access.MemRef_A = getelementptr float, float addrspace(1)* %polly.access.cast.MemRef_A, i64 %t0 +; KERNEL-NEXT: %shared.read = load float, float addrspace(1)* %polly.access.MemRef_A ; KERNEL-NEXT: store float %shared.read, float* %polly.access.private_array ; KERNEL: %polly.access.cast.private_array5 = bitcast [1 x float]* %private_array to float* ; KERNEL-NEXT: %polly.access.private_array6 = getelementptr float, float* %polly.access.cast.private_array5, i64 0 -; KERNEL-NEXT: %polly.access.cast.MemRef_A7 = bitcast i8* %MemRef_A to float* -; KERNEL-NEXT: %polly.access.MemRef_A8 = getelementptr float, float* %polly.access.cast.MemRef_A7, i64 %t0 +; KERNEL-NEXT: %polly.access.cast.MemRef_A7 = bitcast i8 addrspace(1)* %MemRef_A to float addrspace(1)* +; KERNEL-NEXT: %polly.access.MemRef_A8 = getelementptr float, float addrspace(1)* %polly.access.cast.MemRef_A7, i64 %t0 ; KERNEL-NEXT: %shared.write = load float, float* %polly.access.private_array6 -; KERNEL-NEXT: store float %shared.write, float* %polly.access.MemRef_A8 +; KERNEL-NEXT: store float %shared.write, float addrspace(1)* %polly.access.MemRef_A8 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/polly/test/GPGPU/remove-dead-instructions-in-stmt-2.ll b/polly/test/GPGPU/remove-dead-instructions-in-stmt-2.ll index 251e966..fba959c 100644 --- a/polly/test/GPGPU/remove-dead-instructions-in-stmt-2.ll +++ 
b/polly/test/GPGPU/remove-dead-instructions-in-stmt-2.ll @@ -7,7 +7,7 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" -; KERNEL-IR: store i32 0, i32* %polly.access.MemRef_sum_c, align 4 +; KERNEL-IR: store i32 0, i32 addrspace(1)* %polly.access.MemRef_sum_c, align 4 ; KERNEL-IR-NEXT: br label %polly.merge define void @kernel_dynprog([50 x [50 x i32]]* %sum_c) { diff --git a/polly/test/GPGPU/remove-dead-instructions-in-stmt.ll b/polly/test/GPGPU/remove-dead-instructions-in-stmt.ll index 3b57b55..9a0a1cd 100644 --- a/polly/test/GPGPU/remove-dead-instructions-in-stmt.ll +++ b/polly/test/GPGPU/remove-dead-instructions-in-stmt.ll @@ -10,7 +10,7 @@ ; condition. This code referred to CPU registers and consequently resulted ; in invalid bitcode. -; KERNEL-IR: store i32 0, i32* %polly.access.MemRef_sum_c, align 4 +; KERNEL-IR: store i32 0, i32 addrspace(1)* %polly.access.MemRef_sum_c, align 4 ; KERNEL-IR-NEXT: br label %polly.merge target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/polly/test/GPGPU/shared-memory-two-dimensional.ll b/polly/test/GPGPU/shared-memory-two-dimensional.ll index 4b4fcfe..0fe60bd 100644 --- a/polly/test/GPGPU/shared-memory-two-dimensional.ll +++ b/polly/test/GPGPU/shared-memory-two-dimensional.ll @@ -36,8 +36,8 @@ ; KERNEL: %polly.access.mul.MemRef_b = mul nsw i64 %polly.indvar, 8 ; KERNEL-NEXT: %polly.access.add.MemRef_b = add nsw i64 %polly.access.mul.MemRef_b, %t0 -; KERNEL-NEXT: %polly.access.MemRef_b = getelementptr float, float* %polly.access.cast.MemRef_b, i64 %polly.access.add.MemRef_b -; KERNEL-NEXT: %shared.read = load float, float* %polly.access.MemRef_b +; KERNEL-NEXT: %polly.access.MemRef_b = getelementptr float, float addrspace(1)* %polly.access.cast.MemRef_b, i64 %polly.access.add.MemRef_b +; KERNEL-NEXT: %shared.read = load float, float addrspace(1)* %polly.access.MemRef_b ; KERNEL-NEXT: store float %shared.read, float addrspace(3)* 
%polly.access.shared_MemRef_b diff --git a/polly/test/GPGPU/shared-memory.ll b/polly/test/GPGPU/shared-memory.ll index 559df1a..bdcfb0c 100644 --- a/polly/test/GPGPU/shared-memory.ll +++ b/polly/test/GPGPU/shared-memory.ll @@ -29,16 +29,16 @@ ; KERNEL: @shared_MemRef_A = internal addrspace(3) global [32 x float] zeroinitializer, align 4 ; KERNEL: %polly.access.shared_MemRef_A = getelementptr float, float addrspace(3)* getelementptr inbounds ([32 x float], [32 x float] addrspace(3)* @shared_MemRef_A, i32 0, i32 0), i64 %t0 -; KERNEL-NEXT: %polly.access.cast.MemRef_A = bitcast i8* %MemRef_A to float* -; KERNEL-NEXT: %polly.access.MemRef_A = getelementptr float, float* %polly.access.cast.MemRef_A, i64 %t0 -; KERNEL-NEXT: %shared.read = load float, float* %polly.access.MemRef_A +; KERNEL-NEXT: %polly.access.cast.MemRef_A = bitcast i8 addrspace(1)* %MemRef_A to float addrspace(1)* +; KERNEL-NEXT: %polly.access.MemRef_A = getelementptr float, float addrspace(1)* %polly.access.cast.MemRef_A, i64 %t0 +; KERNEL-NEXT: %shared.read = load float, float addrspace(1)* %polly.access.MemRef_A ; KERNEL-NEXT: store float %shared.read, float addrspace(3)* %polly.access.shared_MemRef_A ; KERNEL: %polly.access.shared_MemRef_A3 = getelementptr float, float addrspace(3)* getelementptr inbounds ([32 x float], [32 x float] addrspace(3)* @shared_MemRef_A, i32 0, i32 0), i64 %t0 -; KERNEL-NEXT: %polly.access.cast.MemRef_A4 = bitcast i8* %MemRef_A to float* -; KERNEL-NEXT: %polly.access.MemRef_A5 = getelementptr float, float* %polly.access.cast.MemRef_A4, i64 %t0 +; KERNEL-NEXT: %polly.access.cast.MemRef_A4 = bitcast i8 addrspace(1)* %MemRef_A to float addrspace(1)* +; KERNEL-NEXT: %polly.access.MemRef_A5 = getelementptr float, float addrspace(1)* %polly.access.cast.MemRef_A4, i64 %t0 ; KERNEL-NEXT: %shared.write = load float, float addrspace(3)* %polly.access.shared_MemRef_A3 -; KERNEL-NEXT: store float %shared.write, float* %polly.access.MemRef_A5 +; KERNEL-NEXT: store float %shared.write, 
float addrspace(1)* %polly.access.MemRef_A5 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" -- 2.7.4