From 34eeabbca3157958f62131fd4b7beeb4d9c203f8 Mon Sep 17 00:00:00 2001 From: Siddharth Bhat Date: Wed, 9 Aug 2017 08:29:16 +0000 Subject: [PATCH] [PPCGCodeGeneration] Compute element size in bytes for arrays correctly. Previously, we used to compute this with `elementSizeInBits / 8`. This would yield an element size of 0 when the array had element size < 8 in bits. To fix this, ask data layout what the size in bytes should be. Differential Revision: https://reviews.llvm.org/D36459 llvm-svn: 310448 --- polly/lib/CodeGen/PPCGCodeGeneration.cpp | 15 ++++++- .../array-with-elem-type-smaller-than-byte.ll | 50 ++++++++++++++++++++++ 2 files changed, 64 insertions(+), 1 deletion(-) create mode 100644 polly/test/GPGPU/array-with-elem-type-smaller-than-byte.ll diff --git a/polly/lib/CodeGen/PPCGCodeGeneration.cpp b/polly/lib/CodeGen/PPCGCodeGeneration.cpp index 3ab1b64..619fe06 100644 --- a/polly/lib/CodeGen/PPCGCodeGeneration.cpp +++ b/polly/lib/CodeGen/PPCGCodeGeneration.cpp @@ -778,6 +778,19 @@ void GPUNodeBuilder::allocateDeviceArrays() { ArraySize, Builder.CreateMul(Offset, Builder.getInt64(ScopArray->getElemSizeInBytes()))); + const SCEV *SizeSCEV = SE.getSCEV(ArraySize); + // It makes no sense to have an array of size 0. The CUDA API will + // throw an error anyway if we invoke `cuMallocManaged` with size `0`. We + // choose to be defensive and catch this at the compile phase. It is + // most likely that we are doing something wrong with size computation. + if (SizeSCEV->isZero()) { + errs() << getUniqueScopName(&S) + << " has computed array size 0: " << *ArraySize + << " | for array: " << *(ScopArray->getBasePtr()) + << ". 
This is illegal, exiting.\n"; + report_fatal_error("array size was computed to be 0"); + } + Value *DevArray = createCallAllocateMemoryForDevice(ArraySize); DevArray->setName(DevArrayName); DeviceAllocations[ScopArray] = DevArray; @@ -2905,7 +2918,7 @@ public: PPCGArray.space = Array->getSpace().release(); PPCGArray.type = strdup(TypeName.c_str()); - PPCGArray.size = Array->getElementType()->getPrimitiveSizeInBits() / 8; + PPCGArray.size = DL->getTypeAllocSize(Array->getElementType()); PPCGArray.name = strdup(Array->getName().c_str()); PPCGArray.extent = nullptr; PPCGArray.n_index = Array->getNumberOfDimensions(); diff --git a/polly/test/GPGPU/array-with-elem-type-smaller-than-byte.ll b/polly/test/GPGPU/array-with-elem-type-smaller-than-byte.ll new file mode 100644 index 0000000..5985c7a --- /dev/null +++ b/polly/test/GPGPU/array-with-elem-type-smaller-than-byte.ll @@ -0,0 +1,50 @@ +; RUN: opt %loadPolly -S -polly-codegen-ppcg \ +; RUN: -polly-use-llvm-names < %s +; ModuleID = 'test/GPGPU/zero-size-array.ll' + +; REQUIRES: pollyacc + +target datalayout = "e-p:64:64:64-S128-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f16:16:16-f32:32:32-f64:64:64-f128:128:128-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-unknown-linux-gnu" + + +; We used to divide the element size by 8 to arrive at the 'actual' size +; of an array element. This used to cause arrays that have an element size +; of less than 8 to collapse to size 0. This test makes sure that it does +; not happen anymore. 
+ +; f(int *niters_ptr, int *arr[0]) { +; const int niters = *niters_ptr; +; for(int i = 0; i < niters; i++) { +; arr[0][i + 1] = 0; +; } +; } + +; Function Attrs: nounwind uwtable +define void @f(i32* noalias %niters.ptr, [0 x i32]* noalias %arr) #0 { +entry: + %niters = load i32, i32* %niters.ptr, align 4 + br label %loop.body + +loop.body: ; preds = %loop.body, %entry + %indvar = phi i32 [ %indvar.next, %loop.body ], [ 1, %entry ] + %indvar.sext = sext i32 %indvar to i64 + %arr.slot = getelementptr [0 x i32], [0 x i32]* %arr, i64 0, i64 %indvar.sext + store i32 0, i32* %arr.slot, align 4 + %tmp8 = icmp eq i32 %indvar, %niters + %indvar.next = add i32 %indvar, 1 + br i1 %tmp8, label %loop.exit, label %loop.body + +loop.exit: ; preds = %loop.body + %tmp10 = icmp sgt i32 undef, 0 + br label %auxiliary.loop + +auxiliary.loop: ; preds = %auxiliary.loop, %loop.exit + %tmp11 = phi i1 [ %tmp10, %loop.exit ], [ undef, %auxiliary.loop ] + br i1 undef, label %auxiliary.loop, label %exit + +exit: ; preds = %auxiliary.loop + ret void +} + +attributes #0 = { nounwind uwtable } -- 2.7.4