From f919d8b36071bdacd699e28d7788bb904ba5f23d Mon Sep 17 00:00:00 2001 From: Tobias Grosser Date: Thu, 4 Aug 2016 13:57:29 +0000 Subject: [PATCH] GPGPU: Support scalars that are mapped to shared memory llvm-svn: 277726 --- polly/lib/CodeGen/IslExprBuilder.cpp | 7 ++- polly/lib/CodeGen/PPCGCodeGeneration.cpp | 14 +++--- polly/test/GPGPU/shared-memory-scalar.ll | 75 ++++++++++++++++++++++++++++++++ 3 files changed, 86 insertions(+), 10 deletions(-) create mode 100644 polly/test/GPGPU/shared-memory-scalar.ll diff --git a/polly/lib/CodeGen/IslExprBuilder.cpp b/polly/lib/CodeGen/IslExprBuilder.cpp index 75e3507..30f4b0b 100644 --- a/polly/lib/CodeGen/IslExprBuilder.cpp +++ b/polly/lib/CodeGen/IslExprBuilder.cpp @@ -212,7 +212,7 @@ Value *IslExprBuilder::createAccessAddress(isl_ast_expr *Expr) { "isl ast expression not of type isl_ast_op"); assert(isl_ast_expr_get_op_type(Expr) == isl_ast_op_access && "not an access isl ast expression"); - assert(isl_ast_expr_get_op_n_arg(Expr) >= 2 && + assert(isl_ast_expr_get_op_n_arg(Expr) >= 1 && "We need at least two operands to create a member access."); Value *Base, *IndexOp, *Access; @@ -250,6 +250,11 @@ Value *IslExprBuilder::createAccessAddress(isl_ast_expr *Expr) { Builder.CreateBitCast(Base, PointerTy, "polly.access.cast." + BaseName); } + if (isl_ast_expr_get_op_n_arg(Expr) == 1) { + isl_ast_expr_free(Expr); + return Base; + } + IndexOp = nullptr; for (unsigned u = 1, e = isl_ast_expr_get_op_n_arg(Expr); u < e; u++) { Value *NextIndex = create(isl_ast_expr_get_op_arg(Expr, u)); diff --git a/polly/lib/CodeGen/PPCGCodeGeneration.cpp b/polly/lib/CodeGen/PPCGCodeGeneration.cpp index 8b6679b..481e20b 100644 --- a/polly/lib/CodeGen/PPCGCodeGeneration.cpp +++ b/polly/lib/CodeGen/PPCGCodeGeneration.cpp @@ -1252,16 +1252,12 @@ void GPUNodeBuilder::createKernelVariables(ppcg_kernel *Kernel, Function *FN) { isl_id *Id = isl_space_get_tuple_id(Var.array->space, isl_dim_set); Type *EleTy = ScopArrayInfo::getFromId(Id)->getElementType(); + Type *ArrayTy = EleTy; SmallVector Sizes; - isl_val *V0 = isl_vec_get_element_val(Var.size, 0); - long Bound = isl_val_get_num_si(V0); - isl_val_free(V0); - Sizes.push_back(S.getSE()->getConstant(Builder.getInt64Ty(), Bound)); - ArrayType *ArrayTy = ArrayType::get(EleTy, Bound); - for (unsigned int j = 1; j < Var.array->n_index; ++j) { + for (unsigned int j = 0; j < Var.array->n_index; ++j) { isl_val *Val = isl_vec_get_element_val(Var.size, j); - Bound = isl_val_get_num_si(Val); + long Bound = isl_val_get_num_si(Val); isl_val_free(Val); Sizes.push_back(S.getSE()->getConstant(Builder.getInt64Ty(), Bound)); ArrayTy = ArrayType::get(ArrayTy, Bound); @@ -1274,8 +1270,8 @@ void GPUNodeBuilder::createKernelVariables(ppcg_kernel *Kernel, Function *FN) { *M, ArrayTy, false, GlobalValue::InternalLinkage, 0, Var.name, nullptr, GlobalValue::ThreadLocalMode::NotThreadLocal, 3); GlobalVar->setAlignment(EleTy->getPrimitiveSizeInBits() / 8); - ConstantAggregateZero *Zero = ConstantAggregateZero::get(ArrayTy); - GlobalVar->setInitializer(Zero); + GlobalVar->setInitializer(Constant::getNullValue(ArrayTy)); + Allocation = GlobalVar; } else if (Var.type == ppcg_access_private) { Allocation = Builder.CreateAlloca(ArrayTy, 0, "private_array"); diff --git a/polly/test/GPGPU/shared-memory-scalar.ll b/polly/test/GPGPU/shared-memory-scalar.ll new file mode 100644 index 0000000..d10a6ac --- /dev/null +++ b/polly/test/GPGPU/shared-memory-scalar.ll @@ -0,0 +1,75 @@ +; RUN: opt %loadPolly -polly-codegen-ppcg -polly-acc-dump-code \ +; RUN: -polly-acc-use-shared \ +; RUN: -disable-output < %s | \ +; RUN: FileCheck -check-prefix=CODE %s + +; RUN: opt %loadPolly -polly-codegen-ppcg \ +; RUN: -polly-acc-use-shared \ +; RUN: -disable-output -polly-acc-dump-kernel-ir < %s | \ +; RUN: FileCheck -check-prefix=KERNEL %s + +; REQUIRES: pollyacc + +; void add(float *A, float alpha) { +; for (long i = 0; i < 32; i++) +; for (long j = 0; j < 10; j++) +; A[i] += alpha; +; } + +; CODE: read(t0); +; CODE-NEXT: if (t0 == 0) +; CODE-NEXT: read(); +; CODE-NEXT: sync0(); +; CODE-NEXT: for (int c3 = 0; c3 <= 9; c3 += 1) +; CODE-NEXT: Stmt_bb5(t0, c3); +; CODE-NEXT: sync1(); +; CODE-NEXT: write(t0); + + +; KERNEL: @shared_MemRef_alpha = internal addrspace(3) global float 0.000000e+00, align 4 + +; KERNEL: %polly.access.cast.MemRef_alpha = bitcast i8* %MemRef_alpha to float* +; KERNEL-NEXT: %shared.read1 = load float, float* %polly.access.cast.MemRef_alpha +; KERNEL-NEXT: store float %shared.read1, float addrspace(3)* @shared_MemRef_alpha + + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @add(float* %A, float %alpha) { +bb: + br label %bb2 + +bb2: ; preds = %bb11, %bb + %i.0 = phi i64 [ 0, %bb ], [ %tmp12, %bb11 ] + %exitcond1 = icmp ne i64 %i.0, 32 + br i1 %exitcond1, label %bb3, label %bb13 + +bb3: ; preds = %bb2 + br label %bb4 + +bb4: ; preds = %bb8, %bb3 + %j.0 = phi i64 [ 0, %bb3 ], [ %tmp9, %bb8 ] + %exitcond = icmp ne i64 %j.0, 10 + br i1 %exitcond, label %bb5, label %bb10 + +bb5: ; preds = %bb4 + %tmp = getelementptr inbounds float, float* %A, i64 %i.0 + %tmp6 = load float, float* %tmp, align 4 + %tmp7 = fadd float %tmp6, %alpha + store float %tmp7, float* %tmp, align 4 + br label %bb8 + +bb8: ; preds = %bb5 + %tmp9 = add nuw nsw i64 %j.0, 1 + br label %bb4 + +bb10: ; preds = %bb4 + br label %bb11 + +bb11: ; preds = %bb10 + %tmp12 = add nuw nsw i64 %i.0, 1 + br label %bb2 + +bb13: ; preds = %bb2 + ret void +} -- 2.7.4