From 40ef46fba4a072065c04fa16ac9309b551fd7004 Mon Sep 17 00:00:00 2001
From: Alex Zinenko
Date: Wed, 18 Dec 2019 03:38:18 -0800
Subject: [PATCH] Harden the requirements on memory attribution types in gpu.func

When memory attributions are present in `gpu.func`, require that they are of
memref type and live in memory spaces 3 and 5 for workgroup and private memory
attributions, respectively. Adapt the conversion from the GPU dialect to the
NVVM dialect to drop the private memory space from attributions, as NVVM is
able to model them as local `llvm.alloca`s in the default memory space.

PiperOrigin-RevId: 286161763
---
 mlir/include/mlir/Dialect/GPU/GPUDialect.h        |  6 +++-
 .../Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp | 27 ++++++++++++++++--
 mlir/lib/Dialect/GPU/IR/GPUDialect.cpp            | 22 +++++++++++++++
 .../Conversion/GPUToNVVM/memory-attrbution.mlir   | 10 +++----
 mlir/test/Dialect/GPU/invalid.mlir                | 33 ++++++++++++++++++++++
 5 files changed, 90 insertions(+), 8 deletions(-)

diff --git a/mlir/include/mlir/Dialect/GPU/GPUDialect.h b/mlir/include/mlir/Dialect/GPU/GPUDialect.h
index 8b62c70..495238f 100644
--- a/mlir/include/mlir/Dialect/GPU/GPUDialect.h
+++ b/mlir/include/mlir/Dialect/GPU/GPUDialect.h
@@ -63,7 +63,11 @@ public:
 
   /// Returns the numeric value used to identify the workgroup memory address
   /// space.
-  static int getWorkgroupAddressSpace() { return 3; }
+  static unsigned getWorkgroupAddressSpace() { return 3; }
+
+  /// Returns the numeric value used to identify the private memory address
+  /// space.
+  static unsigned getPrivateAddressSpace() { return 5; }
 
   LogicalResult verifyOperationAttribute(Operation *op,
                                          NamedAttribute attr) override;
diff --git a/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp b/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp
index f41c0c45e..4689736 100644
--- a/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp
+++ b/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp
@@ -38,6 +38,26 @@ using namespace mlir;
 
 namespace {
 
+/// Derived type converter for the GPU to NVVM lowering. The GPU dialect uses
+/// memory space 5 for private memory attributions, but NVVM represents private
+/// memory allocations as local `alloca`s in the default address space. This
+/// converter drops the private memory space from such memref types.
+class NVVMTypeConverter : public LLVMTypeConverter {
+public:
+  using LLVMTypeConverter::LLVMTypeConverter;
+
+  Type convertType(Type type) override {
+    auto memref = type.dyn_cast<MemRefType>();
+    if (memref &&
+        memref.getMemorySpace() == gpu::GPUDialect::getPrivateAddressSpace()) {
+      type = MemRefType::get(memref.getShape(), memref.getElementType(),
+                             memref.getAffineMaps());
+    }
+
+    return LLVMTypeConverter::convertType(type);
+  }
+};
+
 /// Converts all_reduce op to LLVM/NVVM ops.
 struct GPUAllReduceOpLowering : public LLVMOpLowering {
   using AccumulatorFactory = std::function<...>;
@@ ... @@
-                         .getPointerTo(type.getMemorySpace());
+                         .getPointerTo();
       Value *numElements = rewriter.create<LLVM::ConstantOp>(
           gpuFuncOp.getLoc(), int64Ty,
           rewriter.getI64IntegerAttr(type.getNumElements()));
@@ -635,7 +658,7 @@ public:
       return;
 
     OwningRewritePatternList patterns;
-    LLVMTypeConverter converter(m.getContext());
+    NVVMTypeConverter converter(m.getContext());
     populateStdToLLVMConversionPatterns(converter, patterns);
     populateGpuToNVVMConversionPatterns(converter, patterns);
     ConversionTarget target(getContext());
diff --git a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
index 46a568c..1c20be6 100644
--- a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
+++ b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
@@ -739,6 +739,22 @@ LogicalResult GPUFuncOp::verifyType() {
   return success();
 }
 
+static LogicalResult verifyAttributions(Operation *op,
+                                        ArrayRef<Value *> attributions,
+                                        unsigned memorySpace) {
+  for (Value *v : attributions) {
+    auto type = v->getType().dyn_cast<MemRefType>();
+    if (!type)
+      return op->emitOpError() << "expected memref type in attribution";
+
+    if (type.getMemorySpace() != memorySpace) {
+      return op->emitOpError()
+             << "expected memory space " << memorySpace << " in attribution";
+    }
+  }
+  return success();
+}
+
 /// Verifies the body of the function.
 LogicalResult GPUFuncOp::verifyBody() {
   unsigned numFuncArguments = getNumArguments();
@@ -758,6 +774,12 @@ LogicalResult GPUFuncOp::verifyBody() {
                << blockArgType;
   }
 
+  if (failed(verifyAttributions(getOperation(), getWorkgroupAttributions(),
+                                GPUDialect::getWorkgroupAddressSpace())) ||
+      failed(verifyAttributions(getOperation(), getPrivateAttributions(),
+                                GPUDialect::getPrivateAddressSpace())))
+    return failure();
+
   return success();
 }
diff --git a/mlir/test/Conversion/GPUToNVVM/memory-attrbution.mlir b/mlir/test/Conversion/GPUToNVVM/memory-attrbution.mlir
index b8ec626..69a16b2 100644
--- a/mlir/test/Conversion/GPUToNVVM/memory-attrbution.mlir
+++ b/mlir/test/Conversion/GPUToNVVM/memory-attrbution.mlir
@@ -2,7 +2,7 @@
 module attributes {gpu.kernel_module} {
   // CHECK-LABEL: llvm.func @private
-  gpu.func @private(%arg0: f32) private(%arg1: memref<4xf32>) {
+  gpu.func @private(%arg0: f32) private(%arg1: memref<4xf32, 5>) {
     // Allocate private memory inside the function.
     // CHECK: %[[size:.*]] = llvm.mlir.constant(4 : i64) : !llvm.i64
     // CHECK: %[[raw:.*]] = llvm.alloca %[[size]] x !llvm.float : (!llvm.i64) -> !llvm<"float*">
@@ -24,7 +24,7 @@ module attributes {gpu.kernel_module} {
     // CHECK: llvm.getelementptr
    // CHECK: llvm.store
     %c0 = constant 0 : index
-    store %arg0, %arg1[%c0] : memref<4xf32>
+    store %arg0, %arg1[%c0] : memref<4xf32, 5>
 
     "terminator"() : () -> ()
   }
@@ -123,7 +123,7 @@ module attributes {gpu.kernel_module} {
   // CHECK-LABEL: llvm.func @multiple
   gpu.func @multiple(%arg0: f32)
       workgroup(%arg1: memref<1xf32, 3>, %arg2: memref<2xf32, 3>)
-      private(%arg3: memref<3xf32>, %arg4: memref<4xf32>) {
+      private(%arg3: memref<3xf32, 5>, %arg4: memref<4xf32, 5>) {
     // Workgroup buffers.
     // CHECK: llvm.mlir.addressof @[[buffer1]]
@@ -138,8 +138,8 @@ module attributes {gpu.kernel_module} {
     %c0 = constant 0 : index
     store %arg0, %arg1[%c0] : memref<1xf32, 3>
     store %arg0, %arg2[%c0] : memref<2xf32, 3>
-    store %arg0, %arg3[%c0] : memref<3xf32>
-    store %arg0, %arg4[%c0] : memref<4xf32>
+    store %arg0, %arg3[%c0] : memref<3xf32, 5>
+    store %arg0, %arg4[%c0] : memref<4xf32, 5>
     "terminator"() : () -> ()
   }
 }
diff --git a/mlir/test/Dialect/GPU/invalid.mlir b/mlir/test/Dialect/GPU/invalid.mlir
index 2065595..f8ed1a9 100644
--- a/mlir/test/Dialect/GPU/invalid.mlir
+++ b/mlir/test/Dialect/GPU/invalid.mlir
@@ -382,3 +382,36 @@ module {
   }) {sym_name="kernel_1", type=f32} : () -> ()
   }
 }
+
+// -----
+
+module {
+  module @gpu_funcs attributes {gpu.kernel_module} {
+    // expected-error @+1 {{expected memref type in attribution}}
+    gpu.func @kernel() workgroup(%0: i32) {
+      gpu.return
+    }
+  }
+}
+
+// -----
+
+module {
+  module @gpu_funcs attributes {gpu.kernel_module} {
+    // expected-error @+1 {{expected memory space 3 in attribution}}
+    gpu.func @kernel() workgroup(%0: memref<4xf32>) {
+      gpu.return
+    }
+  }
+}
+
+// -----
+
+module {
+  module @gpu_funcs attributes {gpu.kernel_module} {
+    // expected-error @+1 {{expected memory space 5 in attribution}}
+    gpu.func @kernel() private(%0: memref<4xf32>) {
+      gpu.return
+    }
+  }
+}
-- 
2.7.4
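
As an illustration of the requirement this patch enforces, below is a minimal
sketch of a well-formed `gpu.func`: workgroup attributions must be memrefs in
memory space 3 and private attributions memrefs in memory space 5. The shape
mirrors the memory-attrbution.mlir tests above; the function and value names
(@kernel, %wg, %priv) and the buffer sizes are illustrative only.

module attributes {gpu.kernel_module} {
  // Workgroup attribution in memory space 3, private attribution in memory
  // space 5; anything else is now rejected by the gpu.func verifier.
  gpu.func @kernel(%arg0: f32)
      workgroup(%wg: memref<8xf32, 3>)
      private(%priv: memref<4xf32, 5>) {
    %c0 = constant 0 : index
    store %arg0, %wg[%c0] : memref<8xf32, 3>
    store %arg0, %priv[%c0] : memref<4xf32, 5>
    gpu.return
  }
}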
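
On the NVVM path, the NVVMTypeConverter strips memory space 5 from a private
attribution's memref type so the buffer can be materialized as an `llvm.alloca`
in the default address space. The before/after sketch below is pieced together
from the @private test and its CHECK lines above; the SSA names %size and %raw
are illustrative.

// GPU dialect input: a private attribution in memory space 5.
gpu.func @private(%arg0: f32) private(%arg1: memref<4xf32, 5>) {
  %c0 = constant 0 : index
  store %arg0, %arg1[%c0] : memref<4xf32, 5>
  "terminator"() : () -> ()
}

// After lowering, the buffer is a local alloca with no address space
// qualifier, matching the CHECK lines in memory-attrbution.mlir.
%size = llvm.mlir.constant(4 : i64) : !llvm.i64
%raw = llvm.alloca %size x !llvm.float : (!llvm.i64) -> !llvm<"float*">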