From 40ef46fba4a072065c04fa16ac9309b551fd7004 Mon Sep 17 00:00:00 2001
From: Alex Zinenko
Date: Wed, 18 Dec 2019 03:38:18 -0800
Subject: [PATCH] Harden the requirements on memory attribution types in gpu.func

When memory attributions are present in `gpu.func`, require that they are of
memref type and live in memory spaces 3 and 5 for workgroup and private memory
attributions, respectively. Adapt the conversion from the GPU dialect to the
NVVM dialect to drop the private memory space from attributions, as NVVM is
able to model them as local `llvm.alloca`s in the default memory space.

PiperOrigin-RevId: 286161763
---
 mlir/include/mlir/Dialect/GPU/GPUDialect.h        |  6 +++-
 .../Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp | 27 ++++++++++++++++--
 mlir/lib/Dialect/GPU/IR/GPUDialect.cpp            | 22 +++++++++++++++
 .../Conversion/GPUToNVVM/memory-attrbution.mlir   | 10 +++----
 mlir/test/Dialect/GPU/invalid.mlir                | 33 ++++++++++++++++++++++
 5 files changed, 90 insertions(+), 8 deletions(-)

diff --git a/mlir/include/mlir/Dialect/GPU/GPUDialect.h b/mlir/include/mlir/Dialect/GPU/GPUDialect.h
index 8b62c70..495238f 100644
--- a/mlir/include/mlir/Dialect/GPU/GPUDialect.h
+++ b/mlir/include/mlir/Dialect/GPU/GPUDialect.h
@@ -63,7 +63,11 @@ public:
 
   /// Returns the numeric value used to identify the workgroup memory address
   /// space.
-  static int getWorkgroupAddressSpace() { return 3; }
+  static unsigned getWorkgroupAddressSpace() { return 3; }
+
+  /// Returns the numeric value used to identify the private memory address
+  /// space.
+  static unsigned getPrivateAddressSpace() { return 5; }
 
   LogicalResult verifyOperationAttribute(Operation *op,
                                          NamedAttribute attr) override;
diff --git a/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp b/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp
index f41c0c45e..4689736 100644
--- a/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp
+++ b/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp
@@ -38,6 +38,26 @@ using namespace mlir;
 
 namespace {
 
+/// Derived type converter for the GPU to NVVM lowering. The GPU dialect uses
+/// memory space 5 for private memory attributions, but NVVM represents private
+/// memory allocations as local `alloca`s in the default address space. This
+/// converter drops the private memory space from such memref types.
+class NVVMTypeConverter : public LLVMTypeConverter {
+public:
+  using LLVMTypeConverter::LLVMTypeConverter;
+
+  Type convertType(Type type) override {
+    auto memref = type.dyn_cast<MemRefType>();
+    if (memref &&
+        memref.getMemorySpace() == gpu::GPUDialect::getPrivateAddressSpace()) {
+      type = MemRefType::get(memref.getShape(), memref.getElementType(),
+                             memref.getAffineMaps());
+    }
+
+    return LLVMTypeConverter::convertType(type);
+  }
+};
+
 /// Converts all_reduce op to LLVM/NVVM ops.
 struct GPUAllReduceOpLowering : public LLVMOpLowering {
   using AccumulatorFactory = std::function<...>;
@@ ... @@
-                         .getPointerTo(type.getMemorySpace());
+                         .getPointerTo();
       Value *numElements = rewriter.create<LLVM::ConstantOp>(
           gpuFuncOp.getLoc(), int64Ty,
           rewriter.getI64IntegerAttr(type.getNumElements()));
@@ -635,7 +658,7 @@ public:
       return;
 
     OwningRewritePatternList patterns;
-    LLVMTypeConverter converter(m.getContext());
+    NVVMTypeConverter converter(m.getContext());
     populateStdToLLVMConversionPatterns(converter, patterns);
     populateGpuToNVVMConversionPatterns(converter, patterns);
     ConversionTarget target(getContext());
diff --git a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
index 46a568c..1c20be6 100644
--- a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
+++ b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
@@ -739,6 +739,22 @@ LogicalResult GPUFuncOp::verifyType() {
   return success();
 }
 
+static LogicalResult verifyAttributions(Operation *op,
+                                        ArrayRef<Value *> attributions,
+                                        unsigned memorySpace) {
+  for (Value *v : attributions) {
+    auto type = v->getType().dyn_cast<MemRefType>();
+    if (!type)
+      return op->emitOpError() << "expected memref type in attribution";
+
+    if (type.getMemorySpace() != memorySpace) {
+      return op->emitOpError()
+             << "expected memory space " << memorySpace << " in attribution";
+    }
+  }
+  return success();
+}
+
 /// Verifies the body of the function.
 LogicalResult GPUFuncOp::verifyBody() {
   unsigned numFuncArguments = getNumArguments();
@@ -758,6 +774,12 @@ LogicalResult GPUFuncOp::verifyBody() {
                << blockArgType;
   }
 
+  if (failed(verifyAttributions(getOperation(), getWorkgroupAttributions(),
+                                GPUDialect::getWorkgroupAddressSpace())) ||
+      failed(verifyAttributions(getOperation(), getPrivateAttributions(),
+                                GPUDialect::getPrivateAddressSpace())))
+    return failure();
+
   return success();
 }
diff --git a/mlir/test/Conversion/GPUToNVVM/memory-attrbution.mlir b/mlir/test/Conversion/GPUToNVVM/memory-attrbution.mlir
index b8ec626..69a16b2 100644
--- a/mlir/test/Conversion/GPUToNVVM/memory-attrbution.mlir
+++ b/mlir/test/Conversion/GPUToNVVM/memory-attrbution.mlir
@@ -2,7 +2,7 @@
 module attributes {gpu.kernel_module} {
   // CHECK-LABEL: llvm.func @private
-  gpu.func @private(%arg0: f32) private(%arg1: memref<4xf32>) {
+  gpu.func @private(%arg0: f32) private(%arg1: memref<4xf32, 5>) {
     // Allocate private memory inside the function.
     // CHECK: %[[size:.*]] = llvm.mlir.constant(4 : i64) : !llvm.i64
     // CHECK: %[[raw:.*]] = llvm.alloca %[[size]] x !llvm.float : (!llvm.i64) -> !llvm<"float*">
@@ -24,7 +24,7 @@ module attributes {gpu.kernel_module} {
     // CHECK: llvm.getelementptr
    // CHECK: llvm.store
     %c0 = constant 0 : index
-    store %arg0, %arg1[%c0] : memref<4xf32>
+    store %arg0, %arg1[%c0] : memref<4xf32, 5>
 
     "terminator"() : () -> ()
   }
@@ -123,7 +123,7 @@ module attributes {gpu.kernel_module} {
   // CHECK-LABEL: llvm.func @multiple
   gpu.func @multiple(%arg0: f32)
       workgroup(%arg1: memref<1xf32, 3>, %arg2: memref<2xf32, 3>)
-      private(%arg3: memref<3xf32>, %arg4: memref<4xf32>) {
+      private(%arg3: memref<3xf32, 5>, %arg4: memref<4xf32, 5>) {
     // Workgroup buffers.
     // CHECK: llvm.mlir.addressof @[[buffer1]]
@@ -138,8 +138,8 @@ module attributes {gpu.kernel_module} {
     %c0 = constant 0 : index
     store %arg0, %arg1[%c0] : memref<1xf32, 3>
     store %arg0, %arg2[%c0] : memref<2xf32, 3>
-    store %arg0, %arg3[%c0] : memref<3xf32>
-    store %arg0, %arg4[%c0] : memref<4xf32>
+    store %arg0, %arg3[%c0] : memref<3xf32, 5>
+    store %arg0, %arg4[%c0] : memref<4xf32, 5>
     "terminator"() : () -> ()
   }
 }
diff --git a/mlir/test/Dialect/GPU/invalid.mlir b/mlir/test/Dialect/GPU/invalid.mlir
index 2065595..f8ed1a9 100644
--- a/mlir/test/Dialect/GPU/invalid.mlir
+++ b/mlir/test/Dialect/GPU/invalid.mlir
@@ -382,3 +382,36 @@ module {
   }) {sym_name="kernel_1", type=f32} : () -> ()
   }
 }
+
+// -----
+
+module {
+  module @gpu_funcs attributes {gpu.kernel_module} {
+    // expected-error @+1 {{expected memref type in attribution}}
+    gpu.func @kernel() workgroup(%0: i32) {
+      gpu.return
+    }
+  }
+}
+
+// -----
+
+module {
+  module @gpu_funcs attributes {gpu.kernel_module} {
+    // expected-error @+1 {{expected memory space 3 in attribution}}
+    gpu.func @kernel() workgroup(%0: memref<4xf32>) {
+      gpu.return
+    }
+  }
+}
+
+// -----
+
+module {
+  module @gpu_funcs attributes {gpu.kernel_module} {
+    // expected-error @+1 {{expected memory space 5 in attribution}}
+    gpu.func @kernel() private(%0: memref<4xf32>) {
+      gpu.return
+    }
+  }
+}
-- 
2.7.4
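
As an illustration of the requirement this patch enforces, below is a minimal
sketch of a well-formed `gpu.func`: workgroup attributions must be memrefs in
memory space 3 and private attributions memrefs in memory space 5. The shape
mirrors the memory-attrbution.mlir tests above; the function and value names
(@kernel, %wg, %priv) and the buffer sizes are illustrative only.

module attributes {gpu.kernel_module} {
  // Workgroup attribution in memory space 3, private attribution in memory
  // space 5; anything else is now rejected by the gpu.func verifier.
  gpu.func @kernel(%arg0: f32)
      workgroup(%wg: memref<8xf32, 3>)
      private(%priv: memref<4xf32, 5>) {
    %c0 = constant 0 : index
    store %arg0, %wg[%c0] : memref<8xf32, 3>
    store %arg0, %priv[%c0] : memref<4xf32, 5>
    gpu.return
  }
}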
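
On the NVVM path, the NVVMTypeConverter strips memory space 5 from a private
attribution's memref type so the buffer can be materialized as an `llvm.alloca`
in the default address space. The before/after sketch below is pieced together
from the @private test and its CHECK lines above; the SSA names %size and %raw
are illustrative.

// GPU dialect input: a private attribution in memory space 5.
gpu.func @private(%arg0: f32) private(%arg1: memref<4xf32, 5>) {
  %c0 = constant 0 : index
  store %arg0, %arg1[%c0] : memref<4xf32, 5>
  "terminator"() : () -> ()
}

// After lowering, the buffer is a local alloca with no address space
// qualifier, matching the CHECK lines in memory-attrbution.mlir.
%size = llvm.mlir.constant(4 : i64) : !llvm.i64
%raw = llvm.alloca %size x !llvm.float : (!llvm.i64) -> !llvm<"float*">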