/// Returns the numeric value used to identify the workgroup memory address
/// space.
- static int getWorkgroupAddressSpace() { return 3; }
+ /// NOTE(review): 3 presumably corresponds to NVVM's shared-memory address
+ /// space — confirm against the NVVM target's address-space mapping.
+ static unsigned getWorkgroupAddressSpace() { return 3; }
+
+ /// Returns the numeric value used to identify the private memory address
+ /// space. This is the space the GPU-to-NVVM lowering drops when it converts
+ /// private attributions into default-address-space allocas.
+ static unsigned getPrivateAddressSpace() { return 5; }
LogicalResult verifyOperationAttribute(Operation *op,
NamedAttribute attr) override;
namespace {
+/// Derived type converter for GPU to NVVM lowering. The GPU dialect uses memory
+/// space 5 for private memory attributions, but NVVM represents private
+/// memory allocations as local `alloca`s in the default address space. This
+/// converter drops the private memory space to support the use case above.
+class NVVMTypeConverter : public LLVMTypeConverter {
+public:
+  using LLVMTypeConverter::LLVMTypeConverter;
+
+  /// Converts `type` to its LLVM counterpart. Memrefs whose memory space is
+  /// the GPU dialect's private address space are first rebuilt without a
+  /// memory space (dropping it as described on the class); all other types
+  /// are forwarded to the base converter unchanged.
+  Type convertType(Type type) override {
+    auto memref = type.dyn_cast<MemRefType>();
+    if (memref &&
+        memref.getMemorySpace() == gpu::GPUDialect::getPrivateAddressSpace()) {
+      // Rebuild the memref omitting the memory-space argument so the result
+      // lives in the default memory space.
+      type = MemRefType::get(memref.getShape(), memref.getElementType(),
+                             memref.getAffineMaps());
+    }
+
+    return LLVMTypeConverter::convertType(type);
+  }
+};
+
/// Converts all_reduce op to LLVM/NVVM ops.
struct GPUAllReduceOpLowering : public LLVMOpLowering {
using AccumulatorFactory = std::function<Value *(
assert(type && type.hasStaticShape() &&
"unexpected type in attribution");
+ // Explicitly drop memory space when lowering private memory
+ // attributions since NVVM models it as `alloca`s in the default
+ // memory space and does not support `alloca`s with addrspace(5).
auto ptrType = lowering.convertType(type.getElementType())
.cast<LLVM::LLVMType>()
- .getPointerTo(type.getMemorySpace());
+ .getPointerTo();
Value *numElements = rewriter.create<LLVM::ConstantOp>(
gpuFuncOp.getLoc(), int64Ty,
rewriter.getI64IntegerAttr(type.getNumElements()));
return;
OwningRewritePatternList patterns;
- LLVMTypeConverter converter(m.getContext());
+ NVVMTypeConverter converter(m.getContext());
populateStdToLLVMConversionPatterns(converter, patterns);
populateGpuToNVVMConversionPatterns(converter, patterns);
ConversionTarget target(getContext());
return success();
}
+/// Verifies that every value in `attributions` has a memref type whose memory
+/// space equals `memorySpace`, emitting an error on `op` and returning
+/// failure otherwise.
+static LogicalResult verifyAttributions(Operation *op,
+                                        ArrayRef<BlockArgument *> attributions,
+                                        unsigned memorySpace) {
+  for (Value *v : attributions) {
+    // Attributions must be memrefs; any non-memref type is rejected outright.
+    auto type = v->getType().dyn_cast<MemRefType>();
+    if (!type)
+      return op->emitOpError() << "expected memref type in attribution";
+
+    if (type.getMemorySpace() != memorySpace) {
+      return op->emitOpError()
+             << "expected memory space " << memorySpace << " in attribution";
+    }
+  }
+  return success();
+}
+
/// Verifies the body of the function.
LogicalResult GPUFuncOp::verifyBody() {
unsigned numFuncArguments = getNumArguments();
<< blockArgType;
}
+ if (failed(verifyAttributions(getOperation(), getWorkgroupAttributions(),
+ GPUDialect::getWorkgroupAddressSpace())) ||
+ failed(verifyAttributions(getOperation(), getPrivateAttributions(),
+ GPUDialect::getPrivateAddressSpace())))
+ return failure();
+
return success();
}
module attributes {gpu.kernel_module} {
// CHECK-LABEL: llvm.func @private
- gpu.func @private(%arg0: f32) private(%arg1: memref<4xf32>) {
+ gpu.func @private(%arg0: f32) private(%arg1: memref<4xf32, 5>) {
// Allocate private memory inside the function.
// CHECK: %[[size:.*]] = llvm.mlir.constant(4 : i64) : !llvm.i64
// CHECK: %[[raw:.*]] = llvm.alloca %[[size]] x !llvm.float : (!llvm.i64) -> !llvm<"float*">
// CHECK: llvm.getelementptr
// CHECK: llvm.store
%c0 = constant 0 : index
- store %arg0, %arg1[%c0] : memref<4xf32>
+ store %arg0, %arg1[%c0] : memref<4xf32, 5>
"terminator"() : () -> ()
}
// CHECK-LABEL: llvm.func @multiple
gpu.func @multiple(%arg0: f32)
workgroup(%arg1: memref<1xf32, 3>, %arg2: memref<2xf32, 3>)
- private(%arg3: memref<3xf32>, %arg4: memref<4xf32>) {
+ private(%arg3: memref<3xf32, 5>, %arg4: memref<4xf32, 5>) {
// Workgroup buffers.
// CHECK: llvm.mlir.addressof @[[buffer1]]
%c0 = constant 0 : index
store %arg0, %arg1[%c0] : memref<1xf32, 3>
store %arg0, %arg2[%c0] : memref<2xf32, 3>
- store %arg0, %arg3[%c0] : memref<3xf32>
- store %arg0, %arg4[%c0] : memref<4xf32>
+ store %arg0, %arg3[%c0] : memref<3xf32, 5>
+ store %arg0, %arg4[%c0] : memref<4xf32, 5>
"terminator"() : () -> ()
}
}
}) {sym_name="kernel_1", type=f32} : () -> ()
}
}
+
+// -----
+
+// Check that a non-memref workgroup attribution is rejected by the verifier.
+module {
+  module @gpu_funcs attributes {gpu.kernel_module} {
+    // expected-error @+1 {{expected memref type in attribution}}
+    gpu.func @kernel() workgroup(%0: i32) {
+      gpu.return
+    }
+  }
+}
+
+// -----
+
+// Check that a workgroup attribution in the wrong memory space (default
+// instead of 3) is rejected by the verifier.
+module {
+  module @gpu_funcs attributes {gpu.kernel_module} {
+    // expected-error @+1 {{expected memory space 3 in attribution}}
+    gpu.func @kernel() workgroup(%0: memref<4xf32>) {
+      gpu.return
+    }
+  }
+}
+
+// -----
+
+// Check that a private attribution in the wrong memory space (default
+// instead of 5) is rejected by the verifier.
+module {
+  module @gpu_funcs attributes {gpu.kernel_module} {
+    // expected-error @+1 {{expected memory space 5 in attribution}}
+    gpu.func @kernel() private(%0: memref<4xf32>) {
+      gpu.return
+    }
+  }
+}