From ccc767d63bea96652b97f4b726bbcf079716f33e Mon Sep 17 00:00:00 2001
From: Alex Zinenko
Date: Fri, 6 Dec 2019 11:59:59 -0800
Subject: [PATCH] Move GPU::FuncOp definition to ODS - NFC

Move the definition of the GPU function operation from hand-rolled C++ code
to the ODS framework. This only does the move; a follow-up is necessary to
clean up users of custom functions that could be auto-generated by ODS.

PiperOrigin-RevId: 284233245
---
 mlir/g3doc/Dialects/GPU.md                 | 100 +++++----------------
 mlir/include/mlir/Dialect/GPU/GPUDialect.h |  82 -----------------
 mlir/include/mlir/Dialect/GPU/GPUOps.td    | 139 +++++++++++++++++++++++++++++
 mlir/include/mlir/IR/OpBase.td             |   2 +
 mlir/lib/Dialect/GPU/IR/GPUDialect.cpp     |  59 ++++++------
 5 files changed, 193 insertions(+), 189 deletions(-)

diff --git a/mlir/g3doc/Dialects/GPU.md b/mlir/g3doc/Dialects/GPU.md
index faa0721..d34ce18 100644
--- a/mlir/g3doc/Dialects/GPU.md
+++ b/mlir/g3doc/Dialects/GPU.md
@@ -12,6 +12,28 @@ manipulations to launch a GPU kernel and provide a simple path towards GPU
 execution from MLIR. It may be targeted, for example, by DSLs using MLIR.
 
 The dialect uses `gpu` as its canonical prefix.
 
+## Memory attribution
+
+Memory buffers are defined at the function level, either in "gpu.launch" or in
+"gpu.func" ops. This encoding makes it clear where the memory belongs and makes
+the lifetime of the memory visible. The memory is only accessible while the
+kernel is launched/the function is currently invoked. The latter is stricter
+than what actual GPU implementations allow, but using static memory at the
+function level is just a convenience. It is also always possible to pass
+pointers to the workgroup memory into other functions, provided they expect the
+correct memory space.
+
+The buffers are considered live throughout the execution of the GPU function
+body. The absence of memory attribution syntax means that the function does not
+require special buffers. Rationale: although the underlying models declare
+memory buffers at the module level, we chose to do it at the function level to
+provide some structuring for the lifetime of those buffers; this avoids the
+incentive to use the buffers for communicating between different kernels or
+launches of the same kernel, which should be done through function arguments
+instead; we chose not to use an `alloca`-style approach that would require a
+more complex lifetime analysis, following the principles of MLIR that promote
+structure and representing analysis results in the IR.
+
 ## Operations
 
 ### `gpu.block_dim`
@@ -47,84 +69,6 @@ Example:
   %gDimZ = "gpu.grid_dim"() {dimension = "z"} : () -> (index)
 ```
 
-### `gpu.func`
-
-Defines a function that can be executed on a GPU. This supports memory
-attribution and its body has a particular execution model.
-
-GPU functions are either kernels (as indicated by the `kernel` attribute) or
-regular functions. The former can be launched from the host side, while the
-latter are device side only.
-
-The memory attribution defines SSA values that correspond to memory buffers
-allocated in the memory hierarchy of the GPU (see below).
-
-The operation has one attached region that corresponds to the body of the
-function. The region arguments consist of the function arguments without
-modification, followed by buffers defined in memory annotations. The body of a
-GPU function, when launched, is executed by multiple work items. There are no
-guarantees on the order in which work items execute, or on the connection
-between them.
-In particular, work items are not necessarily executed in lock-step.
-Synchronization ops such as "gpu.barrier" should be used to coordinate work
-items. Declarations of GPU functions, i.e. not having the body region, are not
-supported.
-
-#### Memory attribution
-
-Memory buffers are defined at the function level, either in "gpu.launch" or in
-"gpu.func" ops. This encoding makes it clear where the memory belongs and makes
-the lifetime of the memory visible. The memory is only accessible while the
-kernel is launched/the function is currently invoked. The latter is more strict
-than actual GPU implementations but using static memory at the function level
-is just for convenience. It is also always possible to pass pointers to the
-workgroup memory into other functions, provided they expect the correct memory
-space.
-
-The buffers are considered live throughout the execution of the GPU function
-body. The absence of memory attribution syntax means that the function does not
-require special buffers. Rationale: although the underlying models declare
-memory buffers at the module level, we chose to do it at the function level to
-provide some structuring for the lifetime of those buffers; this avoids the
-incentive to use the buffers for communicating between different kernels or
-launches of the same kernel, which should be done through function arguments
-instead; we chose not to use `alloca`-style approach that would require more
-complex lifetime analysis following the principles of MLIR that promote
-structure and representing analysis results in the IR.
-
-Syntax:
-
-``` {.ebnf}
-op ::= `gpu.func` symbol-ref-id `(` argument-list `)` (`->`
-function-result-list)?
-       memory-attribution `kernel`? function-attributes? region
-
-memory-attribution ::= (`workgroup` `(` ssa-id-and-type-list `)`)?
-                       (`private` `(` ssa-id-and-type-list `)`)?
-```
-
-Example:
-
-```mlir {.mlir}
-gpu.func @foo(%arg0: index)
-    workgroup(%workgroup: memref<32xf32, 3>)
-    private(%private: memref<1xf32, 5>)
-    kernel
-    attributes {qux: "quux"} {
-  gpu.return
-}
-```
-
-The generic form illustrates the concept
-
-```mlir {.mlir}
-"gpu.func"(%arg: index) {sym_name: "foo", kernel, qux: "quux"} ({
-^bb0(%arg0: index, %workgroup: memref<32xf32, 3>, %private: memref<1xf32, 5>):
-  "gpu.return"() : () -> ()
-}) : (index) -> ()
-```
-
-Note the non-default memory spaces used in memref types in memory-attribution.
-
 ### `gpu.launch`
 
 Launch a kernel on the specified grid of thread blocks. The body of the kernel
diff --git a/mlir/include/mlir/Dialect/GPU/GPUDialect.h b/mlir/include/mlir/Dialect/GPU/GPUDialect.h
index 619f769..194dd9c 100644
--- a/mlir/include/mlir/Dialect/GPU/GPUDialect.h
+++ b/mlir/include/mlir/Dialect/GPU/GPUDialect.h
@@ -197,88 +197,6 @@ private:
   static StringRef getKernelModuleAttrName() { return "kernel_module"; }
 };
 
-class GPUFuncOp : public Op<GPUFuncOp, OpTrait::FunctionLike,
-                            OpTrait::IsIsolatedFromAbove, OpTrait::Symbol> {
-public:
-  using Op::Op;
-
-  /// Returns the name of the operation.
-  static StringRef getOperationName() { return "gpu.func"; }
-
-  /// Constructs a FuncOp, hook for Builder methods.
-  static void build(Builder *builder, OperationState &result, StringRef name,
-                    FunctionType type, ArrayRef<Type> workgroupAttributions,
-                    ArrayRef<Type> privateAttributions,
-                    ArrayRef<NamedAttribute> attrs);
-
-  /// Prints the Op in custom format.
-  void print(OpAsmPrinter &p);
-
-  /// Parses the Op in custom format.
-  static ParseResult parse(OpAsmParser &parser, OperationState &result);
-
-  /// Returns `true` if the GPU function defined by this Op is a kernel, i.e.
-  /// it is intended to be launched from host.
-  bool isKernel() {
-    return getAttrOfType<UnitAttr>(GPUDialect::getKernelFuncAttrName()) !=
-           nullptr;
-  }
-
-  /// Returns the type of the function this Op defines.
-  FunctionType getType() {
-    return getTypeAttr().getValue().cast<FunctionType>();
-  }
-
-  /// Returns the number of buffers located in the workgroup memory.
-  unsigned getNumWorkgroupAttributions() {
-    return getAttrOfType<IntegerAttr>(getNumWorkgroupAttributionsAttrName())
-        .getInt();
-  }
-
-  /// Returns a list of block arguments that correspond to buffers located in
-  /// the workgroup memory.
-  ArrayRef<BlockArgument *> getWorkgroupAttributions() {
-    auto begin =
-        std::next(getBody().front().args_begin(), getType().getNumInputs());
-    auto end = std::next(begin, getNumWorkgroupAttributions());
-    return {begin, end};
-  }
-
-  /// Returns a list of block arguments that correspond to buffers located in
-  /// the private memory.
-  ArrayRef<BlockArgument *> getPrivateAttributions() {
-    auto begin =
-        std::next(getBody().front().args_begin(),
-                  getType().getNumInputs() + getNumWorkgroupAttributions());
-    return {begin, getBody().front().args_end()};
-  }
-
-  /// Returns the name of the attribute containing the number of buffers
-  /// located in the workgroup memory.
-  static StringRef getNumWorkgroupAttributionsAttrName() {
-    return "workgroup_attibutions";
-  }
-
-private:
-  // FunctionLike trait needs access to the functions below.
-  friend class OpTrait::FunctionLike<GPUFuncOp>;
-
-  /// Hooks for the input/output type enumeration in FunctionLike.
-  unsigned getNumFuncArguments() { return getType().getNumInputs(); }
-  unsigned getNumFuncResults() { return getType().getNumResults(); }
-
-  /// Returns the keywords used in the custom syntax for this Op.
-  static StringRef getWorkgroupKeyword() { return "workgroup"; }
-  static StringRef getPrivateKeyword() { return "private"; }
-  static StringRef getKernelKeyword() { return "kernel"; }
-
-  /// Hook for FunctionLike verifier.
-  LogicalResult verifyType();
-
-  /// Verifies the body of the function.
-  LogicalResult verifyBody();
-};
-
 #define GET_OP_CLASSES
 #include "mlir/Dialect/GPU/GPUOps.h.inc"
 
diff --git a/mlir/include/mlir/Dialect/GPU/GPUOps.td b/mlir/include/mlir/Dialect/GPU/GPUOps.td
index 4329084..fcaa77c 100644
--- a/mlir/include/mlir/Dialect/GPU/GPUOps.td
+++ b/mlir/include/mlir/Dialect/GPU/GPUOps.td
@@ -42,6 +42,145 @@ def GPU_BlockIdOp : GPU_IndexOp<"block_id">;
 def GPU_GridDimOp : GPU_IndexOp<"grid_dim">;
 def GPU_ThreadIdOp : GPU_IndexOp<"thread_id">;
 
+def GPU_GPUFuncOp : GPU_Op<"func", [FunctionLike, IsolatedFromAbove, Symbol]> {
+  let summary = "Function executable on a GPU";
+
+  let description = [{
+    Defines a function that can be executed on a GPU. This supports memory
+    attribution and its body has a particular execution model.
+
+    GPU functions are either kernels (as indicated by the `kernel` attribute)
+    or regular functions. The former can be launched from the host side, while
+    the latter are device side only.
+
+    The memory attribution defines SSA values that correspond to memory
+    buffers allocated in the memory hierarchy of the GPU (see below).
+
+    The operation has one attached region that corresponds to the body of the
+    function. The region arguments consist of the function arguments without
+    modification, followed by buffers defined in memory annotations. The body
+    of a GPU function, when launched, is executed by multiple work items.
+    There are no guarantees on the order in which work items execute, or on
+    the connection between them.
+    In particular, work items are not necessarily executed in lock-step.
+    Synchronization ops such as "gpu.barrier" should be used to coordinate
+    work items. Declarations of GPU functions, i.e. not having the body
+    region, are not supported.
+
+    Syntax:
+
+    ``` {.ebnf}
+    op ::= `gpu.func` symbol-ref-id `(` argument-list `)` (`->`
+    function-result-list)?
+           memory-attribution `kernel`? function-attributes? region
+
+    memory-attribution ::= (`workgroup` `(` ssa-id-and-type-list `)`)?
+                           (`private` `(` ssa-id-and-type-list `)`)?
+    ```
+
+    Example:
+
+    ```mlir
+    gpu.func @foo(%arg0: index)
+        workgroup(%workgroup: memref<32xf32, 3>)
+        private(%private: memref<1xf32, 5>)
+        kernel
+        attributes {qux: "quux"} {
+      gpu.return
+    }
+    ```
+
+    The generic form illustrates the concept
+
+    ```mlir
+    "gpu.func"(%arg: index) {sym_name: "foo", kernel, qux: "quux"} ({
+    ^bb0(%arg0: index, %workgroup: memref<32xf32, 3>,
+         %private: memref<1xf32, 5>):
+      "gpu.return"() : () -> ()
+    }) : (index) -> ()
+    ```
+
+    Note the non-default memory spaces used in memref types in memory
+    attribution.
+  }];
+
+  let regions = (region AnyRegion:$body);
+
+  let skipDefaultBuilders = 1;
+
+  let builders = [
+    OpBuilder<"Builder *builder, OperationState &result, StringRef name, "
+              "FunctionType type, ArrayRef<Type> workgroupAttributions, "
+              "ArrayRef<Type> privateAttributions, "
+              "ArrayRef<NamedAttribute> attrs">
+  ];
+
+  let extraClassDeclaration = [{
+    /// Returns `true` if the GPU function defined by this Op is a kernel,
+    /// i.e. it is intended to be launched from the host.
+    bool isKernel() {
+      return getAttrOfType<UnitAttr>(GPUDialect::getKernelFuncAttrName()) !=
+             nullptr;
+    }
+
+    /// Returns the type of the function this Op defines.
+    FunctionType getType() {
+      return getTypeAttr().getValue().cast<FunctionType>();
+    }
+
+    /// Returns the number of buffers located in the workgroup memory.
+    unsigned getNumWorkgroupAttributions() {
+      return getAttrOfType<IntegerAttr>(getNumWorkgroupAttributionsAttrName())
+          .getInt();
+    }
+
+    /// Returns a list of block arguments that correspond to buffers located
+    /// in the workgroup memory.
+    ArrayRef<BlockArgument *> getWorkgroupAttributions() {
+      auto begin =
+          std::next(getBody().front().args_begin(), getType().getNumInputs());
+      auto end = std::next(begin, getNumWorkgroupAttributions());
+      return {begin, end};
+    }
+
+    /// Returns a list of block arguments that correspond to buffers located
+    /// in the private memory.
+    ArrayRef<BlockArgument *> getPrivateAttributions() {
+      auto begin =
+          std::next(getBody().front().args_begin(),
+                    getType().getNumInputs() + getNumWorkgroupAttributions());
+      return {begin, getBody().front().args_end()};
+    }
+
+    /// Returns the name of the attribute containing the number of buffers
+    /// located in the workgroup memory.
+    static StringRef getNumWorkgroupAttributionsAttrName() {
+      return "workgroup_attibutions";
+    }
+
+    // FunctionLike trait needs access to the functions below.
+    friend class OpTrait::FunctionLike<GPUFuncOp>;
+
+    /// Hooks for the input/output type enumeration in FunctionLike.
+    unsigned getNumFuncArguments() { return getType().getNumInputs(); }
+    unsigned getNumFuncResults() { return getType().getNumResults(); }
+
+    /// Returns the keywords used in the custom syntax for this Op.
+    static StringRef getWorkgroupKeyword() { return "workgroup"; }
+    static StringRef getPrivateKeyword() { return "private"; }
+    static StringRef getKernelKeyword() { return "kernel"; }
+
+    /// Hook for FunctionLike verifier.
+    LogicalResult verifyType();
+
+    /// Verifies the body of the function.
+    LogicalResult verifyBody();
+  }];
+
+  // let verifier = [{ return ::verifyFuncOp(*this); }];
+  let printer = [{ printGPUFuncOp(p, *this); }];
+  let parser = [{ return parseGPUFuncOp(parser, result); }];
+}
+
 def GPU_ReturnOp : GPU_Op<"return", [Terminator]>,
     Arguments<(ins)>, Results<(outs)> {
   let summary = "Terminator for GPU launch regions.";
diff --git a/mlir/include/mlir/IR/OpBase.td b/mlir/include/mlir/IR/OpBase.td
index 6a884f2..dd7fac2 100644
--- a/mlir/include/mlir/IR/OpBase.td
+++ b/mlir/include/mlir/IR/OpBase.td
@@ -1335,6 +1335,8 @@ class PredOpTrait<string descr, Pred pred> : OpTrait {
 def Broadcastable : NativeOpTrait<"BroadcastableTwoOperandsOneResult">;
 // X op Y == Y op X
 def Commutative : NativeOpTrait<"IsCommutative">;
+// Op behaves like a function.
+def FunctionLike : NativeOpTrait<"FunctionLike">;
 // Op is isolated from above.
 def IsolatedFromAbove : NativeOpTrait<"IsIsolatedFromAbove">;
 // Op results are float or vectors/tensors thereof.
diff --git a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
index 8d84fad..38998b9 100644
--- a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
+++ b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
@@ -46,7 +46,7 @@ bool GPUDialect::isKernel(Operation *op) {
 
 GPUDialect::GPUDialect(MLIRContext *context)
     : Dialect(getDialectName(), context) {
-  addOperations<GPUFuncOp, LaunchOp, LaunchFuncOp,
+  addOperations<LaunchOp, LaunchFuncOp,
 #define GET_OP_LIST
 #include "mlir/Dialect/GPU/GPUOps.cpp.inc"
                 >();
@@ -165,14 +165,6 @@ static LogicalResult verifyAllReduce(gpu::AllReduceOp allReduce) {
   return success();
 }
 
-// Namespace avoids ambiguous ReturnOpOperandAdaptor.
-namespace mlir {
-namespace gpu {
-#define GET_OP_CLASSES
-#include "mlir/Dialect/GPU/GPUOps.cpp.inc"
-} // namespace gpu
-} // namespace mlir
-
 //===----------------------------------------------------------------------===//
 // LaunchOp
 //===----------------------------------------------------------------------===//
@@ -639,7 +631,7 @@ parseAttributions(OpAsmParser &parser, StringRef keyword,
 /// ::= `gpu.func` symbol-ref-id `(` argument-list `)`
 ///     (`->` function-result-list)? memory-attribution `kernel`?
 ///     function-attributes? region
-ParseResult GPUFuncOp::parse(OpAsmParser &parser, OperationState &result) {
+static ParseResult parseGPUFuncOp(OpAsmParser &parser, OperationState &result) {
   SmallVector<OpAsmParser::OperandType, 8> entryArgs;
   SmallVector<SmallVector<NamedAttribute, 2>, 1> argAttrs;
   SmallVector<SmallVector<NamedAttribute, 2>, 1> resultAttrs;
@@ -667,26 +659,26 @@ ParseResult GPUFuncOp::parse(OpAsmParser &parser, OperationState &result) {
   // not to the function type.
   Builder &builder = parser.getBuilder();
   auto type = builder.getFunctionType(argTypes, resultTypes);
-  result.addAttribute(getTypeAttrName(), TypeAttr::get(type));
+  result.addAttribute(GPUFuncOp::getTypeAttrName(), TypeAttr::get(type));
 
   // Parse workgroup memory attributions.
-  if (failed(parseAttributions(parser, getWorkgroupKeyword(), entryArgs,
-                               argTypes)))
+  if (failed(parseAttributions(parser, GPUFuncOp::getWorkgroupKeyword(),
+                               entryArgs, argTypes)))
     return failure();
 
   // Store the number of operands we just parsed as the number of workgroup
   // memory attributions.
   unsigned numWorkgroupAttrs = argTypes.size() - type.getNumInputs();
-  result.addAttribute(getNumWorkgroupAttributionsAttrName(),
+  result.addAttribute(GPUFuncOp::getNumWorkgroupAttributionsAttrName(),
                       builder.getI64IntegerAttr(numWorkgroupAttrs));
 
   // Parse private memory attributions.
-  if (failed(
-          parseAttributions(parser, getPrivateKeyword(), entryArgs, argTypes)))
+  if (failed(parseAttributions(parser, GPUFuncOp::getPrivateKeyword(),
+                               entryArgs, argTypes)))
     return failure();
 
   // Parse the kernel attribute if present.
-  if (succeeded(parser.parseOptionalKeyword(getKernelKeyword())))
+  if (succeeded(parser.parseOptionalKeyword(GPUFuncOp::getKernelKeyword())))
     result.addAttribute(GPUDialect::getKernelFuncAttrName(),
                         builder.getUnitAttr());
 
@@ -712,24 +704,25 @@ static void printAttributions(OpAsmPrinter &p, StringRef keyword,
   p << ')';
 }
 
-void GPUFuncOp::print(OpAsmPrinter &p) {
-  p << getOperationName() << ' ';
-  p.printSymbolName(getName());
+/// Prints a GPU Func op.
+void printGPUFuncOp(OpAsmPrinter &p, GPUFuncOp op) {
+  p << GPUFuncOp::getOperationName() << ' ';
+  p.printSymbolName(op.getName());
 
-  FunctionType type = getType();
-  impl::printFunctionSignature(p, this->getOperation(), type.getInputs(),
+  FunctionType type = op.getType();
+  impl::printFunctionSignature(p, op.getOperation(), type.getInputs(),
                                /*isVariadic=*/false, type.getResults());
 
-  printAttributions(p, getWorkgroupKeyword(), getWorkgroupAttributions());
-  printAttributions(p, getPrivateKeyword(), getPrivateAttributions());
-  if (isKernel())
-    p << ' ' << getKernelKeyword();
+  printAttributions(p, op.getWorkgroupKeyword(), op.getWorkgroupAttributions());
+  printAttributions(p, op.getPrivateKeyword(), op.getPrivateAttributions());
+  if (op.isKernel())
+    p << ' ' << op.getKernelKeyword();
 
-  impl::printFunctionAttributes(p, this->getOperation(), type.getNumInputs(),
+  impl::printFunctionAttributes(p, op.getOperation(), type.getNumInputs(),
                                 type.getNumResults(),
-                                {getNumWorkgroupAttributionsAttrName(),
+                                {op.getNumWorkgroupAttributionsAttrName(),
                                  GPUDialect::getKernelFuncAttrName()});
-  p.printRegion(getBody(), /*printEntryBlockArgs=*/false);
+  p.printRegion(op.getBody(), /*printEntryBlockArgs=*/false);
 }
 
 /// Hook for FunctionLike verifier.
@@ -762,3 +755,11 @@ LogicalResult GPUFuncOp::verifyBody() {
 
   return success();
 }
+
+// Namespace avoids ambiguous ReturnOpOperandAdaptor.
+namespace mlir {
+namespace gpu {
+#define GET_OP_CLASSES
+#include "mlir/Dialect/GPU/GPUOps.cpp.inc"
+} // namespace gpu
+} // namespace mlir
-- 
2.7.4
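
For reference, the optional pieces of the `gpu.func` grammar introduced above (memory attribution, the `kernel` marker, and extra attributes) can all be omitted, yielding a plain device-side function. The sketch below is illustrative only and not taken from the patch; the function name and body are hypothetical. It also shows a workgroup-memory buffer (memory space 3) being passed in as an argument, as the memory attribution section permits.

```mlir
// Hypothetical non-kernel device function: no memory attribution and no
// `kernel` attribute, so it is callable only from other device-side code.
gpu.func @device_helper(%buf: memref<8xf32, 3>) {
  // Synchronize the work items of the surrounding launch before returning.
  gpu.barrier
  gpu.return
}
```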