%gDimZ = "gpu.grid_dim"() {dimension = "z"} : () -> (index)
```
-### `gpu.launch_func`
-
-Launch a kernel function on the specified grid of thread blocks. `gpu.launch`
-operations are lowered to `gpu.launch_func` operations by outlining the kernel
-body into a function in a dedicated module, which reflects the separate
-compilation process. The kernel function is required to have the `gpu.kernel`
-attribute. The module containing the kernel function is required to have the
-`gpu.kernel_module` attribute and must be named. And finally, the module
-containing the kernel module (which thus cannot be the top-level module) is
-required to have the `gpu.container_module` attribute. The `gpu.launch_func`
-operation has a string attribute named `kernel` to specify the name of the
-kernel function to launch and an attribute named `kernel_module` to specify the
-name of the module containing that kernel function.
-
-The operation takes at least six operands, with the first three operands being
-grid sizes along x,y,z dimensions and the following three being block sizes
-along x,y,z dimensions. When a lower-dimensional kernel is required, unused
-sizes must be explicitly set to `1`. The remaining operands are passed as
-arguments to the kernel function.
-
-A custom syntax for this operation is currently not available.
-
-Example:
-
-```mlir
-module attributes {gpu.container_module} {
-
- // This module creates a separate compilation unit for the GPU compiler.
- module @kernels attributes {gpu.kernel_module} {
- func @kernel_1(%arg0 : f32, %arg1 : !llvm<"float*">)
- attributes { nvvm.kernel = true } {
-
- // Operations that produce block/thread IDs and dimensions are injected when
- // outlining the `gpu.launch` body to a function called by `gpu.launch_func`.
- %tIdX = "gpu.thread_id"() {dimension = "x"} : () -> (index)
- %tIdY = "gpu.thread_id"() {dimension = "y"} : () -> (index)
- %tIdZ = "gpu.thread_id"() {dimension = "z"} : () -> (index)
-
- %bDimX = "gpu.block_dim"() {dimension = "x"} : () -> (index)
- %bDimY = "gpu.block_dim"() {dimension = "y"} : () -> (index)
- %bDimZ = "gpu.block_dim"() {dimension = "z"} : () -> (index)
-
- %bIdX = "gpu.block_id"() {dimension = "x"} : () -> (index)
- %bIdY = "gpu.block_id"() {dimension = "y"} : () -> (index)
- %bIdZ = "gpu.block_id"() {dimension = "z"} : () -> (index)
-
- %gDimX = "gpu.grid_dim"() {dimension = "x"} : () -> (index)
- %gDimY = "gpu.grid_dim"() {dimension = "y"} : () -> (index)
- %gDimZ = "gpu.grid_dim"() {dimension = "z"} : () -> (index)
-
- "some_op"(%bx, %tx) : (index, index) -> ()
- %42 = load %arg1[%bx] : memref<?xf32, 1>
- }
- }
-
- "gpu.launch_func"(%cst, %cst, %cst, // Grid sizes.
- %cst, %cst, %cst, // Block sizes.
- %arg0, %arg1) // Arguments passed to the kernel function.
- { kernel_module = @kernels, // Module containing the kernel function.
- kernel = "kernel_1" } // Kernel function.
- : (index, index, index, index, index, index, f32, !llvm<"float*">) -> ()
-}
-```
-
### `gpu.thread_id`
Returns the thread id, i.e. the index of the current thread within the block
Value *z;
};
-/// Operation to launch a kernel given as outlined function.
-class LaunchFuncOp : public Op<LaunchFuncOp, OpTrait::AtLeastNOperands<6>::Impl,
- OpTrait::ZeroResult> {
-public:
- using Op::Op;
-
- static void build(Builder *builder, OperationState &result, FuncOp kernelFunc,
- Value *gridSizeX, Value *gridSizeY, Value *gridSizeZ,
- Value *blockSizeX, Value *blockSizeY, Value *blockSizeZ,
- ValueRange kernelOperands);
-
- static void build(Builder *builder, OperationState &result, FuncOp kernelFunc,
- KernelDim3 gridSize, KernelDim3 blockSize,
- ValueRange kernelOperands);
-
- /// The kernel function specified by the operation's `kernel` attribute.
- StringRef kernel();
- /// The number of operands passed to the kernel function.
- unsigned getNumKernelOperands();
- /// The name of the kernel module specified by the operation's `kernel_module`
- /// attribute.
- StringRef getKernelModuleName();
- /// The i-th operand passed to the kernel function.
- Value *getKernelOperand(unsigned i);
-
- /// Get the SSA values passed as operands to specify the grid size.
- KernelDim3 getGridSizeOperandValues();
- /// Get the SSA values passed as operands to specify the block size.
- KernelDim3 getBlockSizeOperandValues();
-
- LogicalResult verify();
-
- static StringRef getOperationName() { return "gpu.launch_func"; }
-
- /// The number of launch configuration operands, placed at the leading
- /// positions of the operand list.
- static constexpr unsigned kNumConfigOperands = 6;
-
-private:
- // This needs to quietly verify if attributes with names defined below are
- // present since it is run before the verifier of this op.
- friend LogicalResult GPUDialect::verifyOperationAttribute(Operation *,
- NamedAttribute);
-
- /// The name of the symbolRef attribute specifying the kernel to launch.
- static StringRef getKernelAttrName() { return "kernel"; }
-
- /// The name of the symbolRef attribute specifying the name of the module
- /// containing the kernel to launch.
- static StringRef getKernelModuleAttrName() { return "kernel_module"; }
-};
-
#define GET_OP_CLASSES
#include "mlir/Dialect/GPU/GPUOps.h.inc"
#define GPU_OPS
include "mlir/IR/OpBase.td"
+include "mlir/Dialect/LLVMIR/LLVMOpBase.td"
+
+// Type constraint accepting standard integers, indices and wrapped LLVM integer
+// types.
+def IntLikeOrLLVMInt : TypeConstraint<
+ Or<[AnyInteger.predicate, Index.predicate, LLVMInt.predicate]>,
+ "integer, index or LLVM dialect equivalent">;
+
+//===----------------------------------------------------------------------===//
+// GPU Dialect operations.
+//===----------------------------------------------------------------------===//
def GPU_Dialect : Dialect {
let name = "gpu";
let parser = [{ return parseGPUFuncOp(parser, result); }];
}
+def GPU_LaunchFuncOp : GPU_Op<"launch_func">,
+ Arguments<(ins IntLikeOrLLVMInt:$gridSizeX, IntLikeOrLLVMInt:$gridSizeY,
+ IntLikeOrLLVMInt:$gridSizeZ, IntLikeOrLLVMInt:$blockSizeX,
+ IntLikeOrLLVMInt:$blockSizeY, IntLikeOrLLVMInt:$blockSizeZ,
+ Variadic<AnyType>:$operands)>,
+ Results<(outs)> {
+ let summary = "Launches a function as a GPU kernel";
+
+ let description = [{
+ Launch a kernel function on the specified grid of thread blocks.
+ `gpu.launch` operations are lowered to `gpu.launch_func` operations by
+ outlining the kernel body into a function in a dedicated module, which
+ reflects the separate compilation process. The kernel function is required
+ to have the `gpu.kernel` attribute. The module containing the kernel
+ function is required to have the `gpu.kernel_module` attribute and must be
+ named. And finally, the module containing the kernel module (which thus
+ cannot be the top-level module) is required to have the
+ `gpu.container_module` attribute. The `gpu.launch_func` operation has a
+ string attribute named `kernel` to specify the name of the kernel function
+ to launch and a symbol reference attribute named `kernel_module` to specify
+ the name of the module containing that kernel function.
+
+ The operation takes at least six operands, with the first three operands
+ being grid sizes along x,y,z dimensions and the following three being block
+ sizes along x,y,z dimensions. When a lower-dimensional kernel is required,
+ unused sizes must be explicitly set to `1`. The remaining operands are
+ passed as arguments to the kernel function.
+
+ A custom syntax for this operation is currently not available.
+
+ Example:
+
+ ```mlir
+ module attributes {gpu.container_module} {
+
+ // This module creates a separate compilation unit for the GPU compiler.
+ module @kernels attributes {gpu.kernel_module} {
+ func @kernel_1(%arg0 : f32, %arg1 : !llvm<"float*">)
+ attributes { nvvm.kernel = true } {
+
+ // Operations that produce block/thread IDs and dimensions are
+ // injected when outlining the `gpu.launch` body to a function called
+ // by `gpu.launch_func`.
+ %tIdX = "gpu.thread_id"() {dimension = "x"} : () -> (index)
+ %tIdY = "gpu.thread_id"() {dimension = "y"} : () -> (index)
+ %tIdZ = "gpu.thread_id"() {dimension = "z"} : () -> (index)
+
+ %bDimX = "gpu.block_dim"() {dimension = "x"} : () -> (index)
+ %bDimY = "gpu.block_dim"() {dimension = "y"} : () -> (index)
+ %bDimZ = "gpu.block_dim"() {dimension = "z"} : () -> (index)
+
+ %bIdX = "gpu.block_id"() {dimension = "x"} : () -> (index)
+ %bIdY = "gpu.block_id"() {dimension = "y"} : () -> (index)
+ %bIdZ = "gpu.block_id"() {dimension = "z"} : () -> (index)
+
+ %gDimX = "gpu.grid_dim"() {dimension = "x"} : () -> (index)
+ %gDimY = "gpu.grid_dim"() {dimension = "y"} : () -> (index)
+ %gDimZ = "gpu.grid_dim"() {dimension = "z"} : () -> (index)
+
+ "some_op"(%bIdX, %tIdX) : (index, index) -> ()
+ %42 = load %arg1[%bIdX] : memref<?xf32, 1>
+ }
+ }
+
+ "gpu.launch_func"(%cst, %cst, %cst, // Grid sizes.
+ %cst, %cst, %cst, // Block sizes.
+ %arg0, %arg1) // Arguments passed to the kernel.
+ { kernel_module = @kernels, // Module containing the kernel.
+ kernel = "kernel_1" } // Kernel function.
+ : (index, index, index, index, index, index, f32, !llvm<"float*">)
+ -> ()
+ }
+ ```
+ }];
+
+ let skipDefaultBuilders = 1;
+
+ let builders = [
+ OpBuilder<"Builder *builder, OperationState &result, FuncOp kernelFunc, "
+ "Value *gridSizeX, Value *gridSizeY, Value *gridSizeZ, "
+ "Value *blockSizeX, Value *blockSizeY, Value *blockSizeZ, "
+ "ValueRange kernelOperands">,
+ OpBuilder<"Builder *builder, OperationState &result, FuncOp kernelFunc, "
+ "KernelDim3 gridSize, KernelDim3 blockSize, "
+ "ValueRange kernelOperands">
+ ];
+
+ let extraClassDeclaration = [{
+ /// The kernel function specified by the operation's `kernel` attribute.
+ StringRef kernel();
+
+ /// The number of operands passed to the kernel function.
+ unsigned getNumKernelOperands();
+
+ /// The name of the kernel module specified by the operation's
+ /// `kernel_module` attribute.
+ StringRef getKernelModuleName();
+
+ /// The i-th operand passed to the kernel function.
+ Value *getKernelOperand(unsigned i);
+
+ /// Get the SSA values passed as operands to specify the grid size.
+ KernelDim3 getGridSizeOperandValues();
+
+ /// Get the SSA values passed as operands to specify the block size.
+ KernelDim3 getBlockSizeOperandValues();
+
+ /// The number of launch configuration operands, placed at the leading
+ /// positions of the operand list.
+ static constexpr unsigned kNumConfigOperands = 6;
+
+ // The dialect-level attribute verifier is a friend because it needs the
+ // attribute names defined below: it runs before the verifier of this op and
+ // has to quietly check whether those attributes are present.
+ friend LogicalResult GPUDialect::verifyOperationAttribute(Operation *,
+ NamedAttribute);
+
+ /// The name of the string attribute specifying the kernel to launch.
+ static StringRef getKernelAttrName() { return "kernel"; }
+
+ /// The name of the symbolRef attribute specifying the name of the module
+ /// containing the kernel to launch.
+ static StringRef getKernelModuleAttrName() { return "kernel_module"; }
+ }];
+
+ let verifier = [{ return ::verify(*this); }];
+}
+
def GPU_LaunchOp : GPU_Op<"launch", [IsolatedFromAbove]>,
Arguments<(ins Index:$gridSizeX, Index:$gridSizeY, Index:$gridSizeZ,
Index:$blockSizeX, Index:$blockSizeY, Index:$blockSizeZ,
def LLVM_Type : Type<CPred<"$_self.isa<::mlir::LLVM::LLVMType>()">,
"LLVM dialect type">;
+// Type constraint accepting only wrapped LLVM integer types.
+def LLVMInt : TypeConstraint<
+ And<[LLVM_Type.predicate,
+ CPred<"$_self.cast<::mlir::LLVM::LLVMType>().isIntegerTy()">]>,
+ "LLVM dialect integer">;
+
// Base class for LLVM operations. Defines the interface to the llvm::IRBuilder
// used to translate to LLVM IR proper.
class LLVM_OpBase<Dialect dialect, string mnemonic, list<OpTrait> traits = []> :
GPUDialect::GPUDialect(MLIRContext *context)
: Dialect(getDialectName(), context) {
- addOperations<LaunchFuncOp,
+ addOperations<
#define GET_OP_LIST
#include "mlir/Dialect/GPU/GPUOps.cpp.inc"
- >();
+ >();
}
LogicalResult GPUDialect::verifyOperationAttribute(Operation *op,
return KernelDim3{getOperand(3), getOperand(4), getOperand(5)};
}
-LogicalResult LaunchFuncOp::verify() {
- auto module = getParentOfType<ModuleOp>();
+LogicalResult verify(LaunchFuncOp op) {
+ auto module = op.getParentOfType<ModuleOp>();
if (!module)
- return emitOpError("expected to belong to a module");
+ return op.emitOpError("expected to belong to a module");
if (!module.getAttrOfType<UnitAttr>(GPUDialect::getContainerModuleAttrName()))
- return emitOpError("expected the closest surrounding module to have the '" +
- GPUDialect::getContainerModuleAttrName() +
- "' attribute");
+ return op.emitOpError(
+ "expected the closest surrounding module to have the '" +
+ GPUDialect::getContainerModuleAttrName() + "' attribute");
- auto kernelAttr = getAttrOfType<StringAttr>(getKernelAttrName());
+ auto kernelAttr = op.getAttrOfType<StringAttr>(op.getKernelAttrName());
if (!kernelAttr)
- return emitOpError("string attribute '" + getKernelAttrName() +
- "' must be specified");
+ return op.emitOpError("string attribute '" + op.getKernelAttrName() +
+ "' must be specified");
auto kernelModuleAttr =
- getAttrOfType<SymbolRefAttr>(getKernelModuleAttrName());
+ op.getAttrOfType<SymbolRefAttr>(op.getKernelModuleAttrName());
if (!kernelModuleAttr)
- return emitOpError("symbol reference attribute '" +
- getKernelModuleAttrName() + "' must be specified");
+ return op.emitOpError("symbol reference attribute '" +
+ op.getKernelModuleAttrName() + "' must be specified");
return success();
}