From 834618a2ffbdaf3f2e94025b53f49f3764e5adb0 Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Thu, 29 Oct 2020 17:30:27 +0000 Subject: [PATCH] Revert "[mlir][gpu] Allow gpu.launch_func to be async." This reverts commit ec7780ebdab480139596c3cb08ee77d7035457b3. One of the bot is crashing in a test related to this change. --- mlir/include/mlir/Dialect/GPU/GPUOps.td | 41 +++++++++++---------------------- mlir/lib/Dialect/GPU/IR/GPUDialect.cpp | 15 ++++-------- mlir/test/Dialect/GPU/invalid.mlir | 2 -- mlir/test/Dialect/GPU/ops.mlir | 4 ---- 4 files changed, 17 insertions(+), 45 deletions(-) diff --git a/mlir/include/mlir/Dialect/GPU/GPUOps.td b/mlir/include/mlir/Dialect/GPU/GPUOps.td index 593b735..8ad3c9a 100644 --- a/mlir/include/mlir/Dialect/GPU/GPUOps.td +++ b/mlir/include/mlir/Dialect/GPU/GPUOps.td @@ -291,14 +291,12 @@ def GPU_GPUFuncOp : GPU_Op<"func", [HasParent<"GPUModuleOp">, let parser = [{ return parseGPUFuncOp(parser, result); }]; } -def GPU_LaunchFuncOp : GPU_Op<"launch_func", - [GPU_AsyncOpInterface, AttrSizedOperandSegments]>, - Arguments<(ins Variadic:$asyncDependencies, - SymbolRefAttr:$kernel, +def GPU_LaunchFuncOp : GPU_Op<"launch_func">, + Arguments<(ins SymbolRefAttr:$kernel, Index:$gridSizeX, Index:$gridSizeY, Index:$gridSizeZ, Index:$blockSizeX, Index:$blockSizeY, Index:$blockSizeZ, Variadic:$operands)>, - Results<(outs Optional:$asyncToken)> { + Results<(outs)> { let summary = "Launches a function as a GPU kernel"; let description = [{ @@ -310,22 +308,14 @@ def GPU_LaunchFuncOp : GPU_Op<"launch_func", function is required to be a gpu.module. And finally, the module containing the kernel module (which thus cannot be the top-level module) is required to have the `gpu.container_module` attribute. The `gpu.launch_func` - operation has a symbol attribute named `kernel` to identify the fully + operation has a symbol attribute named `kernel` to identify the fully specified kernel function to launch (both the gpu.module and func). - The `gpu.launch_func` supports async dependencies: the kernel does not start - executing until the ops producing those async dependencies have completed. - - By the default, the host implicitly blocks until kernel execution has - completed. If the `async` keyword is present, the host does not block but - instead a `!gpu.async.token` is returned. Other async GPU ops can take this - token as dependency. - - The operation requires at least the grid and block sizes along the x,y,z - dimensions as arguments. When a lower-dimensional kernel is required, - unused sizes must be explicitly set to `1`. - - The remaining operands are passed as arguments to the kernel function. + The operation takes at least six operands, with the first three operands + being grid sizes along x,y,z dimensions and the following three being block + sizes along x,y,z dimensions. When a lower-dimensional kernel is required, + unused sizes must be explicitly set to `1`. The remaining operands are + passed as arguments to the kernel function. Example: @@ -361,15 +351,11 @@ def GPU_LaunchFuncOp : GPU_Op<"launch_func", } } - %t0 = gpu.wait async gpu.launch_func - async // (Optional) Don't block host, return token. - [%t0] // (Optional) Execute only after %t0 has completed. - @kernels::@kernel_1 // Kernel function. - blocks in (%cst, %cst, %cst) // Grid size. - threads in (%cst, %cst, %cst) // Block size. - args(%arg0 : f32, // (Optional) Kernel arguments. - %arg1 : memref) + @kernels::@kernel_1 // Kernel function. + blocks in (%cst, %cst, %cst) // Grid size. + threads in (%cst, %cst, %cst) // Block size. + args(%arg0 : f32, %arg1 : memref) // Kernel arguments. } ``` }]; @@ -415,7 +401,6 @@ def GPU_LaunchFuncOp : GPU_Op<"launch_func", let verifier = [{ return ::verify(*this); }]; let assemblyFormat = [{ - custom(type($asyncToken), $asyncDependencies) $kernel `blocks` `in` ` ` `(`$gridSizeX`,` $gridSizeY`,` $gridSizeZ`)` `threads` `in` ` ` `(`$blockSizeX`,` $blockSizeY`,` $blockSizeZ`)` diff --git a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp index 8b0f7a3..9514d3e 100644 --- a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp +++ b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp @@ -438,15 +438,10 @@ void LaunchFuncOp::build(OpBuilder &builder, OperationState &result, auto kernelSymbol = builder.getSymbolRefAttr( kernelModule.getName(), {builder.getSymbolRefAttr(kernelFunc.getName())}); result.addAttribute(getKernelAttrName(), kernelSymbol); - SmallVector segmentSizes(8, 1); - segmentSizes.front() = 0; // Initially no async dependencies. - segmentSizes.back() = static_cast(kernelOperands.size()); - result.addAttribute(getOperandSegmentSizeAttr(), - builder.getI32VectorAttr(segmentSizes)); } unsigned LaunchFuncOp::getNumKernelOperands() { - return getNumOperands() - asyncDependencies().size() - kNumConfigOperands; + return getNumOperands() - kNumConfigOperands; } StringRef LaunchFuncOp::getKernelModuleName() { @@ -456,17 +451,15 @@ StringRef LaunchFuncOp::getKernelModuleName() { StringRef LaunchFuncOp::getKernelName() { return kernel().getLeafReference(); } Value LaunchFuncOp::getKernelOperand(unsigned i) { - return getOperand(asyncDependencies().size() + kNumConfigOperands + i); + return getOperation()->getOperand(i + kNumConfigOperands); } KernelDim3 LaunchFuncOp::getGridSizeOperandValues() { - auto operands = getOperands().drop_front(asyncDependencies().size()); - return KernelDim3{operands[0], operands[1], operands[2]}; + return KernelDim3{getOperand(0), getOperand(1), getOperand(2)}; } KernelDim3 LaunchFuncOp::getBlockSizeOperandValues() { - auto operands = getOperands().drop_front(asyncDependencies().size()); - return KernelDim3{operands[3], operands[4], operands[5]}; + return KernelDim3{getOperand(3), getOperand(4), getOperand(5)}; } static LogicalResult verify(LaunchFuncOp op) { diff --git a/mlir/test/Dialect/GPU/invalid.mlir b/mlir/test/Dialect/GPU/invalid.mlir index 3dc5be4..3612b8e 100644 --- a/mlir/test/Dialect/GPU/invalid.mlir +++ b/mlir/test/Dialect/GPU/invalid.mlir @@ -37,7 +37,6 @@ func @launch_requires_gpu_return(%sz : index) { func @launch_func_too_few_operands(%sz : index) { // expected-error@+1 {{expected 6 or more operands}} "gpu.launch_func"(%sz, %sz, %sz, %sz, %sz) - {operand_segment_sizes = dense<[0, 1, 1, 1, 1, 1, 0, 0]> : vector<8xi32>} : (index, index, index, index, index) -> () return } @@ -56,7 +55,6 @@ module attributes {gpu.container_module} { func @launch_func_missing_callee_attribute(%sz : index) { // expected-error@+1 {{'gpu.launch_func' op requires attribute 'kernel'}} "gpu.launch_func"(%sz, %sz, %sz, %sz, %sz, %sz) - {operand_segment_sizes = dense<[0, 1, 1, 1, 1, 1, 1, 0]> : vector<8xi32>} : (index, index, index, index, index, index) -> () return } diff --git a/mlir/test/Dialect/GPU/ops.mlir b/mlir/test/Dialect/GPU/ops.mlir index a3b781a..e81b233 100644 --- a/mlir/test/Dialect/GPU/ops.mlir +++ b/mlir/test/Dialect/GPU/ops.mlir @@ -73,7 +73,6 @@ module attributes {gpu.container_module} { %1 = "op"() : () -> (memref) // CHECK: %{{.*}} = constant 8 %cst = constant 8 : index - %t0 = gpu.wait async // CHECK: gpu.launch_func @kernels::@kernel_1 blocks in (%{{.*}}, %{{.*}}, %{{.*}}) threads in (%{{.*}}, %{{.*}}, %{{.*}}) args(%{{.*}} : f32, %{{.*}} : memref) gpu.launch_func @kernels::@kernel_1 blocks in (%cst, %cst, %cst) threads in (%cst, %cst, %cst) args(%0 : f32, %1 : memref) @@ -81,9 +80,6 @@ module attributes {gpu.container_module} { // CHECK: gpu.launch_func @kernels::@kernel_2 blocks in (%{{.*}}, %{{.*}}, %{{.*}}) threads in (%{{.*}}, %{{.*}}, %{{.*}}) gpu.launch_func @kernels::@kernel_2 blocks in (%cst, %cst, %cst) threads in (%cst, %cst, %cst) - // CHECK: %{{.*}} = gpu.launch_func async [%{{.*}}] @kernels::@kernel_2 blocks in (%{{.*}}, %{{.*}}, %{{.*}}) threads in (%{{.*}}, %{{.*}}, %{{.*}}) - %t1 = gpu.launch_func async [%t0] @kernels::@kernel_2 blocks in (%cst, %cst, %cst) threads in (%cst, %cst, %cst) - return } -- 2.7.4