From ecd4c7d67af8d167c51fbd86dbfcfca5bbc22102 Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Fri, 10 May 2019 19:21:21 -0700 Subject: [PATCH] Revert "Pass for outlining gpu.launch operation bodies into kernel functions called by gpu.launch_func operations" OSS build was broken (missing CMakeLists.txt changes and compilation failures on Ubuntu) Automated rollback of changelist 247564213. PiperOrigin-RevId: 247713812 --- mlir/g3doc/Dialects/GPU.md | 6 +- mlir/include/mlir/GPU/GPUDialect.h | 4 - mlir/include/mlir/GPU/Passes.h | 33 -------- mlir/lib/GPU/IR/GPUDialect.cpp | 14 ---- mlir/lib/GPU/Transforms/KernelOutlining.cpp | 116 ---------------------------- mlir/test/GPU/outlining.mlir | 68 ---------------- 6 files changed, 4 insertions(+), 237 deletions(-) delete mode 100644 mlir/include/mlir/GPU/Passes.h delete mode 100644 mlir/lib/GPU/Transforms/KernelOutlining.cpp delete mode 100644 mlir/test/GPU/outlining.mlir diff --git a/mlir/g3doc/Dialects/GPU.md b/mlir/g3doc/Dialects/GPU.md index 2ebadbe..9adb053 100644 --- a/mlir/g3doc/Dialects/GPU.md +++ b/mlir/g3doc/Dialects/GPU.md @@ -141,8 +141,10 @@ Example: func @kernel_1(%arg0 : f32, %arg1 : !llvm<"float*">) attributes { nvvm.kernel: true } { - // Operations that produce block/thread IDs and dimensions are injected when - // outlining the `gpu.launch` body to a function called by `gpu.launch_func`. + // Operations that produce block/thread IDs and dimensions will be injected + // when outlining the `gpu.launch` body to a function called by + // `gpu.launch_func`. + // TODO(tjoerg): Implement gpu.launch body outlining. %tIdX = "gpu.thread_id"() {dimension: "x"} : () -> (index) %tIdY = "gpu.thread_id"() {dimension: "y"} : () -> (index) %tIdZ = "gpu.thread_id"() {dimension: "z"} : () -> (index) diff --git a/mlir/include/mlir/GPU/GPUDialect.h b/mlir/include/mlir/GPU/GPUDialect.h index 7c8b2df..708aec4 100644 --- a/mlir/include/mlir/GPU/GPUDialect.h +++ b/mlir/include/mlir/GPU/GPUDialect.h @@ -78,10 +78,6 @@ public: KernelDim3 getGridSize(); /// Get the SSA values corresponding to kernel block size. KernelDim3 getBlockSize(); - /// Append the operand values passed as kernel arguments to `out`. - void getKernelOperandValues(SmallVectorImpl *out); - /// Append the operand types passed as kernel arguments to `out`. - void getKernelOperandTypes(SmallVectorImpl *out); /// Get the SSA values passed as operands to specify the grid size. KernelDim3 getGridSizeOperandValues(); /// Get the SSA values passed as operands to specify the block size. diff --git a/mlir/include/mlir/GPU/Passes.h b/mlir/include/mlir/GPU/Passes.h deleted file mode 100644 index 9dd4ca0..0000000 --- a/mlir/include/mlir/GPU/Passes.h +++ /dev/null @@ -1,33 +0,0 @@ -//===- Passes.h - Pass Entrypoints ------------------------------*- C++ -*-===// -// -// Copyright 2019 The MLIR Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// ============================================================================= -// -// This header file defines prototypes that expose pass constructors. -// -//===----------------------------------------------------------------------===// - -#ifndef MLIR_GPU_PASSES_H_ -#define MLIR_GPU_PASSES_H_ - -namespace mlir { - -class ModulePassBase; - -ModulePassBase *createGpuKernelOutliningPass(); - -} // namespace mlir - -#endif // MLIR_GPU_PASSES_H_ diff --git a/mlir/lib/GPU/IR/GPUDialect.cpp b/mlir/lib/GPU/IR/GPUDialect.cpp index 4762add..bd882c9 100644 --- a/mlir/lib/GPU/IR/GPUDialect.cpp +++ b/mlir/lib/GPU/IR/GPUDialect.cpp @@ -98,20 +98,6 @@ KernelDim3 LaunchOp::getBlockSize() { return KernelDim3{args[9], args[10], args[11]}; } -void LaunchOp::getKernelOperandValues(SmallVectorImpl *out) { - out->reserve(getNumOperands() - kNumConfigOperands + out->size()); - for (int i = kNumConfigOperands; i < getNumOperands(); ++i) { - out->push_back(getOperand(i)); - } -} - -void LaunchOp::getKernelOperandTypes(SmallVectorImpl *out) { - out->reserve(getNumOperands() - kNumConfigOperands + out->size()); - for (int i = kNumConfigOperands; i < getNumOperands(); ++i) { - out->push_back(getOperand(i)->getType()); - } -} - KernelDim3 LaunchOp::getGridSizeOperandValues() { return KernelDim3{getOperand(0), getOperand(1), getOperand(2)}; } diff --git a/mlir/lib/GPU/Transforms/KernelOutlining.cpp b/mlir/lib/GPU/Transforms/KernelOutlining.cpp deleted file mode 100644 index dd73c19..0000000 --- a/mlir/lib/GPU/Transforms/KernelOutlining.cpp +++ /dev/null @@ -1,116 +0,0 @@ -//===- KernelOutlining.cpp - Implementation of GPU kernel outling ---------===// -// -// Copyright 2019 The MLIR Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// ============================================================================= -// -// This file implements the GPU dialect kernel outlining pass. -// -//===----------------------------------------------------------------------===// - -#include "mlir/GPU/GPUDialect.h" -#include "mlir/IR/BlockAndValueMapping.h" -#include "mlir/IR/Builders.h" -#include "mlir/Pass/Pass.h" -#include "mlir/StandardOps/Ops.h" - -using namespace mlir; - -namespace { - -template -void createForAllDimensions(FuncBuilder &builder, Location loc, - SmallVectorImpl *values) { - for (string dim : {"x", "y", "z"}) { - Value *v = builder.create(loc, builder.getIndexType(), - builder.getStringAttr(dim)); - values->push_back(v); - } -} - -// Add operations generating block/thread ids and gird/block dimensions at the -// beginning of `kernelFunc` and replace uses of the respective function args. -void injectGpuIndexOperations(Module &module, Location loc, - Function *kernelFunc) { - Builder builder(&module); - FuncBuilder funcBuilder(kernelFunc); - SmallVector indexOps; - createForAllDimensions(funcBuilder, loc, &indexOps); - createForAllDimensions(funcBuilder, loc, &indexOps); - createForAllDimensions(funcBuilder, loc, &indexOps); - createForAllDimensions(funcBuilder, loc, &indexOps); - // Replace the leading 12 function args with the respective thread/block index - // operations. Iterate backwards since args are erased and indices change. - for (int i = 11; i >= 0; --i) { - auto &firstBlock = kernelFunc->getBody().getBlocks().front(); - firstBlock.getArgument(i)->replaceAllUsesWith(indexOps[i]); - firstBlock.eraseArgument(i); - } -} - -// Outline the `gpu.launch` operation body into a kernel function. -Function *outlineKernelFunc(Module &module, gpu::LaunchOp &launchOp) { - Location loc = launchOp.getLoc(); - SmallVector kernelOperandTypes; - launchOp.getKernelOperandTypes(&kernelOperandTypes); - FunctionType type = - FunctionType::get(kernelOperandTypes, {}, module.getContext()); - string kernelFuncName = - Twine(launchOp.getOperation()->getFunction()->getName(), "_kernel").str(); - mlir::BlockAndValueMapping mapper; - Function *outlinedFunc = new mlir::Function(loc, kernelFuncName, type); - outlinedFunc->getBody().takeBody(launchOp.getBody()); - Builder builder(&module); - outlinedFunc->getAttrList().set( - builder.getIdentifier(gpu::GPUDialect::getKernelFuncAttrName()), - builder.getUnitAttr()); - injectGpuIndexOperations(module, loc, outlinedFunc); - module.getFunctions().push_back(outlinedFunc); - return outlinedFunc; -} - -// Replace `gpu.launch` operations with an `gpu.launch_func` operation launching -// `kernelFunc`. -void convertToLaunchFuncOp(gpu::LaunchOp &launchOp, Function *kernelFunc) { - Location loc = launchOp.getLoc(); - FuncBuilder funcBuilder(launchOp); - SmallVector kernelOperandValues; - launchOp.getKernelOperandValues(&kernelOperandValues); - funcBuilder.create( - loc, kernelFunc, launchOp.getGridSizeOperandValues(), - launchOp.getBlockSizeOperandValues(), kernelOperandValues); - launchOp.erase(); -} - -} // namespace - -class GpuKernelOutliningPass : public ModulePass { -public: - void runOnModule() override { - for (auto &func : getModule()) { - func.walk([&](mlir::gpu::LaunchOp op) { - Function *outlinedFunc = outlineKernelFunc(getModule(), op); - convertToLaunchFuncOp(op, outlinedFunc); - }); - } - } -}; - -ModulePassBase *createGpuKernelOutliningPass() { - return new GpuKernelOutliningPass(); -} - -static PassRegistration - pass("gpu-kernel-outlining", - "Outline gpu.launch bodies to kernel functions."); diff --git a/mlir/test/GPU/outlining.mlir b/mlir/test/GPU/outlining.mlir deleted file mode 100644 index bd8bca3..0000000 --- a/mlir/test/GPU/outlining.mlir +++ /dev/null @@ -1,68 +0,0 @@ -// RUN: mlir-opt -gpu-kernel-outlining -split-input-file %s | FileCheck %s - -func @launch() { - %0 = "op"() : () -> (f32) - %1 = "op"() : () -> (memref) - %gDimX = constant 8 : index - %gDimY = constant 12 : index - %gDimZ = constant 16 : index - %bDimX = constant 20 : index - %bDimY = constant 24 : index - %bDimZ = constant 28 : index - - // CHECK: "gpu.launch_func"(%c8, %c12, %c16, %c20, %c24, %c28, %0, %1) {kernel: @launch_kernel : (f32, memref) -> ()} : (index, index, index, index, index, index, f32, memref) -> () - // CHECK-NOT: gpu.launch blocks - gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %gDimX, %grid_y = %gDimY, - %grid_z = %gDimZ) - threads(%tx, %ty, %tz) in (%block_x = %bDimX, %block_y = %bDimY, - %block_z = %bDimZ) - args(%arg0 = %0, %arg1 = %1) : f32, memref { - "use"(%arg0): (f32) -> () - "some_op"(%bx, %block_x) : (index, index) -> () - %42 = load %arg1[%tx] : memref - return - } - return -} - -// CHECK: func @launch_kernel(%arg0: f32, %arg1: memref) -// CHECK-NEXT: attributes {gpu.kernel} -// CHECK-NEXT: %0 = "gpu.block_id"() {dimension: "x"} : () -> index -// CHECK-NEXT: %1 = "gpu.block_id"() {dimension: "y"} : () -> index -// CHECK-NEXT: %2 = "gpu.block_id"() {dimension: "z"} : () -> index -// CHECK-NEXT: %3 = "gpu.thread_id"() {dimension: "x"} : () -> index -// CHECK-NEXT: %4 = "gpu.thread_id"() {dimension: "y"} : () -> index -// CHECK-NEXT: %5 = "gpu.thread_id"() {dimension: "z"} : () -> index -// CHECK-NEXT: %6 = "gpu.grid_dim"() {dimension: "x"} : () -> index -// CHECK-NEXT: %7 = "gpu.grid_dim"() {dimension: "y"} : () -> index -// CHECK-NEXT: %8 = "gpu.grid_dim"() {dimension: "z"} : () -> index -// CHECK-NEXT: %9 = "gpu.block_dim"() {dimension: "x"} : () -> index -// CHECK-NEXT: %10 = "gpu.block_dim"() {dimension: "y"} : () -> index -// CHECK-NEXT: %11 = "gpu.block_dim"() {dimension: "z"} : () -> index -// CHECK-NEXT: "use"(%arg0) : (f32) -> () -// CHECK-NEXT: "some_op"(%0, %9) : (index, index) -> () -// CHECK-NEXT: %12 = load %arg1[%3] : memref - -// ----- - -func @multiple_launches() { - %cst = constant 8 : index - // CHECK: "gpu.launch_func"(%c8, %c8, %c8, %c8, %c8, %c8) {kernel: @multiple_launches_kernel : () -> ()} : (index, index, index, index, index, index) -> () - gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %cst, %grid_y = %cst, - %grid_z = %cst) - threads(%tx, %ty, %tz) in (%block_x = %cst, %block_y = %cst, - %block_z = %cst) { - return - } - // CHECK: "gpu.launch_func"(%c8, %c8, %c8, %c8, %c8, %c8) {kernel: @multiple_launches_kernel_0 : () -> ()} : (index, index, index, index, index, index) -> () - gpu.launch blocks(%bx2, %by2, %bz2) in (%grid_x2 = %cst, %grid_y2 = %cst, - %grid_z2 = %cst) - threads(%tx2, %ty2, %tz2) in (%block_x2 = %cst, %block_y2 = %cst, - %block_z2 = %cst) { - return - } - return -} - -// CHECK: func @multiple_launches_kernel() -// CHECK: func @multiple_launches_kernel_0() -- 2.7.4