Plug gpu.func into the GPU lowering pipelines

author Alex Zinenko <zinenko@google.com>

Mon, 16 Dec 2019 20:12:20 +0000 (12:12 -0800)

committer A. Unique TensorFlower <gardener@tensorflow.org>

Mon, 16 Dec 2019 20:12:48 +0000 (12:12 -0800)
author Alex Zinenko <zinenko@google.com>
Mon, 16 Dec 2019 20:12:20 +0000 (12:12 -0800)
committer A. Unique TensorFlower <gardener@tensorflow.org>
Mon, 16 Dec 2019 20:12:48 +0000 (12:12 -0800)
diff --git a/mlir/include/mlir/Dialect/GPU/GPUOps.td b/mlir/include/mlir/Dialect/GPU/GPUOps.td

index 7ef1080..5f7bab3 100644 (file)
--- a/mlir/include/mlir/Dialect/GPU/GPUOps.td
+++ b/mlir/include/mlir/Dialect/GPU/GPUOps.td
@@ -120,9 +120,9 @@ def GPU_GPUFuncOp : GPU_Op<"func", [FunctionLike, IsolatedFromAbove, Symbol]> {
  
    let builders = [
      OpBuilder<"Builder *builder, OperationState &result, StringRef name, "
-              "FunctionType type, ArrayRef<Type> workgroupAttributions, "
-              "ArrayRef<Type> privateAttributions, "
-              "ArrayRef<NamedAttribute> attrs">
+              "FunctionType type, ArrayRef<Type> workgroupAttributions = {}, "
+              "ArrayRef<Type> privateAttributions = {}, "
+              "ArrayRef<NamedAttribute> attrs = {}">
    ];
  
    let extraClassDeclaration = [{
@@ -138,6 +138,17 @@ def GPU_GPUFuncOp : GPU_Op<"func", [FunctionLike, IsolatedFromAbove, Symbol]> {
        return getTypeAttr().getValue().cast<FunctionType>();
      }
  
+    /// Change the type of this function in place. This is an extremely
+    /// dangerous operation and it is up to the caller to ensure that this is
+    /// legal for this function, and to restore invariants:
+    ///  - the entry block args must be updated to match the function params.
+    ///  - the argument/result attributes may need an update: if the new type
+    ///  has less parameters we drop the extra attributes, if there are more
+    ///  parameters they won't have any attributes.
+    // TODO(b/146349912): consider removing this function thanks to rewrite
+    // patterns.
+    void setType(FunctionType newType);
+
      /// Returns the number of buffers located in the workgroup memory.
      unsigned getNumWorkgroupAttributions() {
        return getAttrOfType<IntegerAttr>(getNumWorkgroupAttributionsAttrName())
@@ -270,11 +281,11 @@ def GPU_LaunchFuncOp : GPU_Op<"launch_func">,
    let skipDefaultBuilders = 1;
  
    let builders = [
-    OpBuilder<"Builder *builder, OperationState &result, FuncOp kernelFunc, "
+    OpBuilder<"Builder *builder, OperationState &result, GPUFuncOp kernelFunc, "
                "Value *gridSizeX, Value *gridSizeY, Value *gridSizeZ, "
                "Value *blockSizeX, Value *blockSizeY, Value *blockSizeZ, "
                "ValueRange kernelOperands">,
-    OpBuilder<"Builder *builder, OperationState &result, FuncOp kernelFunc, "
+    OpBuilder<"Builder *builder, OperationState &result, GPUFuncOp kernelFunc, "
                "KernelDim3 gridSize, KernelDim3 blockSize, "
                "ValueRange kernelOperands">
    ];
diff --git a/mlir/include/mlir/Dialect/SPIRV/SPIRVLowering.h b/mlir/include/mlir/Dialect/SPIRV/SPIRVLowering.h

index 1619a5e..f48a1d0 100644 (file)
--- a/mlir/include/mlir/Dialect/SPIRV/SPIRVLowering.h
+++ b/mlir/include/mlir/Dialect/SPIRV/SPIRVLowering.h
@@ -83,12 +83,6 @@ StringRef getEntryPointABIAttrName();
  EntryPointABIAttr getEntryPointABIAttr(ArrayRef<int32_t> localSize,
                                         MLIRContext *context);
  
-/// Legalizes a function as an entry function.
-FuncOp lowerAsEntryFunction(FuncOp funcOp, SPIRVTypeConverter &typeConverter,
-                            ConversionPatternRewriter &rewriter,
-                            spirv::EntryPointABIAttr entryPointInfo,
-                            ArrayRef<spirv::InterfaceVarABIAttr> argABIInfo);
-
  /// Sets the InterfaceVarABIAttr and EntryPointABIAttr for a function and its
  /// arguments
  LogicalResult setABIAttrs(FuncOp funcOp,
diff --git a/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp b/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp

index e4bdd7c..f41c0c4 100644 (file)
--- a/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp
+++ b/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp
@@ -489,8 +489,6 @@ struct GPUFuncOpLowering : LLVMOpLowering {
      }
  
      // Rewrite the original GPU function to an LLVM function.
-    // TODO(zinenko): there is a hack in the std->llvm lowering that promotes
-    // structs to pointers that probably needs to be replicated here.
      auto funcType = lowering.convertType(gpuFuncOp.getType())
                          .cast<LLVM::LLVMType>()
                          .getPointerElementTy();
@@ -576,16 +574,51 @@ struct GPUFuncOpLowering : LLVMOpLowering {
        }
      }
  
+    // Move the region to the new function, update the entry block signature.
      rewriter.inlineRegionBefore(gpuFuncOp.getBody(), llvmFuncOp.getBody(),
                                  llvmFuncOp.end());
      rewriter.applySignatureConversion(&llvmFuncOp.getBody(),
                                        signatureConversion);
  
+    {
+      // For memref-typed arguments, insert the relevant loads in the beginning
+      // of the block to comply with the LLVM dialect calling convention. This
+      // needs to be done after signature conversion to get the right types.
+      OpBuilder::InsertionGuard guard(rewriter);
+      Block &block = llvmFuncOp.front();
+      rewriter.setInsertionPointToStart(&block);
+
+      for (auto en : llvm::enumerate(gpuFuncOp.getType().getInputs())) {
+        if (!en.value().isa<MemRefType>() &&
+            !en.value().isa<UnrankedMemRefType>())
+          continue;
+
+        BlockArgument *arg = block.getArgument(en.index());
+        Value *loaded = rewriter.create<LLVM::LoadOp>(loc, arg);
+        rewriter.replaceUsesOfBlockArgument(arg, loaded);
+      }
+    }
+
      rewriter.eraseOp(gpuFuncOp);
      return matchSuccess();
    }
  };
  
+struct GPUReturnOpLowering : public LLVMOpLowering {
+  GPUReturnOpLowering(LLVMTypeConverter &typeConverter)
+      : LLVMOpLowering(gpu::ReturnOp::getOperationName(),
+                       typeConverter.getDialect()->getContext(),
+                       typeConverter) {}
+
+  PatternMatchResult
+  matchAndRewrite(Operation *op, ArrayRef<Value *> operands,
+                  ConversionPatternRewriter &rewriter) const override {
+    rewriter.replaceOpWithNewOp<LLVM::ReturnOp>(op, operands,
+                                                ArrayRef<Block *>());
+    return matchSuccess();
+  }
+};
+
  /// Import the GPU Ops to NVVM Patterns.
  #include "GPUToNVVM.cpp.inc"
  
@@ -632,7 +665,8 @@ void mlir::populateGpuToNVVMConversionPatterns(
                                            NVVM::BlockIdYOp, NVVM::BlockIdZOp>,
                GPUIndexIntrinsicOpLowering<gpu::GridDimOp, NVVM::GridDimXOp,
                                            NVVM::GridDimYOp, NVVM::GridDimZOp>,
-              GPUAllReduceOpLowering, GPUFuncOpLowering>(converter);
+              GPUAllReduceOpLowering, GPUFuncOpLowering, GPUReturnOpLowering>(
+          converter);
    patterns.insert<OpToFuncCallLowering<ExpOp>>(converter, "__nv_expf",
                                                 "__nv_exp");
  }
diff --git a/mlir/lib/Conversion/GPUToSPIRV/ConvertGPUToSPIRV.cpp b/mlir/lib/Conversion/GPUToSPIRV/ConvertGPUToSPIRV.cpp

index 2b39c0d..a8747a7 100644 (file)
--- a/mlir/lib/Conversion/GPUToSPIRV/ConvertGPUToSPIRV.cpp
+++ b/mlir/lib/Conversion/GPUToSPIRV/ConvertGPUToSPIRV.cpp
@@ -51,21 +51,20 @@ public:
                    ConversionPatternRewriter &rewriter) const override;
  };
  
-/// Pattern to convert a kernel function in GPU dialect (a FuncOp with the
-/// attribute gpu.kernel) within a spv.module.
-class KernelFnConversion final : public SPIRVOpLowering<FuncOp> {
+/// Pattern to convert a kernel function in GPU dialect within a spv.module.
+class KernelFnConversion final : public SPIRVOpLowering<gpu::GPUFuncOp> {
  public:
    KernelFnConversion(MLIRContext *context, SPIRVTypeConverter &converter,
                       ArrayRef<int64_t> workGroupSize,
                       PatternBenefit benefit = 1)
-      : SPIRVOpLowering<FuncOp>(context, converter, benefit) {
+      : SPIRVOpLowering<gpu::GPUFuncOp>(context, converter, benefit) {
      auto config = workGroupSize.take_front(3);
      workGroupSizeAsInt32.assign(config.begin(), config.end());
      workGroupSizeAsInt32.resize(3, 1);
    }
  
    PatternMatchResult
-  matchAndRewrite(FuncOp funcOp, ArrayRef<Value *> operands,
+  matchAndRewrite(gpu::GPUFuncOp funcOp, ArrayRef<Value *> operands,
                    ConversionPatternRewriter &rewriter) const override;
  
  private:
@@ -96,6 +95,17 @@ public:
                    ConversionPatternRewriter &rewriter) const override;
  };
  
+/// Pattern to convert a gpu.return into a SPIR-V return.
+// TODO: This can go to DRR when GPU return has operands.
+class GPUReturnOpConversion final : public SPIRVOpLowering<gpu::ReturnOp> {
+public:
+  using SPIRVOpLowering<gpu::ReturnOp>::SPIRVOpLowering;
+
+  PatternMatchResult
+  matchAndRewrite(gpu::ReturnOp returnOp, ArrayRef<Value *> operands,
+                  ConversionPatternRewriter &rewriter) const override;
+};
+
  } // namespace
  
  //===----------------------------------------------------------------------===//
@@ -204,11 +214,58 @@ PatternMatchResult LaunchConfigConversion<SourceOp, builtin>::matchAndRewrite(
  }
  
  //===----------------------------------------------------------------------===//
-// FuncOp with gpu.kernel attribute.
+// GPUFuncOp
  //===----------------------------------------------------------------------===//
  
+// Legalizes a GPU function as an entry SPIR-V function.
+static FuncOp
+lowerAsEntryFunction(gpu::GPUFuncOp funcOp, SPIRVTypeConverter &typeConverter,
+                     ConversionPatternRewriter &rewriter,
+                     spirv::EntryPointABIAttr entryPointInfo,
+                     ArrayRef<spirv::InterfaceVarABIAttr> argABIInfo) {
+  auto fnType = funcOp.getType();
+  if (fnType.getNumResults()) {
+    funcOp.emitError("SPIR-V lowering only supports entry functions"
+                     "with no return values right now");
+    return nullptr;
+  }
+  if (fnType.getNumInputs() != argABIInfo.size()) {
+    funcOp.emitError(
+        "lowering as entry functions requires ABI info for all arguments");
+    return nullptr;
+  }
+  // For entry functions need to make the signature void(void). Compute the
+  // replacement value for all arguments and replace all uses.
+  TypeConverter::SignatureConversion signatureConverter(fnType.getNumInputs());
+  {
+    for (auto argType : enumerate(funcOp.getType().getInputs())) {
+      auto convertedType = typeConverter.convertType(argType.value());
+      signatureConverter.addInputs(argType.index(), convertedType);
+    }
+  }
+  auto newFuncOp = rewriter.create<FuncOp>(
+      funcOp.getLoc(), funcOp.getName(),
+      rewriter.getFunctionType(signatureConverter.getConvertedTypes(),
+                               llvm::None),
+      ArrayRef<NamedAttribute>());
+  for (const auto &namedAttr : funcOp.getAttrs()) {
+    if (namedAttr.first.is(impl::getTypeAttrName()) ||
+        namedAttr.first.is(SymbolTable::getSymbolAttrName()))
+      continue;
+    newFuncOp.setAttr(namedAttr.first, namedAttr.second);
+  }
+  rewriter.inlineRegionBefore(funcOp.getBody(), newFuncOp.getBody(),
+                              newFuncOp.end());
+  rewriter.applySignatureConversion(&newFuncOp.getBody(), signatureConverter);
+  rewriter.eraseOp(funcOp);
+
+  spirv::setABIAttrs(newFuncOp, entryPointInfo, argABIInfo);
+  return newFuncOp;
+}
+
  PatternMatchResult
-KernelFnConversion::matchAndRewrite(FuncOp funcOp, ArrayRef<Value *> operands,
+KernelFnConversion::matchAndRewrite(gpu::GPUFuncOp funcOp,
+                                    ArrayRef<Value *> operands,
                                      ConversionPatternRewriter &rewriter) const {
    if (!gpu::GPUDialect::isKernel(funcOp)) {
      return matchFailure();
@@ -223,8 +280,8 @@ KernelFnConversion::matchAndRewrite(FuncOp funcOp, ArrayRef<Value *> operands,
    auto context = rewriter.getContext();
    auto entryPointAttr =
        spirv::getEntryPointABIAttr(workGroupSizeAsInt32, context);
-  FuncOp newFuncOp = spirv::lowerAsEntryFunction(
-      funcOp, typeConverter, rewriter, entryPointAttr, argABI);
+  FuncOp newFuncOp = lowerAsEntryFunction(funcOp, typeConverter, rewriter,
+                                          entryPointAttr, argABI);
    if (!newFuncOp) {
      return matchFailure();
    }
@@ -275,6 +332,20 @@ PatternMatchResult KernelModuleTerminatorConversion::matchAndRewrite(
  }
  
  //===----------------------------------------------------------------------===//
+// GPU return inside kernel functions to SPIR-V return.
+//===----------------------------------------------------------------------===//
+
+PatternMatchResult GPUReturnOpConversion::matchAndRewrite(
+    gpu::ReturnOp returnOp, ArrayRef<Value *> operands,
+    ConversionPatternRewriter &rewriter) const {
+  if (!operands.empty())
+    return matchFailure();
+
+  rewriter.replaceOpWithNewOp<spirv::ReturnOp>(returnOp);
+  return matchSuccess();
+}
+
+//===----------------------------------------------------------------------===//
  // GPU To SPIRV Patterns.
  //===----------------------------------------------------------------------===//
  
@@ -285,7 +356,8 @@ void populateGPUToSPIRVPatterns(MLIRContext *context,
                                  ArrayRef<int64_t> workGroupSize) {
    patterns.insert<KernelFnConversion>(context, typeConverter, workGroupSize);
    patterns.insert<
-      ForOpConversion, KernelModuleConversion, KernelModuleTerminatorConversion,
+      GPUReturnOpConversion, ForOpConversion, KernelModuleConversion,
+      KernelModuleTerminatorConversion,
        LaunchConfigConversion<gpu::BlockDimOp, spirv::BuiltIn::WorkgroupSize>,
        LaunchConfigConversion<gpu::BlockIdOp, spirv::BuiltIn::WorkgroupId>,
        LaunchConfigConversion<gpu::GridDimOp, spirv::BuiltIn::NumWorkgroups>,
diff --git a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp

index 1f48d6d..46a568c 100644 (file)
--- a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
+++ b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
@@ -94,9 +94,9 @@ LogicalResult GPUDialect::verifyOperationAttribute(Operation *op,
      // Check that `launch_func` refers to a well-formed kernel function.
      StringRef kernelName = launchOp.kernel();
      Operation *kernelFunc = kernelModule.lookupSymbol(kernelName);
-    auto kernelStdFunction = dyn_cast_or_null<::mlir::FuncOp>(kernelFunc);
+    auto kernelGPUFunction = dyn_cast_or_null<gpu::GPUFuncOp>(kernelFunc);
      auto kernelLLVMFunction = dyn_cast_or_null<LLVM::LLVMFuncOp>(kernelFunc);
-    if (!kernelStdFunction && !kernelLLVMFunction)
+    if (!kernelGPUFunction && !kernelLLVMFunction)
        return launchOp.emitOpError("kernel function '")
               << kernelName << "' is undefined";
      if (!kernelFunc->getAttrOfType<mlir::UnitAttr>(
@@ -107,7 +107,7 @@ LogicalResult GPUDialect::verifyOperationAttribute(Operation *op,
      unsigned actualNumArguments = launchOp.getNumKernelOperands();
      unsigned expectedNumArguments = kernelLLVMFunction
                                          ? kernelLLVMFunction.getNumArguments()
-                                        : kernelStdFunction.getNumArguments();
+                                        : kernelGPUFunction.getNumArguments();
      if (expectedNumArguments != actualNumArguments)
        return launchOp.emitOpError("got ")
               << actualNumArguments << " kernel operands but expected "
@@ -488,7 +488,7 @@ void LaunchOp::getCanonicalizationPatterns(OwningRewritePatternList &results,
  //===----------------------------------------------------------------------===//
  
  void LaunchFuncOp::build(Builder *builder, OperationState &result,
-                         ::mlir::FuncOp kernelFunc, Value *gridSizeX,
+                         GPUFuncOp kernelFunc, Value *gridSizeX,
                           Value *gridSizeY, Value *gridSizeZ, Value *blockSizeX,
                           Value *blockSizeY, Value *blockSizeZ,
                           ValueRange kernelOperands) {
@@ -505,7 +505,7 @@ void LaunchFuncOp::build(Builder *builder, OperationState &result,
  }
  
  void LaunchFuncOp::build(Builder *builder, OperationState &result,
-                         ::mlir::FuncOp kernelFunc, KernelDim3 gridSize,
+                         GPUFuncOp kernelFunc, KernelDim3 gridSize,
                           KernelDim3 blockSize, ValueRange kernelOperands) {
    build(builder, result, kernelFunc, gridSize.x, gridSize.y, gridSize.z,
          blockSize.x, blockSize.y, blockSize.z, kernelOperands);
@@ -718,6 +718,18 @@ void printGPUFuncOp(OpAsmPrinter &p, GPUFuncOp op) {
    p.printRegion(op.getBody(), /*printEntryBlockArgs=*/false);
  }
  
+void GPUFuncOp::setType(FunctionType newType) {
+  auto oldType = getType();
+  assert(newType.getNumResults() == oldType.getNumResults() &&
+         "unimplemented: changes to the number of results");
+
+  SmallVector<char, 16> nameBuf;
+  for (int i = newType.getNumInputs(), e = oldType.getNumInputs(); i < e; i++)
+    removeAttr(getArgAttrName(i, nameBuf));
+
+  setAttr(getTypeAttrName(), TypeAttr::get(newType));
+}
+
  /// Hook for FunctionLike verifier.
  LogicalResult GPUFuncOp::verifyType() {
    Type type = getTypeAttr().getValue();
diff --git a/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp b/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp

index b466cc2..416a37b 100644 (file)
--- a/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp
+++ b/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp
@@ -39,19 +39,21 @@ static void createForAllDimensions(OpBuilder &builder, Location loc,
    }
  }
  
-// Add operations generating block/thread ids and gird/block dimensions at the
-// beginning of `kernelFunc` and replace uses of the respective function args.
-static void injectGpuIndexOperations(Location loc, FuncOp kernelFunc) {
-  OpBuilder OpBuilder(kernelFunc.getBody());
+// Add operations generating block/thread ids and grid/block dimensions at the
+// beginning of the `body` region and replace uses of the respective function
+// arguments.
+static void injectGpuIndexOperations(Location loc, Region &body) {
+  OpBuilder builder(loc->getContext());
+  Block &firstBlock = body.front();
+  builder.setInsertionPointToStart(&firstBlock);
    SmallVector<Value *, 12> indexOps;
-  createForAllDimensions<gpu::BlockIdOp>(OpBuilder, loc, indexOps);
-  createForAllDimensions<gpu::ThreadIdOp>(OpBuilder, loc, indexOps);
-  createForAllDimensions<gpu::GridDimOp>(OpBuilder, loc, indexOps);
-  createForAllDimensions<gpu::BlockDimOp>(OpBuilder, loc, indexOps);
+  createForAllDimensions<gpu::BlockIdOp>(builder, loc, indexOps);
+  createForAllDimensions<gpu::ThreadIdOp>(builder, loc, indexOps);
+  createForAllDimensions<gpu::GridDimOp>(builder, loc, indexOps);
+  createForAllDimensions<gpu::BlockDimOp>(builder, loc, indexOps);
    // Replace the leading 12 function args with the respective thread/block index
    // operations. Iterate backwards since args are erased and indices change.
    for (int i = 11; i >= 0; --i) {
-    auto &firstBlock = kernelFunc.front();
      firstBlock.getArgument(i)->replaceAllUsesWith(indexOps[i]);
      firstBlock.eraseArgument(i);
    }
@@ -63,7 +65,7 @@ static bool isInliningBeneficiary(Operation *op) {
  
  // Move arguments of the given kernel function into the function if this reduces
  // the number of kernel arguments.
-static gpu::LaunchFuncOp inlineBeneficiaryOps(FuncOp kernelFunc,
+static gpu::LaunchFuncOp inlineBeneficiaryOps(gpu::GPUFuncOp kernelFunc,
                                                gpu::LaunchFuncOp launch) {
    OpBuilder kernelBuilder(kernelFunc.getBody());
    auto &firstBlock = kernelFunc.getBody().front();
@@ -107,31 +109,30 @@ static gpu::LaunchFuncOp inlineBeneficiaryOps(FuncOp kernelFunc,
  
  // Outline the `gpu.launch` operation body into a kernel function. Replace
  // `gpu.return` operations by `std.return` in the generated function.
-static FuncOp outlineKernelFunc(gpu::LaunchOp launchOp) {
+static gpu::GPUFuncOp outlineKernelFunc(gpu::LaunchOp launchOp) {
    Location loc = launchOp.getLoc();
+  // Create a builder with no insertion point, insertion will happen separately
+  // due to symbol table manipulation.
+  OpBuilder builder(launchOp.getContext());
+
    SmallVector<Type, 4> kernelOperandTypes(launchOp.getKernelOperandTypes());
    FunctionType type =
        FunctionType::get(kernelOperandTypes, {}, launchOp.getContext());
    std::string kernelFuncName =
        Twine(launchOp.getParentOfType<FuncOp>().getName(), "_kernel").str();
-  FuncOp outlinedFunc = FuncOp::create(loc, kernelFuncName, type);
-  outlinedFunc.getBody().takeBody(launchOp.body());
-  Builder builder(launchOp.getContext());
+  auto outlinedFunc = builder.create<gpu::GPUFuncOp>(loc, kernelFuncName, type);
    outlinedFunc.setAttr(gpu::GPUDialect::getKernelFuncAttrName(),
                         builder.getUnitAttr());
-  injectGpuIndexOperations(loc, outlinedFunc);
-  outlinedFunc.walk([](gpu::ReturnOp op) {
-    OpBuilder replacer(op);
-    replacer.create<ReturnOp>(op.getLoc());
-    op.erase();
-  });
+  outlinedFunc.body().takeBody(launchOp.body());
+  injectGpuIndexOperations(loc, outlinedFunc.body());
    return outlinedFunc;
  }
  
  // Replace `gpu.launch` operations with an `gpu.launch_func` operation launching
  // `kernelFunc`. The kernel func contains the body of the `gpu.launch` with
  // constant region arguments inlined.
-static void convertToLaunchFuncOp(gpu::LaunchOp &launchOp, FuncOp kernelFunc) {
+static void convertToLaunchFuncOp(gpu::LaunchOp &launchOp,
+                                  gpu::GPUFuncOp kernelFunc) {
    OpBuilder builder(launchOp);
    auto launchFuncOp = builder.create<gpu::LaunchFuncOp>(
        launchOp.getLoc(), kernelFunc, launchOp.getGridSizeOperandValues(),
@@ -160,7 +161,7 @@ public:
        // Insert just after the function.
        Block::iterator insertPt(func.getOperation()->getNextNode());
        func.walk([&](gpu::LaunchOp op) {
-        FuncOp outlinedFunc = outlineKernelFunc(op);
+        gpu::GPUFuncOp outlinedFunc = outlineKernelFunc(op);
  
          // Create nested module and insert outlinedFunc. The module will
          // originally get the same name as the function, but may be renamed on
@@ -183,7 +184,7 @@ public:
  
  private:
    // Returns a module containing kernelFunc and all callees (recursive).
-  ModuleOp createKernelModule(FuncOp kernelFunc,
+  ModuleOp createKernelModule(gpu::GPUFuncOp kernelFunc,
                                const SymbolTable &parentSymbolTable) {
      auto context = getModule().getContext();
      Builder builder(context);
diff --git a/mlir/lib/Dialect/SPIRV/SPIRVLowering.cpp b/mlir/lib/Dialect/SPIRV/SPIRVLowering.cpp

index 67c036d..1e68b49 100644 (file)
--- a/mlir/lib/Dialect/SPIRV/SPIRVLowering.cpp
+++ b/mlir/lib/Dialect/SPIRV/SPIRVLowering.cpp
@@ -249,43 +249,6 @@ Value *mlir::spirv::getBuiltinVariableValue(Operation *op,
  // Entry Function signature Conversion
  //===----------------------------------------------------------------------===//
  
-FuncOp mlir::spirv::lowerAsEntryFunction(
-    FuncOp funcOp, SPIRVTypeConverter &typeConverter,
-    ConversionPatternRewriter &rewriter,
-    spirv::EntryPointABIAttr entryPointInfo,
-    ArrayRef<spirv::InterfaceVarABIAttr> argABIInfo) {
-  auto fnType = funcOp.getType();
-  if (fnType.getNumResults()) {
-    funcOp.emitError("SPIR-V lowering only supports entry functions"
-                     "with no return values right now");
-    return nullptr;
-  }
-  if (fnType.getNumInputs() != argABIInfo.size()) {
-    funcOp.emitError(
-        "lowering as entry functions requires ABI info for all arguments");
-    return nullptr;
-  }
-  // For entry functions need to make the signature void(void). Compute the
-  // replacement value for all arguments and replace all uses.
-  TypeConverter::SignatureConversion signatureConverter(fnType.getNumInputs());
-  {
-    for (auto argType : enumerate(funcOp.getType().getInputs())) {
-      auto convertedType = typeConverter.convertType(argType.value());
-      signatureConverter.addInputs(argType.index(), convertedType);
-    }
-  }
-  auto newFuncOp = rewriter.cloneWithoutRegions(funcOp);
-  rewriter.inlineRegionBefore(funcOp.getBody(), newFuncOp.getBody(),
-                              newFuncOp.end());
-  newFuncOp.setType(rewriter.getFunctionType(
-      signatureConverter.getConvertedTypes(), llvm::None));
-  rewriter.applySignatureConversion(&newFuncOp.getBody(), signatureConverter);
-  rewriter.eraseOp(funcOp);
-
-  spirv::setABIAttrs(newFuncOp, entryPointInfo, argABIInfo);
-  return newFuncOp;
-}
-
  LogicalResult
  mlir::spirv::setABIAttrs(FuncOp funcOp, spirv::EntryPointABIAttr entryPointInfo,
                           ArrayRef<spirv::InterfaceVarABIAttr> argABIInfo) {
diff --git a/mlir/test/Conversion/GPUToCUDA/lower-launch-func-to-cuda.mlir b/mlir/test/Conversion/GPUToCUDA/lower-launch-func-to-cuda.mlir

index d4c5c0f..6865462 100644 (file)
--- a/mlir/test/Conversion/GPUToCUDA/lower-launch-func-to-cuda.mlir
+++ b/mlir/test/Conversion/GPUToCUDA/lower-launch-func-to-cuda.mlir
@@ -6,8 +6,9 @@ module attributes {gpu.container_module} {
    // CHECK: llvm.mlir.global internal constant @[[global:.*]]("CUBIN")
  
    module @kernel_module attributes {gpu.kernel_module, nvvm.cubin = "CUBIN"} {
-    func @kernel(!llvm.float, !llvm<"float*">)
-        attributes { gpu.kernel }
+    gpu.func @kernel(%arg0: !llvm.float, %arg1: !llvm<"float*">) attributes {gpu.kernel} {
+      gpu.return
+    }
    }
  
    llvm.func @foo() {
diff --git a/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir b/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir

index 30bba48..525016b 100644 (file)
--- a/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir
+++ b/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir
@@ -122,3 +122,4 @@ module attributes {gpu.kernel_module} {
      "test.finish" () : () -> ()
    }) : () -> ()
  }
+
diff --git a/mlir/test/Conversion/GPUToSPIRV/builtins.mlir b/mlir/test/Conversion/GPUToSPIRV/builtins.mlir

index 2139cca..c0a68a9 100644 (file)
--- a/mlir/test/Conversion/GPUToSPIRV/builtins.mlir
+++ b/mlir/test/Conversion/GPUToSPIRV/builtins.mlir
@@ -10,13 +10,13 @@ module attributes {gpu.container_module} {
    // CHECK-LABEL:  spv.module "Logical" "GLSL450"
    // CHECK: spv.globalVariable [[WORKGROUPID:@.*]] built_in("WorkgroupId")
    module @kernels attributes {gpu.kernel_module} {
-    func @builtin_workgroup_id_x()
+    gpu.func @builtin_workgroup_id_x()
        attributes {gpu.kernel} {
        // CHECK: [[ADDRESS:%.*]] = spv._address_of [[WORKGROUPID]]
        // CHECK-NEXT: [[VEC:%.*]] = spv.Load "Input" [[ADDRESS]]
        // CHECK-NEXT: {{%.*}} = spv.CompositeExtract [[VEC]]{{\[}}0 : i32{{\]}}
        %0 = "gpu.block_id"() {dimension = "x"} : () -> index
-      return
+      gpu.return
      }
    }
  }
@@ -33,13 +33,13 @@ module attributes {gpu.container_module} {
    // CHECK-LABEL:  spv.module "Logical" "GLSL450"
    // CHECK: spv.globalVariable [[WORKGROUPID:@.*]] built_in("WorkgroupId")
    module @kernels attributes {gpu.kernel_module} {
-    func @builtin_workgroup_id_y()
+    gpu.func @builtin_workgroup_id_y()
        attributes {gpu.kernel} {
        // CHECK: [[ADDRESS:%.*]] = spv._address_of [[WORKGROUPID]]
        // CHECK-NEXT: [[VEC:%.*]] = spv.Load "Input" [[ADDRESS]]
        // CHECK-NEXT: {{%.*}} = spv.CompositeExtract [[VEC]]{{\[}}1 : i32{{\]}}
        %0 = "gpu.block_id"() {dimension = "y"} : () -> index
-      return
+      gpu.return
      }
    }
  }
@@ -56,13 +56,13 @@ module attributes {gpu.container_module} {
    // CHECK-LABEL:  spv.module "Logical" "GLSL450"
    // CHECK: spv.globalVariable [[WORKGROUPID:@.*]] built_in("WorkgroupId")
    module @kernels attributes {gpu.kernel_module} {
-    func @builtin_workgroup_id_z()
+    gpu.func @builtin_workgroup_id_z()
        attributes {gpu.kernel} {
        // CHECK: [[ADDRESS:%.*]] = spv._address_of [[WORKGROUPID]]
        // CHECK-NEXT: [[VEC:%.*]] = spv.Load "Input" [[ADDRESS]]
        // CHECK-NEXT: {{%.*}} = spv.CompositeExtract [[VEC]]{{\[}}2 : i32{{\]}}
        %0 = "gpu.block_id"() {dimension = "z"} : () -> index
-      return
+      gpu.return
      }
    }
  }
@@ -79,13 +79,13 @@ module attributes {gpu.container_module} {
    // CHECK-LABEL:  spv.module "Logical" "GLSL450"
    // CHECK: spv.globalVariable [[WORKGROUPSIZE:@.*]] built_in("WorkgroupSize")
    module @kernels attributes {gpu.kernel_module} {
-    func @builtin_workgroup_size_x()
+    gpu.func @builtin_workgroup_size_x()
        attributes {gpu.kernel} {
        // CHECK: [[ADDRESS:%.*]] = spv._address_of [[WORKGROUPSIZE]]
        // CHECK-NEXT: [[VEC:%.*]] = spv.Load "Input" [[ADDRESS]]
        // CHECK-NEXT: {{%.*}} = spv.CompositeExtract [[VEC]]{{\[}}0 : i32{{\]}}
        %0 = "gpu.block_dim"() {dimension = "x"} : () -> index
-      return
+      gpu.return
      }
    }
  }
@@ -102,13 +102,13 @@ module attributes {gpu.container_module} {
    // CHECK-LABEL:  spv.module "Logical" "GLSL450"
    // CHECK: spv.globalVariable [[LOCALINVOCATIONID:@.*]] built_in("LocalInvocationId")
    module @kernels attributes {gpu.kernel_module} {
-    func @builtin_local_id_x()
+    gpu.func @builtin_local_id_x()
        attributes {gpu.kernel} {
        // CHECK: [[ADDRESS:%.*]] = spv._address_of [[LOCALINVOCATIONID]]
        // CHECK-NEXT: [[VEC:%.*]] = spv.Load "Input" [[ADDRESS]]
        // CHECK-NEXT: {{%.*}} = spv.CompositeExtract [[VEC]]{{\[}}0 : i32{{\]}}
        %0 = "gpu.thread_id"() {dimension = "x"} : () -> index
-      return
+      gpu.return
      }
    }
  }
@@ -125,13 +125,13 @@ module attributes {gpu.container_module} {
    // CHECK-LABEL:  spv.module "Logical" "GLSL450"
    // CHECK: spv.globalVariable [[NUMWORKGROUPS:@.*]] built_in("NumWorkgroups")
    module @kernels attributes {gpu.kernel_module} {
-    func @builtin_num_workgroups_x()
+    gpu.func @builtin_num_workgroups_x()
        attributes {gpu.kernel} {
        // CHECK: [[ADDRESS:%.*]] = spv._address_of [[NUMWORKGROUPS]]
        // CHECK-NEXT: [[VEC:%.*]] = spv.Load "Input" [[ADDRESS]]
        // CHECK-NEXT: {{%.*}} = spv.CompositeExtract [[VEC]]{{\[}}0 : i32{{\]}}
        %0 = "gpu.grid_dim"() {dimension = "x"} : () -> index
-      return
+      gpu.return
      }
    }
  }
diff --git a/mlir/test/Conversion/GPUToSPIRV/load-store.mlir b/mlir/test/Conversion/GPUToSPIRV/load-store.mlir

index 786a16b..3244256 100644 (file)
--- a/mlir/test/Conversion/GPUToSPIRV/load-store.mlir
+++ b/mlir/test/Conversion/GPUToSPIRV/load-store.mlir
@@ -29,7 +29,7 @@ module attributes {gpu.container_module} {
      // CHECK-SAME: [[ARG4:%.*]]: i32 {spirv.interface_var_abi = {binding = 4 : i32, descriptor_set = 0 : i32, storage_class = 12 : i32{{[}][}]}}
      // CHECK-SAME: [[ARG5:%.*]]: i32 {spirv.interface_var_abi = {binding = 5 : i32, descriptor_set = 0 : i32, storage_class = 12 : i32{{[}][}]}}
      // CHECK-SAME: [[ARG6:%.*]]: i32 {spirv.interface_var_abi = {binding = 6 : i32, descriptor_set = 0 : i32, storage_class = 12 : i32{{[}][}]}}
-    func @load_store_kernel(%arg0: memref<12x4xf32>, %arg1: memref<12x4xf32>, %arg2: memref<12x4xf32>, %arg3: index, %arg4: index, %arg5: index, %arg6: index)
+    gpu.func @load_store_kernel(%arg0: memref<12x4xf32>, %arg1: memref<12x4xf32>, %arg2: memref<12x4xf32>, %arg3: index, %arg4: index, %arg5: index, %arg6: index)
        attributes  {gpu.kernel} {
        // CHECK: [[ADDRESSWORKGROUPID:%.*]] = spv._address_of [[WORKGROUPIDVAR]]
        // CHECK: [[WORKGROUPID:%.*]] = spv.Load "Input" [[ADDRESSWORKGROUPID]]
@@ -70,7 +70,7 @@ module attributes {gpu.container_module} {
        // CHECK: [[PTR3:%.*]] = spv.AccessChain [[ARG2]]{{\[}}{{%.*}}, {{%.*}}{{\]}}
        // CHECK-NEXT: spv.Store "StorageBuffer" [[PTR3]], [[VAL3]]
        store %16, %arg2[%12, %13] : memref<12x4xf32>
-      return
+      gpu.return
      }
    }
  }
diff --git a/mlir/test/Conversion/GPUToSPIRV/loop.mlir b/mlir/test/Conversion/GPUToSPIRV/loop.mlir

index 43a6b3e..6d38360 100644 (file)
--- a/mlir/test/Conversion/GPUToSPIRV/loop.mlir
+++ b/mlir/test/Conversion/GPUToSPIRV/loop.mlir
@@ -8,7 +8,7 @@ module attributes {gpu.container_module} {
    }
  
    module @kernels attributes {gpu.kernel_module} {
-    func @loop_kernel(%arg2 : memref<10xf32>, %arg3 : memref<10xf32>)
+    gpu.func @loop_kernel(%arg2 : memref<10xf32>, %arg3 : memref<10xf32>)
      attributes {gpu.kernel} {
        // CHECK: [[LB:%.*]] = spv.constant 4 : i32
        %lb = constant 4 : index
@@ -37,7 +37,7 @@ module attributes {gpu.container_module} {
          %1 = load %arg2[%arg4] : memref<10xf32>
          store %1, %arg3[%arg4] : memref<10xf32>
        }
-      return
+      gpu.return
      }
    }
-}
-\ No newline at end of file
+}
diff --git a/mlir/test/Conversion/GPUToSPIRV/simple.mlir b/mlir/test/Conversion/GPUToSPIRV/simple.mlir

index 5ec78ed..c1f4324 100644 (file)
--- a/mlir/test/Conversion/GPUToSPIRV/simple.mlir
+++ b/mlir/test/Conversion/GPUToSPIRV/simple.mlir
@@ -8,10 +8,9 @@ module attributes {gpu.container_module} {
      // CHECK-SAME: {{%.*}}: f32 {spirv.interface_var_abi = {binding = 0 : i32, descriptor_set = 0 : i32, storage_class = 12 : i32{{[}][}]}}
      // CHECK-SAME: {{%.*}}: !spv.ptr<!spv.struct<!spv.array<12 x f32 [4]> [0]>, StorageBuffer> {spirv.interface_var_abi = {binding = 1 : i32, descriptor_set = 0 : i32, storage_class = 12 : i32{{[}][}]}}
      // CHECK-SAME: spirv.entry_point_abi = {local_size = dense<[32, 4, 1]> : vector<3xi32>}
-    func @kernel_1(%arg0 : f32, %arg1 : memref<12xf32>)
-        attributes { gpu.kernel } {
+    gpu.func @kernel_1(%arg0 : f32, %arg1 : memref<12xf32>) attributes {gpu.kernel} {
        // CHECK: spv.Return
-      return
+      gpu.return
      }
      // CHECK: attributes {capabilities = ["Shader"], extensions = ["SPV_KHR_storage_buffer_storage_class"]}
    }
diff --git a/mlir/test/Dialect/GPU/invalid.mlir b/mlir/test/Dialect/GPU/invalid.mlir

index 6565c62..2065595 100644 (file)
--- a/mlir/test/Dialect/GPU/invalid.mlir
+++ b/mlir/test/Dialect/GPU/invalid.mlir
@@ -194,13 +194,13 @@ module attributes {gpu.container_module} {
  
  module attributes {gpu.container_module} {
    module @kernels attributes {gpu.kernel_module} {
-    func @kernel_1(%arg1 : !llvm<"float*">) {
-      return
+    gpu.func @kernel_1(%arg1 : !llvm<"float*">) kernel {
+      gpu.return
      }
    }
  
    func @launch_func_missing_kernel_attr(%sz : index, %arg : !llvm<"float*">) {
-    // expected-error@+1 {{kernel function is missing the 'gpu.kernel' attribute}}
+    // xpected-error@+1 {{kernel function is missing the 'gpu.kernel' attribute}}
      "gpu.launch_func"(%sz, %sz, %sz, %sz, %sz, %sz, %arg)
      {kernel = "kernel_1", kernel_module = @kernels}
          : (index, index, index, index, index, index, !llvm<"float*">) -> ()
@@ -212,8 +212,8 @@ module attributes {gpu.container_module} {
  
  module attributes {gpu.container_module} {
    module @kernels attributes {gpu.kernel_module} {
-    func @kernel_1(%arg1 : !llvm<"float*">) attributes { gpu.kernel } {
-      return
+    gpu.func @kernel_1(%arg1 : !llvm<"float*">) attributes { gpu.kernel } {
+      gpu.return
      }
    }
  
@@ -230,8 +230,8 @@ module attributes {gpu.container_module} {
  // -----
  
  module @kernels attributes {gpu.kernel_module} {
-  func @kernel_1(%arg1 : !llvm<"float*">) attributes { gpu.kernel } {
-    return
+  gpu.func @kernel_1(%arg1 : !llvm<"float*">) attributes { gpu.kernel } {
+    gpu.return
    }
  }
  
diff --git a/mlir/test/Dialect/GPU/ops.mlir b/mlir/test/Dialect/GPU/ops.mlir

index e2fd26f..ff5a40d 100644 (file)
--- a/mlir/test/Dialect/GPU/ops.mlir
+++ b/mlir/test/Dialect/GPU/ops.mlir
@@ -61,8 +61,7 @@ module attributes {gpu.container_module} {
    }
  
    module @kernels attributes {gpu.kernel_module} {
-    func @kernel_1(%arg0 : f32, %arg1 : memref<?xf32, 1>)
-        attributes { gpu.kernel } {
+    gpu.func @kernel_1(%arg0 : f32, %arg1 : memref<?xf32, 1>) attributes {gpu.kernel} {
        %tIdX = "gpu.thread_id"() {dimension = "x"} : () -> (index)
        %tIdY = "gpu.thread_id"() {dimension = "y"} : () -> (index)
        %tIdZ = "gpu.thread_id"() {dimension = "z"} : () -> (index)
@@ -86,11 +85,12 @@ module attributes {gpu.container_module} {
  
        "some_op"(%bIdX, %tIdX) : (index, index) -> ()
        %42 = load %arg1[%bIdX] : memref<?xf32, 1>
-      return
+      gpu.return
      }
  
-    func @kernel_2(f32, memref<?xf32, 1>)
-        attributes { gpu.kernel }
+    gpu.func @kernel_2(%arg0: f32, %arg1: memref<?xf32, 1>) attributes {gpu.kernel} {
+      gpu.return
+    }
    }
  
    func @foo() {
diff --git a/mlir/test/Dialect/GPU/outlining.mlir b/mlir/test/Dialect/GPU/outlining.mlir

index 6b69920..5adb881 100644 (file)
--- a/mlir/test/Dialect/GPU/outlining.mlir
+++ b/mlir/test/Dialect/GPU/outlining.mlir
@@ -38,9 +38,8 @@ func @launch() {
  
  
  // CHECK-LABEL: module @launch_kernel
-// CHECK-NEXT: func @launch_kernel
+// CHECK-NEXT: gpu.func @launch_kernel
  // CHECK-SAME: (%[[KERNEL_ARG0:.*]]: f32, %[[KERNEL_ARG1:.*]]: memref<?xf32, 1>)
-// CHECK:      attributes {gpu.kernel}
  // CHECK-NEXT: %[[BID:.*]] = "gpu.block_id"() {dimension = "x"} : () -> index
  // CHECK-NEXT: = "gpu.block_id"() {dimension = "y"} : () -> index
  // CHECK-NEXT: = "gpu.block_id"() {dimension = "z"} : () -> index
@@ -138,7 +137,7 @@ func @recursive_device_function() {
  }
  
  // CHECK: module @function_call_kernel attributes {gpu.kernel_module} {
-// CHECK:   func @function_call_kernel()
+// CHECK:   gpu.func @function_call_kernel()
  // CHECK:     call @device_function() : () -> ()
  // CHECK:     call @device_function() : () -> ()
  // CHECK:     llvm.mlir.addressof @global : !llvm<"i64*">
author	Alex Zinenko <zinenko@google.com>
	Mon, 16 Dec 2019 20:12:20 +0000 (12:12 -0800)
committer	A. Unique TensorFlower <gardener@tensorflow.org>
	Mon, 16 Dec 2019 20:12:48 +0000 (12:12 -0800)
mlir/include/mlir/Dialect/GPU/GPUOps.td		patch \| blob \| history
mlir/include/mlir/Dialect/SPIRV/SPIRVLowering.h		patch \| blob \| history
mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp		patch \| blob \| history
mlir/lib/Conversion/GPUToSPIRV/ConvertGPUToSPIRV.cpp		patch \| blob \| history
mlir/lib/Dialect/GPU/IR/GPUDialect.cpp		patch \| blob \| history
mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp		patch \| blob \| history
mlir/lib/Dialect/SPIRV/SPIRVLowering.cpp		patch \| blob \| history
mlir/test/Conversion/GPUToCUDA/lower-launch-func-to-cuda.mlir		patch \| blob \| history
mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir		patch \| blob \| history
mlir/test/Conversion/GPUToSPIRV/builtins.mlir		patch \| blob \| history
mlir/test/Conversion/GPUToSPIRV/load-store.mlir		patch \| blob \| history
mlir/test/Conversion/GPUToSPIRV/loop.mlir		patch \| blob \| history
mlir/test/Conversion/GPUToSPIRV/simple.mlir		patch \| blob \| history
mlir/test/Dialect/GPU/invalid.mlir		patch \| blob \| history
mlir/test/Dialect/GPU/ops.mlir		patch \| blob \| history
mlir/test/Dialect/GPU/outlining.mlir		patch \| blob \| history