Unify GPU op definition names with other dialects.
author Christian Sigg <csigg@google.com>
Mon, 21 Oct 2019 18:10:13 +0000 (11:10 -0700)
committer A. Unique TensorFlower <gardener@tensorflow.org>
Mon, 21 Oct 2019 18:10:56 +0000 (11:10 -0700)
Rename GPU op names from gpu_Foo to GPU_FooOp.
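
This matches the convention used by the other dialects: the TableGen def is
named with the dialect prefix plus an "Op" suffix (GPU_FooOp), and the
generated C++ class becomes gpu::FooOp. A minimal sketch of how C++ call
sites change, using only names that appear in this patch:

    // Illustrative only; the op classes are now suffixed with "Op".
    isa<gpu::Return>(op);        // before
    isa<gpu::ReturnOp>(op);      // after
    cast<gpu::AllReduce>(op);    // before
    cast<gpu::AllReduceOp>(op);  // after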

PiperOrigin-RevId: 275882232

mlir/include/mlir/Dialect/GPU/GPUOps.td
mlir/lib/Conversion/GPUToNVVM/GPUToNVVM.td
mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp
mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp
mlir/lib/Conversion/GPUToSPIRV/GPUToSPIRV.cpp
mlir/lib/Conversion/LoopsToGPU/LoopsToGPU.cpp
mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp

diff --git a/mlir/include/mlir/Dialect/GPU/GPUOps.td b/mlir/include/mlir/Dialect/GPU/GPUOps.td
index f38e1b0..9c0ab8e 100644
@@ -41,12 +41,12 @@ class GPU_IndexOp<string mnemonic, list<OpTrait> traits = []> :
   let verifier = [{ return ::verifyIndexOp(*this); }];
 }
 
-def gpu_BlockDim : GPU_IndexOp<"block_dim">;
-def gpu_BlockId : GPU_IndexOp<"block_id">;
-def gpu_GridDim : GPU_IndexOp<"grid_dim">;
-def gpu_ThreadId : GPU_IndexOp<"thread_id">;
+def GPU_BlockDimOp : GPU_IndexOp<"block_dim">;
+def GPU_BlockIdOp : GPU_IndexOp<"block_id">;
+def GPU_GridDimOp : GPU_IndexOp<"grid_dim">;
+def GPU_ThreadIdOp : GPU_IndexOp<"thread_id">;
 
-def gpu_Return : GPU_Op<"return", [Terminator]>, Arguments<(ins)>,
+def GPU_ReturnOp : GPU_Op<"return", [Terminator]>, Arguments<(ins)>,
     Results<(outs)> {
   let summary = "Terminator for GPU launch regions.";
   let description = [{
@@ -59,7 +59,7 @@ def gpu_Return : GPU_Op<"return", [Terminator]>, Arguments<(ins)>,
   let printer = [{ p << getOperationName(); }];
 }
 
-def gpu_Yield : GPU_Op<"yield", [Terminator]>,
+def GPU_YieldOp : GPU_Op<"yield", [Terminator]>,
     Arguments<(ins Variadic<AnyType>:$values)> {
   let summary = "GPU yield operation";
   let description = [{
@@ -74,20 +74,20 @@ def gpu_Yield : GPU_Op<"yield", [Terminator]>,
 }
 
 // These mirror the XLA ComparisonDirection enum.
-def gpu_AllReduceOpAdd : StrEnumAttrCase<"add">;
-def gpu_AllReduceOpMul : StrEnumAttrCase<"mul">;
+def GPU_AllReduceOpAdd : StrEnumAttrCase<"add">;
+def GPU_AllReduceOpMul : StrEnumAttrCase<"mul">;
 
-def gpu_AllReduceOperationAttr : StrEnumAttr<"AllReduceOperationAttr",
+def GPU_AllReduceOperationAttr : StrEnumAttr<"AllReduceOperationAttr",
     "built-in reduction operations supported by gpu.allreduce.",
     [
-      gpu_AllReduceOpAdd,
-      gpu_AllReduceOpMul,
+      GPU_AllReduceOpAdd,
+      GPU_AllReduceOpMul,
     ]>;
 
-def gpu_AllReduce : GPU_Op<"all_reduce",
+def GPU_AllReduceOp : GPU_Op<"all_reduce",
     [SameOperandsAndResultType, IsolatedFromAbove]>,
     Arguments<(ins AnyType:$value,
-               OptionalAttr<gpu_AllReduceOperationAttr>:$op)>,
+               OptionalAttr<GPU_AllReduceOperationAttr>:$op)>,
     Results<(outs AnyType)> {
   let summary = "Reduce values among workgroup.";
   let description = [{
@@ -115,7 +115,7 @@ def gpu_AllReduce : GPU_Op<"all_reduce",
   let verifier = [{ return ::verifyAllReduce(*this); }];
 }
 
-def gpu_Barrier : GPU_Op<"barrier"> {
+def GPU_BarrierOp : GPU_Op<"barrier"> {
   let summary = "Synchronizes all work items of a workgroup.";
   let description = [{
     The "barrier" op synchronizes all work items of a workgroup. It is used
diff --git a/mlir/lib/Conversion/GPUToNVVM/GPUToNVVM.td b/mlir/lib/Conversion/GPUToNVVM/GPUToNVVM.td
index d7daf10..2b89b72 100644
@@ -33,6 +33,6 @@ include "mlir/Dialect/GPU/GPUOps.td"
 include "mlir/Dialect/LLVMIR/NVVMOps.td"
 #endif // NVVMIR_OPS
 
-def : Pat<(gpu_Barrier), (NVVM_Barrier0Op)>;
+def : Pat<(GPU_BarrierOp), (NVVM_Barrier0Op)>;
 
 #endif // MLIR_CONVERSION_GPUTONVVM_TD
diff --git a/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp b/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp
index 462457c..b051514 100644
@@ -41,7 +41,7 @@ struct GPUAllReduceOpLowering : public LLVMOpLowering {
       Location, Value *, Value *, ConversionPatternRewriter &)>;
 
   explicit GPUAllReduceOpLowering(LLVMTypeConverter &lowering_)
-      : LLVMOpLowering(gpu::AllReduce::getOperationName(),
+      : LLVMOpLowering(gpu::AllReduceOp::getOperationName(),
                        lowering_.getDialect()->getContext(), lowering_),
         int32Type(LLVM::LLVMType::getInt32Ty(lowering_.getDialect())) {}
 
@@ -55,7 +55,8 @@ struct GPUAllReduceOpLowering : public LLVMOpLowering {
     assert(op->getOperand(0)->getType().isIntOrFloat());
 
     // Create the reduction using an accumulator factory.
-    AccumulatorFactory factory = getFactory(cast<gpu::AllReduce>(op), operand);
+    AccumulatorFactory factory =
+        getFactory(cast<gpu::AllReduceOp>(op), operand);
     assert(factory && "failed to create accumulator factory");
     Value *result = createBlockReduce(loc, operand, factory, rewriter);
 
@@ -66,7 +67,7 @@ struct GPUAllReduceOpLowering : public LLVMOpLowering {
 private:
   /// Returns an accumulator factory using either the op attribute or the body
   /// region.
-  AccumulatorFactory getFactory(gpu::AllReduce allReduce,
+  AccumulatorFactory getFactory(gpu::AllReduceOp allReduce,
                                 Value *operand) const {
     if (!allReduce.body().empty()) {
       return getFactory(allReduce.body());
@@ -103,7 +104,7 @@ private:
       // Replace all gpu.yield ops with branch out of body.
       for (; block != split; block = block->getNextNode()) {
         Operation *terminator = block->getTerminator();
-        if (!llvm::isa<gpu::Yield>(terminator))
+        if (!llvm::isa<gpu::YieldOp>(terminator))
           continue;
         rewriter.setInsertionPointToEnd(block);
         rewriter.replaceOpWithNewOp<LLVM::BrOp>(
@@ -469,13 +470,13 @@ public:
     populateStdToLLVMConversionPatterns(converter, patterns);
     populateWithGenerated(&getContext(), &patterns);
     patterns.insert<
-        GPUIndexIntrinsicOpLowering<gpu::ThreadId, NVVM::ThreadIdXOp,
+        GPUIndexIntrinsicOpLowering<gpu::ThreadIdOp, NVVM::ThreadIdXOp,
                                     NVVM::ThreadIdYOp, NVVM::ThreadIdZOp>,
-        GPUIndexIntrinsicOpLowering<gpu::BlockDim, NVVM::BlockDimXOp,
+        GPUIndexIntrinsicOpLowering<gpu::BlockDimOp, NVVM::BlockDimXOp,
                                     NVVM::BlockDimYOp, NVVM::BlockDimZOp>,
-        GPUIndexIntrinsicOpLowering<gpu::BlockId, NVVM::BlockIdXOp,
+        GPUIndexIntrinsicOpLowering<gpu::BlockIdOp, NVVM::BlockIdXOp,
                                     NVVM::BlockIdYOp, NVVM::BlockIdZOp>,
-        GPUIndexIntrinsicOpLowering<gpu::GridDim, NVVM::GridDimXOp,
+        GPUIndexIntrinsicOpLowering<gpu::GridDimOp, NVVM::GridDimXOp,
                                     NVVM::GridDimYOp, NVVM::GridDimZOp>,
         GPUAllReduceOpLowering>(converter);
 
@@ -484,7 +485,7 @@ public:
     target.addLegalDialect<LLVM::LLVMDialect>();
     target.addLegalDialect<NVVM::NVVMDialect>();
     // TODO(csigg): Remove once we support replacing non-root ops.
-    target.addLegalOp<gpu::Yield>();
+    target.addLegalOp<gpu::YieldOp>();
     if (failed(applyPartialConversion(m, target, patterns, &converter)))
       signalPassFailure();
   }
diff --git a/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp b/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp
index 3b8ad14..2ea587e 100644
@@ -50,13 +50,13 @@ public:
     LLVMTypeConverter converter(m.getContext());
     populateStdToLLVMConversionPatterns(converter, patterns);
     patterns.insert<
-        GPUIndexIntrinsicOpLowering<gpu::ThreadId, ROCDL::ThreadIdXOp,
+        GPUIndexIntrinsicOpLowering<gpu::ThreadIdOp, ROCDL::ThreadIdXOp,
                                     ROCDL::ThreadIdYOp, ROCDL::ThreadIdZOp>,
-        GPUIndexIntrinsicOpLowering<gpu::BlockDim, ROCDL::BlockDimXOp,
+        GPUIndexIntrinsicOpLowering<gpu::BlockDimOp, ROCDL::BlockDimXOp,
                                     ROCDL::BlockDimYOp, ROCDL::BlockDimZOp>,
-        GPUIndexIntrinsicOpLowering<gpu::BlockId, ROCDL::BlockIdXOp,
+        GPUIndexIntrinsicOpLowering<gpu::BlockIdOp, ROCDL::BlockIdXOp,
                                     ROCDL::BlockIdYOp, ROCDL::BlockIdZOp>,
-        GPUIndexIntrinsicOpLowering<gpu::GridDim, ROCDL::GridDimXOp,
+        GPUIndexIntrinsicOpLowering<gpu::GridDimOp, ROCDL::GridDimXOp,
                                     ROCDL::GridDimYOp, ROCDL::GridDimZOp>>(
         converter);
 
diff --git a/mlir/lib/Conversion/GPUToSPIRV/GPUToSPIRV.cpp b/mlir/lib/Conversion/GPUToSPIRV/GPUToSPIRV.cpp
index 45720fe..4f73b9b 100644
@@ -149,11 +149,12 @@ void GPUToSPIRVPass::runOnModule() {
   OwningRewritePatternList patterns;
   patterns.insert<
       KernelFnConversion,
-      LaunchConfigConversion<gpu::BlockDim, spirv::BuiltIn::WorkgroupSize>,
-      LaunchConfigConversion<gpu::BlockId, spirv::BuiltIn::WorkgroupId>,
-      LaunchConfigConversion<gpu::GridDim, spirv::BuiltIn::NumWorkgroups>,
-      LaunchConfigConversion<gpu::ThreadId, spirv::BuiltIn::LocalInvocationId>>(
-      context, typeConverter);
+      LaunchConfigConversion<gpu::BlockDimOp, spirv::BuiltIn::WorkgroupSize>,
+      LaunchConfigConversion<gpu::BlockIdOp, spirv::BuiltIn::WorkgroupId>,
+      LaunchConfigConversion<gpu::GridDimOp, spirv::BuiltIn::NumWorkgroups>,
+      LaunchConfigConversion<gpu::ThreadIdOp,
+                             spirv::BuiltIn::LocalInvocationId>>(context,
+                                                                 typeConverter);
   populateStandardToSPIRVPatterns(context, patterns);
 
   ConversionTarget target(*context);
diff --git a/mlir/lib/Conversion/LoopsToGPU/LoopsToGPU.cpp b/mlir/lib/Conversion/LoopsToGPU/LoopsToGPU.cpp
index 154a866..2229455 100644
@@ -261,7 +261,7 @@ void LoopToGpuConverter::createLaunch(OpTy rootForOp, OpTy innermostForOp,
   Location terminatorLoc = terminator.getLoc();
   terminator.erase();
   builder.setInsertionPointToEnd(innermostForOp.getBody());
-  builder.create<gpu::Return>(terminatorLoc);
+  builder.create<gpu::ReturnOp>(terminatorLoc);
   launchOp.getBody().front().getOperations().splice(
       launchOp.getBody().front().begin(),
       innermostForOp.getBody()->getOperations());
diff --git a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
index 6e55c0b..d1f223e 100644
@@ -137,7 +137,7 @@ template <typename T> static LogicalResult verifyIndexOp(T op) {
   return success();
 }
 
-static LogicalResult verifyAllReduce(gpu::AllReduce allReduce) {
+static LogicalResult verifyAllReduce(gpu::AllReduceOp allReduce) {
   if (allReduce.body().empty() != allReduce.op().hasValue())
     return allReduce.emitError(
         "expected either an op attribute or a non-empty body");
@@ -150,7 +150,7 @@ static LogicalResult verifyAllReduce(gpu::AllReduce allReduce) {
     }
     unsigned yieldCount = 0;
     for (Block &block : allReduce.body()) {
-      if (auto yield = dyn_cast<gpu::Yield>(block.getTerminator())) {
+      if (auto yield = dyn_cast<gpu::YieldOp>(block.getTerminator())) {
         if (yield.getNumOperands() != 1)
           return allReduce.emitError("expected one gpu.yield operand");
         if (yield.getOperand(0)->getType() != allReduce.getType())
@@ -164,8 +164,13 @@ static LogicalResult verifyAllReduce(gpu::AllReduce allReduce) {
   return success();
 }
 
+// Namespace avoids ambiguous ReturnOpOperandAdaptor.
+namespace mlir {
+namespace gpu {
 #define GET_OP_CLASSES
 #include "mlir/Dialect/GPU/GPUOps.cpp.inc"
+} // namespace gpu
+} // namespace mlir
 
 //===----------------------------------------------------------------------===//
 // LaunchOp
@@ -263,7 +268,7 @@ LogicalResult LaunchOp::verify() {
       continue;
     if (block.back().getNumSuccessors() != 0)
       continue;
-    if (!isa<gpu::Return>(&block.back())) {
+    if (!isa<gpu::ReturnOp>(&block.back())) {
       return block.back()
                  .emitError("expected 'gpu.terminator' or a terminator with "
                             "successors")
diff --git a/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp b/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp
index 8377ec6..d9a1106 100644
@@ -43,10 +43,10 @@ static void createForAllDimensions(OpBuilder &builder, Location loc,
 static void injectGpuIndexOperations(Location loc, FuncOp kernelFunc) {
   OpBuilder OpBuilder(kernelFunc.getBody());
   SmallVector<Value *, 12> indexOps;
-  createForAllDimensions<gpu::BlockId>(OpBuilder, loc, indexOps);
-  createForAllDimensions<gpu::ThreadId>(OpBuilder, loc, indexOps);
-  createForAllDimensions<gpu::GridDim>(OpBuilder, loc, indexOps);
-  createForAllDimensions<gpu::BlockDim>(OpBuilder, loc, indexOps);
+  createForAllDimensions<gpu::BlockIdOp>(OpBuilder, loc, indexOps);
+  createForAllDimensions<gpu::ThreadIdOp>(OpBuilder, loc, indexOps);
+  createForAllDimensions<gpu::GridDimOp>(OpBuilder, loc, indexOps);
+  createForAllDimensions<gpu::BlockDimOp>(OpBuilder, loc, indexOps);
   // Replace the leading 12 function args with the respective thread/block index
   // operations. Iterate backwards since args are erased and indices change.
   for (int i = 11; i >= 0; --i) {
@@ -107,7 +107,7 @@ static FuncOp outlineKernelFunc(gpu::LaunchOp launchOp) {
   outlinedFunc.setAttr(gpu::GPUDialect::getKernelFuncAttrName(),
                        builder.getUnitAttr());
   injectGpuIndexOperations(loc, outlinedFunc);
-  outlinedFunc.walk([](gpu::Return op) {
+  outlinedFunc.walk([](gpu::ReturnOp op) {
     OpBuilder replacer(op);
     replacer.create<ReturnOp>(op.getLoc());
     op.erase();