From: Thomas Raoux Date: Mon, 9 May 2022 15:46:28 +0000 (+0000) Subject: [mlir][nvvm] Add attribute to nvvm.cpAsyncOp to control l1 bypass X-Git-Tag: upstream/15.0.7~8217 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=09fc685ce6808ae34de8e235bad686252eef3812;p=platform%2Fupstream%2Fllvm.git [mlir][nvvm] Add attribute to nvvm.cpAsyncOp to control l1 bypass Add attribute to be able to generate the intrinsic version of async copy generating a copy with l1 bypass. This correspond to cp.async.cg.shared.global in ptx. Differential Revision: https://reviews.llvm.org/D125241 --- diff --git a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td index f9d32f4..f19500e 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td +++ b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td @@ -153,7 +153,8 @@ def NVVM_VoteBallotOp : def NVVM_CpAsyncOp : NVVM_Op<"cp.async.shared.global">, Arguments<(ins LLVM_i8Ptr_shared:$dst, LLVM_i8Ptr_global:$src, - I32Attr:$size)> { + I32Attr:$size, + OptionalAttr:$bypass_l1)> { string llvmBuilder = [{ llvm::Intrinsic::ID id; switch ($size) { @@ -164,7 +165,10 @@ def NVVM_CpAsyncOp : NVVM_Op<"cp.async.shared.global">, id = llvm::Intrinsic::nvvm_cp_async_ca_shared_global_8; break; case 16: - id = llvm::Intrinsic::nvvm_cp_async_ca_shared_global_16; + if(static_cast($bypass_l1)) + id = llvm::Intrinsic::nvvm_cp_async_cg_shared_global_16; + else + id = llvm::Intrinsic::nvvm_cp_async_ca_shared_global_16; break; default: llvm_unreachable("unsupported async copy size"); diff --git a/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp b/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp index 4f65730..6ccc8a0 100644 --- a/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp +++ b/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp @@ -164,7 +164,8 @@ struct GPUAsyncCopyLowering int64_t sizeInBytes = (dstMemrefType.getElementTypeBitWidth() / 8) * numElements; rewriter.create(loc, dstPtr, scrPtr, - rewriter.getI32IntegerAttr(sizeInBytes)); + rewriter.getI32IntegerAttr(sizeInBytes), + /*bypassL1=*/UnitAttr()); // Drop the result token. Value zero = rewriter.create( diff --git a/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp b/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp index 345d900..640e84a 100644 --- a/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp +++ b/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp @@ -67,6 +67,8 @@ void VoteBallotOp::print(OpAsmPrinter &p) { printNVVMIntrinsicOp(p, *this); } LogicalResult CpAsyncOp::verify() { if (size() != 4 && size() != 8 && size() != 16) return emitError("expected byte size to be either 4, 8 or 16."); + if (bypass_l1() && size() != 16) + return emitError("bypass l1 is only support for 16 bytes copy."); return success(); } diff --git a/mlir/test/Dialect/LLVMIR/invalid.mlir b/mlir/test/Dialect/LLVMIR/invalid.mlir index 50b9f1b..876668d 100644 --- a/mlir/test/Dialect/LLVMIR/invalid.mlir +++ b/mlir/test/Dialect/LLVMIR/invalid.mlir @@ -1261,6 +1261,14 @@ func.func @cp_async(%arg0: !llvm.ptr, %arg1: !llvm.ptr) { // ----- +func.func @cp_async(%arg0: !llvm.ptr, %arg1: !llvm.ptr) { + // expected-error @below {{bypass l1 is only support for 16 bytes copy.}} + nvvm.cp.async.shared.global %arg0, %arg1, 8 {bypass_l1} + return +} + +// ----- + func.func @gep_struct_variable(%arg0: !llvm.ptr>, %arg1: i32, %arg2: i32) { // expected-error @below {{op expected index 1 indexing a struct to be constant}} llvm.getelementptr %arg0[%arg1, %arg1] : (!llvm.ptr>, i32, i32) -> !llvm.ptr diff --git a/mlir/test/Dialect/LLVMIR/nvvm.mlir b/mlir/test/Dialect/LLVMIR/nvvm.mlir index dfe0443..728755d 100644 --- a/mlir/test/Dialect/LLVMIR/nvvm.mlir +++ b/mlir/test/Dialect/LLVMIR/nvvm.mlir @@ -258,6 +258,8 @@ func.func @nvvm_wmma_mma(%0 : i32, %1 : i32, %2 : i32, %3 : i32, %4 : i32, %5 : llvm.func @cp_async(%arg0: !llvm.ptr, %arg1: !llvm.ptr) { // CHECK: nvvm.cp.async.shared.global %{{.*}}, %{{.*}}, 16 nvvm.cp.async.shared.global %arg0, %arg1, 16 +// CHECK: nvvm.cp.async.shared.global %{{.*}}, %{{.*}}, 16 {bypass_l1} + nvvm.cp.async.shared.global %arg0, %arg1, 16 {bypass_l1} // CHECK: nvvm.cp.async.commit.group nvvm.cp.async.commit.group // CHECK: nvvm.cp.async.wait.group 0 diff --git a/mlir/test/Target/LLVMIR/nvvmir.mlir b/mlir/test/Target/LLVMIR/nvvmir.mlir index fddfdda..f3bd013 100644 --- a/mlir/test/Target/LLVMIR/nvvmir.mlir +++ b/mlir/test/Target/LLVMIR/nvvmir.mlir @@ -287,6 +287,8 @@ llvm.func @cp_async(%arg0: !llvm.ptr, %arg1: !llvm.ptr) { nvvm.cp.async.shared.global %arg0, %arg1, 8 // CHECK: call void @llvm.nvvm.cp.async.ca.shared.global.16(i8 addrspace(3)* %{{.*}}, i8 addrspace(1)* %{{.*}}) nvvm.cp.async.shared.global %arg0, %arg1, 16 +// CHECK: call void @llvm.nvvm.cp.async.cg.shared.global.16(i8 addrspace(3)* %{{.*}}, i8 addrspace(1)* %{{.*}}) + nvvm.cp.async.shared.global %arg0, %arg1, 16 {bypass_l1} // CHECK: call void @llvm.nvvm.cp.async.commit.group() nvvm.cp.async.commit.group // CHECK: call void @llvm.nvvm.cp.async.wait.group(i32 0)