From: Christian Sigg Date: Mon, 30 May 2022 07:18:23 +0000 (+0200) Subject: [MLIR][NVVM] NFC: add labels to test functions. X-Git-Tag: upstream/15.0.7~6415 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=544d6507baa2c708aa96b9d3c18c96838910b1c8;p=platform%2Fupstream%2Fllvm.git [MLIR][NVVM] NFC: add labels to test functions. Reviewed By: ftynse Differential Revision: https://reviews.llvm.org/D126631 --- diff --git a/mlir/test/Dialect/LLVMIR/nvvm.mlir b/mlir/test/Dialect/LLVMIR/nvvm.mlir index 728755d..9b28841 100644 --- a/mlir/test/Dialect/LLVMIR/nvvm.mlir +++ b/mlir/test/Dialect/LLVMIR/nvvm.mlir @@ -1,5 +1,6 @@ // RUN: mlir-opt %s -split-input-file -verify-diagnostics | FileCheck %s +// CHECK-LABEL: @nvvm_special_regs func.func @nvvm_special_regs() -> i32 { // CHECK: nvvm.read.ptx.sreg.tid.x : i32 %0 = nvvm.read.ptx.sreg.tid.x : i32 @@ -28,12 +29,14 @@ func.func @nvvm_special_regs() -> i32 { llvm.return %0 : i32 } -func.func @llvm.nvvm.barrier0() { +// CHECK-LABEL: @llvm_nvvm_barrier0 +func.func @llvm_nvvm_barrier0() { // CHECK: nvvm.barrier0 nvvm.barrier0 llvm.return } +// CHECK-LABEL: @nvvm_shfl func.func @nvvm_shfl( %arg0 : i32, %arg1 : i32, %arg2 : i32, %arg3 : i32, %arg4 : f32) -> i32 { @@ -50,6 +53,7 @@ func.func @nvvm_shfl( llvm.return %0 : i32 } +// CHECK-LABEL: @nvvm_shfl_pred func.func @nvvm_shfl_pred( %arg0 : i32, %arg1 : i32, %arg2 : i32, %arg3 : i32, %arg4 : f32) -> !llvm.struct<(i32, i1)> { @@ -60,6 +64,7 @@ func.func @nvvm_shfl_pred( llvm.return %0 : !llvm.struct<(i32, i1)> } +// CHECK-LABEL: @nvvm_vote( func.func @nvvm_vote(%arg0 : i32, %arg1 : i1) -> i32 { // CHECK: nvvm.vote.ballot.sync %{{.*}}, %{{.*}} : i32 %0 = nvvm.vote.ballot.sync %arg0, %arg1 : i32 @@ -77,19 +82,21 @@ func.func @nvvm_mma_m8n8k4_row_col_f32_f32(%a0 : vector<2xf16>, %a1 : vector<2xf llvm.return %0 : !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f32)> } +// CHECK-LABEL: @nvvm_mma_m8n8k4_f16_f16 func.func @nvvm_mma_m8n8k4_f16_f16(%a0 : vector<2xf16>, %a1 : vector<2xf16>, %b0 : vector<2xf16>, %b1 : vector<2xf16>, - %c0 : vector<2xf16>, %c1 : vector<2xf16>, %c2 : vector<2xf16>, %c3 : vector<2xf16>) { + %c0 : vector<2xf16>, %c1 : vector<2xf16>, %c2 : vector<2xf16>, %c3 : vector<2xf16>) { // CHECK: nvvm.mma.sync A[{{.*}}] B[{{.*}}] C[{{.*}}] %0 = nvvm.mma.sync A[%a0, %a1] B[%b0, %b1] C[%c0, %c1, %c2, %c3] {layoutA = #nvvm.mma_layout, layoutB = #nvvm.mma_layout, - shape = {k = 4 : i32, m = 8 : i32, n = 8 : i32}} : (vector<2xf16>,vector<2xf16>,vector<2xf16>) -> !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> + shape = {k = 4 : i32, m = 8 : i32, n = 8 : i32}} : (vector<2xf16>,vector<2xf16>,vector<2xf16>) -> !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> llvm.return %0 : !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> } +// CHECK-LABEL: @nvvm_mma_m8n8k16_s8_s8 func.func @nvvm_mma_m8n8k16_s8_s8(%a0 : i32, %b0 : i32, - %c0 : i32, %c1 : i32) { - // CHECK: nvvm.mma.sync A[{{.*}}] B[{{.*}}] C[{{.*}}, {{.*}}] {intOverflowBehavior = #nvvm.mma_int_overflow, layoutA = #nvvm.mma_layout, layoutB = #nvvm.mma_layout, multiplicandAPtxType = #nvvm.mma_type, multiplicandBPtxType = #nvvm.mma_type, shape = {k = 16 : i32, m = 8 : i32, n = 8 : i32}} : (i32, i32, i32) -> !llvm.struct<(i32, i32)> + %c0 : i32, %c1 : i32) { + // CHECK: nvvm.mma.sync A[{{.*}}] B[{{.*}}] C[{{.*}}, {{.*}}] {intOverflowBehavior = #nvvm.mma_int_overflow, layoutA = #nvvm.mma_layout, layoutB = #nvvm.mma_layout, multiplicandAPtxType = #nvvm.mma_type, multiplicandBPtxType = #nvvm.mma_type, shape = {k = 16 : i32, m = 8 : i32, n = 8 : i32}} : (i32, i32, i32) -> !llvm.struct<(i32, i32)> %0 = nvvm.mma.sync A[%a0] B[%b0] C[%c0, %c1] {layoutA = #nvvm.mma_layout, layoutB = #nvvm.mma_layout, multiplicandAPtxType = #nvvm.mma_type, multiplicandBPtxType = #nvvm.mma_type, @@ -98,20 +105,22 @@ func.func @nvvm_mma_m8n8k16_s8_s8(%a0 : i32, %b0 : i32, llvm.return %0 : !llvm.struct<(i32, i32)> } -func.func @nvvm_mma_m16n8k8_f16_f16(%a0 : vector<2xf16>, %a1 : vector<2xf16>, +// CHECK-LABEL: @nvvm_mma_m16n8k8_f16_f16 +func.func @nvvm_mma_m16n8k8_f16_f16(%a0 : vector<2xf16>, %a1 : vector<2xf16>, %b0 : vector<2xf16>, %c0 : vector<2xf16>, %c1 : vector<2xf16>) { // CHECK: nvvm.mma.sync A[%{{.*}}, %{{.*}}] B[%{{.*}}] C[%{{.*}}, %{{.*}}] {{{.*}}} : (vector<2xf16>, vector<2xf16>, vector<2xf16>) -> !llvm.struct<(vector<2xf16>, vector<2xf16>)> %0 = nvvm.mma.sync A[%a0, %a1] B[%b0] C[%c0, %c1] {layoutA = #nvvm.mma_layout, layoutB = #nvvm.mma_layout, - shape = {k = 8 : i32, m = 16 : i32, n = 8 : i32}} : (vector<2xf16>, vector<2xf16>, vector<2xf16>) -> !llvm.struct<(vector<2xf16>, vector<2xf16>)> + shape = {k = 8 : i32, m = 16 : i32, n = 8 : i32}} : (vector<2xf16>, vector<2xf16>, vector<2xf16>) -> !llvm.struct<(vector<2xf16>, vector<2xf16>)> llvm.return %0 : !llvm.struct<(vector<2xf16>, vector<2xf16>)> } +// CHECK-LABEL: @nvvm_mma_m16n8k16_f16_f16 func.func @nvvm_mma_m16n8k16_f16_f16(%a0 : vector<2xf16>, %a1 : vector<2xf16>, %a2 : vector<2xf16>, %a3 : vector<2xf16>, %b0 : vector<2xf16>, %b1 : vector<2xf16>, - %c0 : vector<2xf16>, %c1 : vector<2xf16>) { + %c0 : vector<2xf16>, %c1 : vector<2xf16>) { // CHECK: nvvm.mma.sync A[{{.*}}, {{.*}}, {{.*}}, {{.*}}] B[{{.*}}, {{.*}}] C[{{.*}}, {{.*}}] {layoutA = #nvvm.mma_layout, layoutB = #nvvm.mma_layout, shape = {k = 16 : i32, m = 16 : i32, n = 8 : i32}} : (vector<2xf16>, vector<2xf16>, vector<2xf16>) -> !llvm.struct<(vector<2xf16>, vector<2xf16>)> %0 = nvvm.mma.sync A[%a0, %a1, %a2, %a3] B[%b0, %b1] C[%c0, %c1] {layoutA = #nvvm.mma_layout, layoutB = #nvvm.mma_layout, @@ -119,10 +128,11 @@ func.func @nvvm_mma_m16n8k16_f16_f16(%a0 : vector<2xf16>, %a1 : vector<2xf16>, llvm.return %0 : !llvm.struct<(vector<2xf16>, vector<2xf16>)> } +// CHECK-LABEL: @nvvm_mma_m16n8k16_f32_f16 func.func @nvvm_mma_m16n8k16_f32_f16(%a0 : vector<2xf16>, %a1 : vector<2xf16>, %a2 : vector<2xf16>, %a3 : vector<2xf16>, %b0 : vector<2xf16>, %b1 : vector<2xf16>, - %c0 : vector<2xf16>, %c1 : vector<2xf16>) { + %c0 : vector<2xf16>, %c1 : vector<2xf16>) { // CHECK: nvvm.mma.sync A[{{.*}}, {{.*}}, {{.*}}, {{.*}}] B[{{.*}}, {{.*}}] C[{{.*}}, {{.*}}] {layoutA = #nvvm.mma_layout, layoutB = #nvvm.mma_layout, shape = {k = 16 : i32, m = 16 : i32, n = 8 : i32}} : (vector<2xf16>, vector<2xf16>, vector<2xf16>) -> !llvm.struct<(f32, f32, f32, f32)> %0 = nvvm.mma.sync A[%a0, %a1, %a2, %a3] B[%b0, %b1] C[%c0, %c1] {layoutA = #nvvm.mma_layout, layoutB = #nvvm.mma_layout, @@ -130,10 +140,11 @@ func.func @nvvm_mma_m16n8k16_f32_f16(%a0 : vector<2xf16>, %a1 : vector<2xf16>, llvm.return %0 : !llvm.struct<(f32, f32, f32, f32)> } +// CHECK-LABEL: @nvvm_mma_m16n8k16_f16_f32 func.func @nvvm_mma_m16n8k16_f16_f32(%a0 : vector<2xf16>, %a1 : vector<2xf16>, %a2 : vector<2xf16>, %a3 : vector<2xf16>, %b0 : vector<2xf16>, %b1 : vector<2xf16>, - %c0 : f32, %c1 : f32, %c2 : f32, %c3 : f32) { + %c0 : f32, %c1 : f32, %c2 : f32, %c3 : f32) { // CHECK: nvvm.mma.sync A[{{.*}}, {{.*}}, {{.*}}, {{.*}}] B[{{.*}}, {{.*}}] C[{{.*}}, {{.*}}, {{.*}}, {{.*}}] {layoutA = #nvvm.mma_layout, layoutB = #nvvm.mma_layout, shape = {k = 16 : i32, m = 16 : i32, n = 8 : i32}} : (vector<2xf16>, vector<2xf16>, f32) -> !llvm.struct<(vector<2xf16>, vector<2xf16>)> %0 = nvvm.mma.sync A[%a0, %a1, %a2, %a3] B[%b0, %b1] C[%c0, %c1, %c2, %c3] {layoutA = #nvvm.mma_layout, layoutB = #nvvm.mma_layout, @@ -141,10 +152,11 @@ func.func @nvvm_mma_m16n8k16_f16_f32(%a0 : vector<2xf16>, %a1 : vector<2xf16>, llvm.return %0 : !llvm.struct<(vector<2xf16>, vector<2xf16>)> } +// CHECK-LABEL: @nvvm_mma_m16n8k16_f32_f32 func.func @nvvm_mma_m16n8k16_f32_f32(%a0 : vector<2xf16>, %a1 : vector<2xf16>, %a2 : vector<2xf16>, %a3 : vector<2xf16>, %b0 : vector<2xf16>, %b1 : vector<2xf16>, - %c0 : f32, %c1 : f32, %c2 : f32, %c3 : f32) { + %c0 : f32, %c1 : f32, %c2 : f32, %c3 : f32) { // CHECK: nvvm.mma.sync A[{{.*}}, {{.*}}, {{.*}}, {{.*}}] B[{{.*}}, {{.*}}] C[{{.*}}, {{.*}}, {{.*}}, {{.*}}] {layoutA = #nvvm.mma_layout, layoutB = #nvvm.mma_layout, shape = {k = 16 : i32, m = 16 : i32, n = 8 : i32}} : (vector<2xf16>, vector<2xf16>, f32) -> !llvm.struct<(f32, f32, f32, f32)> %0 = nvvm.mma.sync A[%a0, %a1, %a2, %a3] B[%b0, %b1] C[%c0, %c1, %c2, %c3] {layoutA = #nvvm.mma_layout, layoutB = #nvvm.mma_layout, @@ -152,7 +164,8 @@ func.func @nvvm_mma_m16n8k16_f32_f32(%a0 : vector<2xf16>, %a1 : vector<2xf16>, llvm.return %0 : !llvm.struct<(f32, f32, f32, f32)> } -func.func @nvvm_mma_m16n8k4_tf32_f32(%a0 : i32, %a1 : i32, +// CHECK-LABEL: @nvvm_mma_m16n8k4_tf32_f32 +func.func @nvvm_mma_m16n8k4_tf32_f32(%a0 : i32, %a1 : i32, %b0 : i32, %c0 : f32, %c1 : f32, %c2 : f32, %c3 : f32) { // CHECK: nvvm.mma.sync A[{{.*}}, {{.*}}] B[{{.*}}] C[{{.*}}, {{.*}}, {{.*}}, {{.*}}] {layoutA = #nvvm.mma_layout, layoutB = #nvvm.mma_layout, multiplicandAPtxType = #nvvm.mma_type, multiplicandBPtxType = #nvvm.mma_type, shape = {k = 4 : i32, m = 16 : i32, n = 8 : i32}} : (i32, i32, f32) -> !llvm.struct<(f32, f32, f32, f32)> @@ -163,8 +176,9 @@ func.func @nvvm_mma_m16n8k4_tf32_f32(%a0 : i32, %a1 : i32, llvm.return %0 : !llvm.struct<(f32, f32, f32, f32)> } -func.func @nvvm_mma_m16n8k16_s8_s8(%a0 : i32, %a1 : i32, %b0 : i32, - %c0 : i32, %c1 : i32, %c2 : i32, %c3 : i32) { +// CHECK-LABEL: @nvvm_mma_m16n8k16_s8_s8 +func.func @nvvm_mma_m16n8k16_s8_s8(%a0 : i32, %a1 : i32, %b0 : i32, + %c0 : i32, %c1 : i32, %c2 : i32, %c3 : i32) { // CHECK: nvvm.mma.sync A[{{.*}}, {{.*}}] B[{{.*}}] C[{{.*}}, {{.*}}, {{.*}}, {{.*}}] {intOverflowBehavior = #nvvm.mma_int_overflow, layoutA = #nvvm.mma_layout, layoutB = #nvvm.mma_layout, multiplicandAPtxType = #nvvm.mma_type, multiplicandBPtxType = #nvvm.mma_type, shape = {k = 16 : i32, m = 16 : i32, n = 8 : i32}} : (i32, i32, i32) -> !llvm.struct<(i32, i32, i32, i32)> %0 = nvvm.mma.sync A[%a0, %a1] B[%b0] C[%c0, %c1, %c2, %c3] {layoutA = #nvvm.mma_layout, layoutB = #nvvm.mma_layout, @@ -174,9 +188,10 @@ func.func @nvvm_mma_m16n8k16_s8_s8(%a0 : i32, %a1 : i32, %b0 : i32, llvm.return %0 : !llvm.struct<(i32,i32,i32,i32)> } -func.func @nvvm_mma_m16n8k16_s8_u8(%a0 : i32, %a1 : i32, - %b0 : i32, - %c0 : i32, %c1 : i32, %c2 : i32, %c3 : i32) { +// CHECK-LABEL: @nvvm_mma_m16n8k16_s8_u8 +func.func @nvvm_mma_m16n8k16_s8_u8(%a0 : i32, %a1 : i32, + %b0 : i32, + %c0 : i32, %c1 : i32, %c2 : i32, %c3 : i32) { // CHECK: nvvm.mma.sync A[{{.*}}, {{.*}}] B[{{.*}}] C[{{.*}}, {{.*}}, {{.*}}, {{.*}}] {intOverflowBehavior = #nvvm.mma_int_overflow, layoutA = #nvvm.mma_layout, layoutB = #nvvm.mma_layout, multiplicandAPtxType = #nvvm.mma_type, multiplicandBPtxType = #nvvm.mma_type, shape = {k = 16 : i32, m = 16 : i32, n = 8 : i32}} : (i32, i32, i32) -> !llvm.struct<(i32, i32, i32, i32)> %0 = nvvm.mma.sync A[%a0, %a1] B[%b0] C[%c0, %c1, %c2, %c3] {layoutA = #nvvm.mma_layout, layoutB = #nvvm.mma_layout, @@ -186,24 +201,26 @@ func.func @nvvm_mma_m16n8k16_s8_u8(%a0 : i32, %a1 : i32, llvm.return %0 : !llvm.struct<(i32,i32,i32,i32)> } +// CHECK-LABEL: @nvvm_mma_m16n8k256_b1_b1 func.func @nvvm_mma_m16n8k256_b1_b1(%a0 : i32, %a1 : i32, %a2 : i32, %a3 : i32, %b0 : i32, %b1 : i32, - %c0 : i32, %c1 : i32, %c2 : i32, %c3 : i32) { + %c0 : i32, %c1 : i32, %c2 : i32, %c3 : i32) { // CHECK: nvvm.mma.sync A[{{.*}}, {{.*}}, {{.*}}, {{.*}}] B[{{.*}}] C[{{.*}}, {{.*}}, {{.*}}, {{.*}}] {b1Op = #nvvm.mma_b1op, layoutA = #nvvm.mma_layout, layoutB = #nvvm.mma_layout, multiplicandAPtxType = #nvvm.mma_type, multiplicandBPtxType = #nvvm.mma_type, shape = {k = 256 : i32, m = 16 : i32, n = 8 : i32}} : (i32, i32, i32) -> !llvm.struct<(i32, i32, i32, i32)> %0 = nvvm.mma.sync A[%a0, %a1, %a2, %a3] B[%b0, %b1] C[%c0, %c1, %c2, %c3] {layoutA = #nvvm.mma_layout, layoutB = #nvvm.mma_layout, - multiplicandAPtxType = #nvvm.mma_type, multiplicandBPtxType = #nvvm.mma_type, + multiplicandAPtxType = #nvvm.mma_type, multiplicandBPtxType = #nvvm.mma_type, b1Op = #nvvm.mma_b1op, shape = {k = 256 : i32, m = 16 : i32, n = 8 : i32}} : (i32, i32, i32) -> !llvm.struct<(i32,i32,i32,i32)> llvm.return %0 : !llvm.struct<(i32,i32,i32,i32)> } +// CHECK-LABEL: @nvvm_mma_m16n8k128_b1_b1 func.func @nvvm_mma_m16n8k128_b1_b1(%a0 : i32, %a1 : i32, %b0 : i32, - %c0 : i32, %c1 : i32, %c2 : i32, %c3 : i32) { + %c0 : i32, %c1 : i32, %c2 : i32, %c3 : i32) { // CHECK: nvvm.mma.sync A[{{.*}}, {{.*}}] B[{{.*}}] C[{{.*}}, {{.*}}, {{.*}}, {{.*}}] {b1Op = #nvvm.mma_b1op, layoutA = #nvvm.mma_layout, layoutB = #nvvm.mma_layout, multiplicandAPtxType = #nvvm.mma_type, multiplicandBPtxType = #nvvm.mma_type, shape = {k = 128 : i32, m = 16 : i32, n = 8 : i32}} : (i32, i32, i32) -> !llvm.struct<(i32, i32, i32, i32)> %0 = nvvm.mma.sync A[%a0, %a1] B[%b0] C[%c0, %c1, %c2, %c3] {layoutA = #nvvm.mma_layout, layoutB = #nvvm.mma_layout, - multiplicandAPtxType = #nvvm.mma_type, multiplicandBPtxType = #nvvm.mma_type, + multiplicandAPtxType = #nvvm.mma_type, multiplicandBPtxType = #nvvm.mma_type, b1Op = #nvvm.mma_b1op, shape = {k = 128 : i32, m = 16 : i32, n = 8 : i32}} : (i32, i32, i32) -> !llvm.struct<(i32,i32,i32,i32)> llvm.return %0 : !llvm.struct<(i32,i32,i32,i32)> @@ -212,7 +229,7 @@ func.func @nvvm_mma_m16n8k128_b1_b1(%a0 : i32, %a1 : i32, // CHECK-LABEL: @nvvm_mma_m8n8k128_b1_b1 func.func @nvvm_mma_m8n8k128_b1_b1(%a0 : i32, %b0 : i32, - %c0 : i32, %c1 : i32) { + %c0 : i32, %c1 : i32) { // CHECK: nvvm.mma.sync A[{{.*}}] B[{{.*}}] C[{{.*}}, {{.*}}] {b1Op = #nvvm.mma_b1op, layoutA = #nvvm.mma_layout, layoutB = #nvvm.mma_layout, multiplicandAPtxType = #nvvm.mma_type, multiplicandBPtxType = #nvvm.mma_type, shape = {k = 128 : i32, m = 8 : i32, n = 8 : i32}} : (i32, i32, i32) -> !llvm.struct<(i32, i32)> %0 = nvvm.mma.sync A[%a0] B[%b0] C[%c0, %c1] {layoutA = #nvvm.mma_layout, layoutB = #nvvm.mma_layout, @@ -224,7 +241,7 @@ func.func @nvvm_mma_m8n8k128_b1_b1(%a0 : i32, // CHECK-LABEL: @nvvm_mma_m16n8k32_s4_s4 func.func @nvvm_mma_m16n8k32_s4_s4(%a0 : i32, %a1 : i32, %b0 : i32, - %c0 : i32, %c1 : i32, %c2 : i32, %c3 : i32) { + %c0 : i32, %c1 : i32, %c2 : i32, %c3 : i32) { // CHECK: nvvm.mma.sync A[{{.*}}, {{.*}}] B[{{.*}}] C[{{.*}}, {{.*}}, {{.*}}, {{.*}}] {intOverflowBehavior = #nvvm.mma_int_overflow, layoutA = #nvvm.mma_layout, layoutB = #nvvm.mma_layout, multiplicandAPtxType = #nvvm.mma_type, multiplicandBPtxType = #nvvm.mma_type, shape = {k = 32 : i32, m = 16 : i32, n = 8 : i32}} : (i32, i32, i32) -> !llvm.struct<(i32, i32, i32, i32)> %0 = nvvm.mma.sync A[%a0, %a1] B[%b0] C[%c0, %c1, %c2, %c3] {layoutA = #nvvm.mma_layout, layoutB = #nvvm.mma_layout, @@ -243,6 +260,7 @@ func.func @nvvm_wmma_load_tf32(%arg0: !llvm.ptr, %arg1 : i32) -> !llvm.stru llvm.return %0 : !llvm.struct<(i32, i32, i32, i32)> } +// CHECK-LABEL: @nvvm_wmma_mma func.func @nvvm_wmma_mma(%0 : i32, %1 : i32, %2 : i32, %3 : i32, %4 : i32, %5 : i32, %6 : i32, %7 : i32, %8 : f32, %9 : f32, %10 : f32, %11 : f32, %12 : f32, %13 : f32, %14 : f32, %15 : f32) @@ -255,6 +273,7 @@ func.func @nvvm_wmma_mma(%0 : i32, %1 : i32, %2 : i32, %3 : i32, %4 : i32, %5 : llvm.return %r : !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f32)> } +// CHECK-LABEL: @cp_async llvm.func @cp_async(%arg0: !llvm.ptr, %arg1: !llvm.ptr) { // CHECK: nvvm.cp.async.shared.global %{{.*}}, %{{.*}}, 16 nvvm.cp.async.shared.global %arg0, %arg1, 16 diff --git a/mlir/test/Target/LLVMIR/nvvmir.mlir b/mlir/test/Target/LLVMIR/nvvmir.mlir index f3bd013..36239c3 100644 --- a/mlir/test/Target/LLVMIR/nvvmir.mlir +++ b/mlir/test/Target/LLVMIR/nvvmir.mlir @@ -1,5 +1,6 @@ // RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s +// CHECK-LABEL: @nvvm_special_regs llvm.func @nvvm_special_regs() -> i32 { // CHECK: %1 = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() %1 = nvvm.read.ptx.sreg.tid.x : i32 @@ -32,12 +33,14 @@ llvm.func @nvvm_special_regs() -> i32 { llvm.return %1 : i32 } +// CHECK-LABEL: @llvm_nvvm_barrier0 llvm.func @llvm_nvvm_barrier0() { // CHECK: call void @llvm.nvvm.barrier0() nvvm.barrier0 llvm.return } +// CHECK-LABEL: @nvvm_shfl llvm.func @nvvm_shfl( %0 : i32, %1 : i32, %2 : i32, %3 : i32, %4 : f32) -> i32 { @@ -60,6 +63,7 @@ llvm.func @nvvm_shfl( llvm.return %6 : i32 } +// CHECK-LABEL: @nvvm_shfl_pred llvm.func @nvvm_shfl_pred( %0 : i32, %1 : i32, %2 : i32, %3 : i32, %4 : f32) -> !llvm.struct<(i32, i1)> { @@ -82,6 +86,7 @@ llvm.func @nvvm_shfl_pred( llvm.return %6 : !llvm.struct<(i32, i1)> } +// CHECK-LABEL: @nvvm_vote llvm.func @nvvm_vote(%0 : i32, %1 : i1) -> i32 { // CHECK: call i32 @llvm.nvvm.vote.ballot.sync(i32 %{{.*}}, i1 %{{.*}}) %3 = nvvm.vote.ballot.sync %0, %1 : i32 @@ -94,11 +99,12 @@ llvm.func @nvvm_mma_mn8n8k4_row_col_f32_f32(%a0 : vector<2xf16>, %a1 : vector<2x %c0 : f32, %c1 : f32, %c2 : f32, %c3 : f32, %c4 : f32, %c5 : f32, %c6 : f32, %c7 : f32) -> !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f32)> { // CHECK: call { float, float, float, float, float, float, float, float } @llvm.nvvm.mma.m8n8k4.row.col.f32.f32 - %0 = nvvm.mma.sync A[%a0, %a1] B[%b0, %b1] C[%c0, %c1, %c2, %c3, %c4, %c5, %c6, %c7] + %0 = nvvm.mma.sync A[%a0, %a1] B[%b0, %b1] C[%c0, %c1, %c2, %c3, %c4, %c5, %c6, %c7] {layoutA = #nvvm.mma_layout, layoutB = #nvvm.mma_layout, shape = {m = 8 : i32, n = 8 : i32, k = 4 : i32}} : (vector<2xf16>, vector<2xf16>, f32) -> !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f32)> llvm.return %0 : !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f32)> } +// CHECK-LABEL: @nvvm_mma_m16n8k16_f16_f16 llvm.func @nvvm_mma_m16n8k16_f16_f16(%a0 : vector<2xf16>, %a1 : vector<2xf16>, %a2 : vector<2xf16>, %a3 : vector<2xf16>, %b0 : vector<2xf16>, %b1 : vector<2xf16>, @@ -111,10 +117,11 @@ llvm.func @nvvm_mma_m16n8k16_f16_f16(%a0 : vector<2xf16>, %a1 : vector<2xf16>, } // f32 return type, f16 accumulate type +// CHECK-LABEL: @nvvm_mma_m16n8k16_f32_f16 llvm.func @nvvm_mma_m16n8k16_f32_f16(%a0 : vector<2xf16>, %a1 : vector<2xf16>, %a2 : vector<2xf16>, %a3 : vector<2xf16>, %b0 : vector<2xf16>, %b1 : vector<2xf16>, - %c0 : vector<2xf16>, %c1 : vector<2xf16>) -> !llvm.struct<(f32, f32, f32, f32)> { + %c0 : vector<2xf16>, %c1 : vector<2xf16>) -> !llvm.struct<(f32, f32, f32, f32)> { // CHECK: call { float, float, float, float } @llvm.nvvm.mma.m16n8k16.row.col.f32.f16 %0 = nvvm.mma.sync A[%a0, %a1, %a2, %a3] B[%b0, %b1] C[%c0, %c1] {layoutA = #nvvm.mma_layout, layoutB = #nvvm.mma_layout, @@ -123,6 +130,7 @@ llvm.func @nvvm_mma_m16n8k16_f32_f16(%a0 : vector<2xf16>, %a1 : vector<2xf16>, } // f16 return type, f32 accumulate type +// CHECK-LABEL: @nvvm_mma_m16n8k16_f16_f32 llvm.func @nvvm_mma_m16n8k16_f16_f32(%a0 : vector<2xf16>, %a1 : vector<2xf16>, %a2 : vector<2xf16>, %a3 : vector<2xf16>, %b0 : vector<2xf16>, %b1 : vector<2xf16>, @@ -135,55 +143,60 @@ llvm.func @nvvm_mma_m16n8k16_f16_f32(%a0 : vector<2xf16>, %a1 : vector<2xf16>, } // f32 return type, f32 accumulate type +// CHECK-LABEL: @nvvm_mma_m16n8k16_f32_f32 llvm.func @nvvm_mma_m16n8k16_f32_f32(%a0 : vector<2xf16>, %a1 : vector<2xf16>, %a2 : vector<2xf16>, %a3 : vector<2xf16>, %b0 : vector<2xf16>, %b1 : vector<2xf16>, %c0 : f32, %c1 : f32, %c2 : f32, %c3 : f32) -> !llvm.struct<(f32, f32, f32, f32)> { - // CHECK: call { float, float, float, float } @llvm.nvvm.mma.m16n8k16.row.col.f32.f32 + // CHECK: call { float, float, float, float } @llvm.nvvm.mma.m16n8k16.row.col.f32.f32 %0 = nvvm.mma.sync A[%a0, %a1, %a2, %a3] B[%b0, %b1] C[%c0, %c1, %c2, %c3] {layoutA = #nvvm.mma_layout, layoutB = #nvvm.mma_layout, shape = {m = 16 : i32, n = 8 : i32, k = 16 : i32}} : (vector<2xf16>, vector<2xf16>, f32) -> !llvm.struct<(f32, f32, f32, f32)> llvm.return %0 : !llvm.struct<(f32, f32, f32, f32)> } -llvm.func @nvvm_mma_m16n8k16_s8_s8(%a0 : i32, %a1 : i32, - %b0 : i32, +// CHECK-LABEL: @nvvm_mma_m16n8k16_s8_s8 +llvm.func @nvvm_mma_m16n8k16_s8_s8(%a0 : i32, %a1 : i32, + %b0 : i32, %c0 : i32, %c1 : i32, %c2 : i32, %c3 : i32) -> !llvm.struct<(i32, i32, i32, i32)> { // CHECK: call { i32, i32, i32, i32 } @llvm.nvvm.mma.m16n8k16.row.col.s8 %0 = nvvm.mma.sync A[%a0, %a1] B[%b0] C[%c0, %c1, %c2, %c3] {layoutA = #nvvm.mma_layout, layoutB = #nvvm.mma_layout, - multiplicandAPtxType = #nvvm.mma_type, multiplicandBPtxType = #nvvm.mma_type, + multiplicandAPtxType = #nvvm.mma_type, multiplicandBPtxType = #nvvm.mma_type, intOverflowBehavior=#nvvm.mma_int_overflow, shape = {m = 16 : i32, n = 8 : i32, k = 16 : i32}} : (i32, i32, i32) -> !llvm.struct<(i32,i32,i32,i32)> llvm.return %0 : !llvm.struct<(i32,i32,i32,i32)> } -llvm.func @nvvm_mma_m16n8k16_s8_u8(%a0 : i32, %a1 : i32, - %b0 : i32, +// CHECK-LABEL: @nvvm_mma_m16n8k16_s8_u8 +llvm.func @nvvm_mma_m16n8k16_s8_u8(%a0 : i32, %a1 : i32, + %b0 : i32, %c0 : i32, %c1 : i32, %c2 : i32, %c3 : i32) -> !llvm.struct<(i32, i32, i32, i32)> { // CHECK: call { i32, i32, i32, i32 } @llvm.nvvm.mma.m16n8k16.row.col.satfinite.s8.u8 %0 = nvvm.mma.sync A[%a0, %a1] B[%b0] C[%c0, %c1, %c2, %c3] {layoutA = #nvvm.mma_layout, layoutB = #nvvm.mma_layout, - multiplicandAPtxType = #nvvm.mma_type, multiplicandBPtxType = #nvvm.mma_type, + multiplicandAPtxType = #nvvm.mma_type, multiplicandBPtxType = #nvvm.mma_type, intOverflowBehavior=#nvvm.mma_int_overflow, shape = {m = 16 : i32, n = 8 : i32, k = 16 : i32}} : (i32, i32, i32) -> !llvm.struct<(i32,i32,i32,i32)> llvm.return %0 : !llvm.struct<(i32,i32,i32,i32)> } -llvm.func @nvvm_mma_m16n8k128_b1_b1(%a0 : i32, %a1 : i32, +// CHECK-LABEL: @nvvm_mma_m16n8k128_b1_b1 +llvm.func @nvvm_mma_m16n8k128_b1_b1(%a0 : i32, %a1 : i32, %b0 : i32, - %c0 : i32, %c1 : i32, %c2 : i32, %c3 : i32) -> !llvm.struct<(i32,i32,i32,i32)> { + %c0 : i32, %c1 : i32, %c2 : i32, %c3 : i32) -> !llvm.struct<(i32,i32,i32,i32)> { // CHECK: call { i32, i32, i32, i32 } @llvm.nvvm.mma.xor.popc.m16n8k128.row.col.b1 %0 = nvvm.mma.sync A[%a0, %a1] B[%b0] C[%c0, %c1, %c2, %c3] {layoutA = #nvvm.mma_layout, layoutB = #nvvm.mma_layout, - multiplicandAPtxType = #nvvm.mma_type, multiplicandBPtxType = #nvvm.mma_type, + multiplicandAPtxType = #nvvm.mma_type, multiplicandBPtxType = #nvvm.mma_type, b1Op = #nvvm.mma_b1op, shape = {k = 128 : i32, m = 16 : i32, n = 8 : i32}} : (i32, i32, i32) -> !llvm.struct<(i32,i32,i32,i32)> llvm.return %0 : !llvm.struct<(i32,i32,i32,i32)> } +// CHECK-LABEL: @nvvm_mma_m16n8k32_s4_s4 llvm.func @nvvm_mma_m16n8k32_s4_s4(%a0 : i32, %a1 : i32, %b0 : i32, - %c0 : i32, %c1 : i32, %c2 : i32, %c3 : i32) -> !llvm.struct<(i32,i32,i32,i32)> { + %c0 : i32, %c1 : i32, %c2 : i32, %c3 : i32) -> !llvm.struct<(i32,i32,i32,i32)> { // CHECK: call { i32, i32, i32, i32 } @llvm.nvvm.mma.m16n8k32.row.col.satfinite.s4 %0 = nvvm.mma.sync A[%a0, %a1] B[%b0] C[%c0, %c1, %c2, %c3] {layoutA = #nvvm.mma_layout, layoutB = #nvvm.mma_layout, @@ -193,16 +206,18 @@ llvm.func @nvvm_mma_m16n8k32_s4_s4(%a0 : i32, %a1 : i32, llvm.return %0 : !llvm.struct<(i32,i32,i32,i32)> } +// CHECK-LABEL: @nvvm_mma_m8n8k4_f64_f64 llvm.func @nvvm_mma_m8n8k4_f64_f64(%a0 : f64, - %b0 : f64, + %b0 : f64, %c0 : f64, %c1 : f64) -> !llvm.struct<(f64, f64)> { // CHECK: call { double, double } @llvm.nvvm.mma.m8n8k4.row.col.f64 %0 = nvvm.mma.sync A[%a0] B[%b0] C[%c0, %c1] - {layoutA = #nvvm.mma_layout, layoutB = #nvvm.mma_layout, + {layoutA = #nvvm.mma_layout, layoutB = #nvvm.mma_layout, shape = {m = 8 : i32, n = 8 : i32, k = 4 : i32}} : (f64, f64, f64) -> !llvm.struct<(f64, f64)> llvm.return %0 : !llvm.struct<(f64, f64)> } +// CHECK-LABEL: @nvvm_mma_m16n8k4_tf32_f32 llvm.func @nvvm_mma_m16n8k4_tf32_f32(%a0 : i32, %a1 : i32, %b0 : i32, %c0 : f32, %c1 : f32, %c2 : f32, %c3 : f32) -> !llvm.struct<(f32, f32, f32, f32)> { @@ -228,6 +243,7 @@ llvm.func @gpu_wmma_load_op(%arg0: !llvm.ptr, %arg1: i32) { // The test below checks the correct mapping of the nvvm.wmma.*.store.* op to the correct intrinsic // in the LLVM NVPTX backend. +// CHECK-LABEL: @gpu_wmma_store_op llvm.func @gpu_wmma_store_op(%arg0: !llvm.ptr, %arg1: i32, %arg2: vector<2 x f16>, %arg3: vector<2 x f16>, %arg4: vector<2 xf16>, %arg5: vector<2 x f16>) { @@ -240,6 +256,7 @@ llvm.func @gpu_wmma_store_op(%arg0: !llvm.ptr, %arg1: i32, // The test below checks the correct mapping of the nvvm.wmma.*.mma.* op to the correct intrinsic // in the LLVM NVPTX backend. +// CHECK-LABEL: @gpu_wmma_mma_op llvm.func @gpu_wmma_mma_op(%arg0: vector<2 x f16>, %arg1: vector<2 x f16>, %arg2: vector<2 x f16>, %arg3: vector<2 x f16>, %arg4: vector<2 x f16>, %arg5: vector<2 x f16>, @@ -261,6 +278,7 @@ llvm.func @gpu_wmma_mma_op(%arg0: vector<2 x f16>, %arg1: vector<2 x f16>, llvm.return } +// CHECK-LABEL: @nvvm_wmma_load_tf32 llvm.func @nvvm_wmma_load_tf32(%arg0: !llvm.ptr, %arg1 : i32) { // CHECK: call { i32, i32, i32, i32 } @llvm.nvvm.wmma.m16n16k8.load.a.row.stride.tf32.p0i32(i32* %{{.*}}, i32 %{{.*}}) %0 = nvvm.wmma.load %arg0, %arg1 @@ -269,6 +287,7 @@ llvm.func @nvvm_wmma_load_tf32(%arg0: !llvm.ptr, %arg1 : i32) { llvm.return } +// CHECK-LABEL: @nvvm_wmma_mma llvm.func @nvvm_wmma_mma(%0 : i32, %1 : i32, %2 : i32, %3 : i32, %4 : i32, %5 : i32, %6 : i32, %7 : i32, %8 : f32, %9 : f32, %10 : f32, %11 : f32, %12 : f32, %13 : f32, %14 : f32, %15 : f32) { @@ -280,6 +299,7 @@ llvm.func @nvvm_wmma_mma(%0 : i32, %1 : i32, %2 : i32, %3 : i32, %4 : i32, %5 : llvm.return } +// CHECK-LABEL: @cp_async llvm.func @cp_async(%arg0: !llvm.ptr, %arg1: !llvm.ptr) { // CHECK: call void @llvm.nvvm.cp.async.ca.shared.global.4(i8 addrspace(3)* %{{.*}}, i8 addrspace(1)* %{{.*}}) nvvm.cp.async.shared.global %arg0, %arg1, 4 @@ -296,7 +316,7 @@ llvm.func @cp_async(%arg0: !llvm.ptr, %arg1: !llvm.ptr) { llvm.return } -// CHECK-LABEL: @ld_matrix( +// CHECK-LABEL: @ld_matrix llvm.func @ld_matrix(%arg0: !llvm.ptr) { // CHECK: call i32 @llvm.nvvm.ldmatrix.sync.aligned.m8n8.x1.b16.p3i32(i32 addrspace(3)* %{{.*}}) %l1 = nvvm.ldmatrix %arg0 {num = 1 : i32, layout = #nvvm.mma_layout} : (!llvm.ptr) -> i32