From 66d555aa33516f26adb833ef3ab3754926fc97cd Mon Sep 17 00:00:00 2001 From: Javier Setoain Date: Mon, 12 Dec 2022 18:02:59 +0000 Subject: [PATCH] [mlir][sparse][ArmSVE] Enable sparse integration tests for ArmSVE This patch adds the logic necessary to target the sparse-tensor dialect integration tests for SVE. As the LLVM backend for AArch64 does not currently support product reductions, the corresponding tests are disabled for SVE. Not all tests have been updated yet. The remaining tests will be refactored in a separate patch shortly. Differential Revision: https://reviews.llvm.org/D121304 Co-authored-by: Andrzej Warzynski --- .../SparseTensor/CPU/Inputs/main_for_lli.ll | 8 ++ .../Dialect/SparseTensor/CPU/lit.local.cfg | 28 +++++ .../Dialect/SparseTensor/CPU/sparse_cast.mlir | 21 +++- .../SparseTensor/CPU/sparse_filter_conv2d.mlir | 21 +++- .../Dialect/SparseTensor/CPU/sparse_flatten.mlir | 22 +++- .../SparseTensor/CPU/sparse_index_dense.mlir | 21 +++- .../Dialect/SparseTensor/CPU/sparse_matvec.mlir | 27 +++-- .../Dialect/SparseTensor/CPU/sparse_mttkrp.mlir | 22 +++- .../SparseTensor/CPU/sparse_out_simple.mlir | 22 +++- .../SparseTensor/CPU/sparse_quantized_matmul.mlir | 21 +++- .../SparseTensor/CPU/sparse_reductions.mlir | 54 +++------- .../SparseTensor/CPU/sparse_reductions_prod.mlir | 115 +++++++++++++++++++++ .../SparseTensor/CPU/sparse_sampled_matmul.mlir | 22 +++- .../SparseTensor/CPU/sparse_sampled_mm_fusion.mlir | 21 +++- .../Dialect/SparseTensor/CPU/sparse_scale.mlir | 19 +++- .../Dialect/SparseTensor/CPU/sparse_spmm.mlir | 22 +++- .../Dialect/SparseTensor/CPU/sparse_sum.mlir | 22 +++- 17 files changed, 384 insertions(+), 104 deletions(-) create mode 100644 mlir/test/Integration/Dialect/SparseTensor/CPU/Inputs/main_for_lli.ll create mode 100644 mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_reductions_prod.mlir mode change 100755 => 100644 mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sampled_mm_fusion.mlir diff --git 
a/mlir/test/Integration/Dialect/SparseTensor/CPU/Inputs/main_for_lli.ll b/mlir/test/Integration/Dialect/SparseTensor/CPU/Inputs/main_for_lli.ll new file mode 100644 index 0000000..7b74618 --- /dev/null +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/Inputs/main_for_lli.ll @@ -0,0 +1,8 @@ +; Dummy wrapper required by lli, which does not support void functions (i.e. +; it fails if non-zero code is returned) +define i32 @entry_lli() { + call void @entry() + ret i32 0 +} + +declare void @entry() diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/lit.local.cfg b/mlir/test/Integration/Dialect/SparseTensor/CPU/lit.local.cfg index 83247d7..dba6373 100644 --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/lit.local.cfg +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/lit.local.cfg @@ -3,3 +3,31 @@ import sys # No JIT on win32. if sys.platform == 'win32': config.unsupported = True + +# ArmSVE tests must be enabled via build flag. +if config.mlir_run_arm_sve_tests: + config.substitutions.append(('%ENABLE_VLA', 'true')) + config.substitutions.append(('%VLA_ARCH_ATTR_OPTIONS', '--march=aarch64 --mattr="+sve"')) + lli_cmd = 'lli' + if config.arm_emulator_lli_executable: + lli_cmd = config.arm_emulator_lli_executable + + if config.arm_emulator_utils_lib_dir: + config.substitutions.append(('%mlir_native_utils_lib_dir', config.arm_emulator_utils_lib_dir)) + else: + config.substitutions.append(('%mlir_native_utils_lib_dir', config.mlir_lib_dir)) + + if config.arm_emulator_executable: + # Run test in emulator (qemu or armie). 
+ emulation_cmd = config.arm_emulator_executable + if config.arm_emulator_options: + emulation_cmd = emulation_cmd + ' ' + config.arm_emulator_options + emulation_cmd = emulation_cmd + ' ' + lli_cmd + config.substitutions.append(('%lli', emulation_cmd)) + else: + config.substitutions.append(('%lli', lli_cmd)) +else: + config.substitutions.append(('%ENABLE_VLA', 'false')) + config.substitutions.append(('%VLA_ARCH_ATTR_OPTIONS', '')) + config.substitutions.append(('%lli', 'lli')) + config.substitutions.append(('%mlir_native_utils_lib_dir', config.mlir_lib_dir)) diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_cast.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_cast.mlir index 13a9dcc..4cc6126 100644 --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_cast.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_cast.mlir @@ -1,19 +1,30 @@ // DEFINE: %{option} = enable-runtime-library=true -// DEFINE: %{command} = mlir-opt %s --sparse-compiler=%{option} | \ -// DEFINE: mlir-cpu-runner \ +// DEFINE: %{compile} = mlir-opt %s --sparse-compiler=%{option} +// DEFINE: %{run} = mlir-cpu-runner \ // DEFINE: -e entry -entry-point-result=void \ // DEFINE: -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | \ // DEFINE: FileCheck %s // -// RUN: %{command} +// RUN: %{compile} | %{run} // // Do the same run, but now with direct IR generation. // REDEFINE: %{option} = enable-runtime-library=false -// RUN: %{command} +// RUN: %{compile} | %{run} // // Do the same run, but now with direct IR generation and vectorization. // REDEFINE: %{option} = "enable-runtime-library=false vl=2 reassociate-fp-reductions=true enable-index-optimizations=true" -// RUN: %{command} +// RUN: %{compile} | %{run} + +// If SVE is available, do the same run, but now with direct IR generation and VLA +// vectorization. 
+// REDEFINE: %{option} = "enable-runtime-library=false vl=4 enable-arm-sve=%ENABLE_VLA" +// REDEFINE: %{run} = %lli \ +// REDEFINE: --entry-function=entry_lli \ +// REDEFINE: --extra-module=%S/Inputs/main_for_lli.ll \ +// REDEFINE: %VLA_ARCH_ATTR_OPTIONS \ +// REDEFINE: --dlopen=%mlir_native_utils_lib_dir/libmlir_c_runner_utils%shlibext | \ +// REDEFINE: FileCheck %s +// RUN: %{compile} | mlir-translate -mlir-to-llvmir | %{run} #SV = #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ] }> diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_filter_conv2d.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_filter_conv2d.mlir index 5c1e993..952f01a 100644 --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_filter_conv2d.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_filter_conv2d.mlir @@ -1,19 +1,30 @@ // DEFINE: %{option} = enable-runtime-library=true -// DEFINE: %{command} = mlir-opt %s --sparse-compiler=%{option} | \ -// DEFINE: mlir-cpu-runner \ +// DEFINE: %{compile} = mlir-opt %s --sparse-compiler=%{option} +// DEFINE: %{run} = mlir-cpu-runner \ // DEFINE: -e entry -entry-point-result=void \ // DEFINE: -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | \ // DEFINE: FileCheck %s // -// RUN: %{command} +// RUN: %{compile} | %{run} // // Do the same run, but now with direct IR generation. // REDEFINE: %{option} = enable-runtime-library=false -// RUN: %{command} +// RUN: %{compile} | %{run} // // Do the same run, but now with direct IR generation and vectorization. // REDEFINE: %{option} = "enable-runtime-library=false vl=2 reassociate-fp-reductions=true enable-index-optimizations=true" -// RUN: %{command} +// RUN: %{compile} | %{run} + +// If SVE is available, do the same run, but now with direct IR generation and VLA +// vectorization. 
+// REDEFINE: %{option} = "enable-runtime-library=false vl=4 enable-arm-sve=%ENABLE_VLA" +// REDEFINE: %{run} = %lli \ +// REDEFINE: --entry-function=entry_lli \ +// REDEFINE: --extra-module=%S/Inputs/main_for_lli.ll \ +// REDEFINE: %VLA_ARCH_ATTR_OPTIONS \ +// REDEFINE: --dlopen=%mlir_native_utils_lib_dir/libmlir_c_runner_utils%shlibext | \ +// REDEFINE: FileCheck %s +// RUN: %{compile} | mlir-translate -mlir-to-llvmir | %{run} #DCSR = #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "compressed" ] }> diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_flatten.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_flatten.mlir index 6917274..28f9033 100644 --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_flatten.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_flatten.mlir @@ -1,20 +1,32 @@ // DEFINE: %{option} = enable-runtime-library=true -// DEFINE: %{command} = mlir-opt %s --sparse-compiler=%{option} | \ -// DEFINE: TENSOR0="%mlir_src_dir/test/Integration/data/test.tns" \ +// DEFINE: %{compile} = mlir-opt %s --sparse-compiler=%{option} +// DEFINE: %{run} = TENSOR0="%mlir_src_dir/test/Integration/data/test.tns" \ // DEFINE: mlir-cpu-runner \ // DEFINE: -e entry -entry-point-result=void \ // DEFINE: -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext,%mlir_lib_dir/libmlir_runner_utils%shlibext | \ // DEFINE: FileCheck %s // -// RUN: %{command} +// RUN: %{compile} | %{run} // // Do the same run, but now with direct IR generation. // REDEFINE: %{option} = enable-runtime-library=false -// RUN: %{command} +// RUN: %{compile} | %{run} // // Do the same run, but now with direct IR generation and vectorization. // REDEFINE: %{option} = "enable-runtime-library=false vl=2 reassociate-fp-reductions=true enable-index-optimizations=true" -// RUN: %{command} +// RUN: %{compile} | %{run} + +// If SVE is available, do the same run, but now with direct IR generation and VLA +// vectorization. 
+// REDEFINE: %{option} = "enable-runtime-library=false vl=4 enable-arm-sve=%ENABLE_VLA" +// REDEFINE: %{run} = TENSOR0="%mlir_src_dir/test/Integration/data/test.tns" \ +// REDEFINE: %lli \ +// REDEFINE: --entry-function=entry_lli \ +// REDEFINE: --extra-module=%S/Inputs/main_for_lli.ll \ +// REDEFINE: %VLA_ARCH_ATTR_OPTIONS \ +// REDEFINE: --dlopen=%mlir_native_utils_lib_dir/libmlir_c_runner_utils%shlibext --dlopen=%mlir_lib_dir/libmlir_runner_utils%shlibext | \ +// REDEFINE: FileCheck %s +// RUN: %{compile} | mlir-translate -mlir-to-llvmir | %{run} !Filename = !llvm.ptr diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_index_dense.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_index_dense.mlir index af64c09..c5ce259 100644 --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_index_dense.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_index_dense.mlir @@ -1,19 +1,30 @@ // DEFINE: %{option} = enable-runtime-library=true -// DEFINE: %{command} = mlir-opt %s --sparse-compiler=%{option} | \ -// DEFINE: mlir-cpu-runner \ +// DEFINE: %{compile} = mlir-opt %s --sparse-compiler=%{option} +// DEFINE: %{run} = mlir-cpu-runner \ // DEFINE: -e entry -entry-point-result=void \ // DEFINE: -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | \ // DEFINE: FileCheck %s // -// RUN: %{command} +// RUN: %{compile} | %{run} // // Do the same run, but now with direct IR generation. // REDEFINE: %{option} = enable-runtime-library=false -// RUN: %{command} +// RUN: %{compile} | %{run} // // Do the same run, but now with direct IR generation and vectorization. // REDEFINE: %{option} = "enable-runtime-library=false vl=2 reassociate-fp-reductions=true enable-index-optimizations=true" -// RUN: %{command} +// RUN: %{compile} | %{run} + +// If SVE is available, do the same run, but now with direct IR generation and VLA +// vectorization. 
+// REDEFINE: %{option} = "enable-runtime-library=false vl=4 enable-arm-sve=%ENABLE_VLA" +// REDEFINE: %{run} = %lli \ +// REDEFINE: --entry-function=entry_lli \ +// REDEFINE: --extra-module=%S/Inputs/main_for_lli.ll \ +// REDEFINE: %VLA_ARCH_ATTR_OPTIONS \ +// REDEFINE: --dlopen=%mlir_native_utils_lib_dir/libmlir_c_runner_utils%shlibext | \ +// REDEFINE: FileCheck %s +// RUN: %{compile} | mlir-translate -mlir-to-llvmir | %{run} #SparseVector = #sparse_tensor.encoding<{ dimLevelType = ["compressed"] diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matvec.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matvec.mlir index 2c18c2b..d9e3143 100644 --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matvec.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matvec.mlir @@ -1,28 +1,39 @@ // DEFINE: %{option} = enable-runtime-library=true -// DEFINE: %{command} = mlir-opt %s --sparse-compiler=%{option} | \ -// DEFINE: TENSOR0="%mlir_src_dir/test/Integration/data/wide.mtx" \ +// DEFINE: %{compile} = mlir-opt %s --sparse-compiler=%{option} +// DEFINE: %{run} = TENSOR0="%mlir_src_dir/test/Integration/data/wide.mtx" \ // DEFINE: mlir-cpu-runner \ // DEFINE: -e entry -entry-point-result=void \ // DEFINE: -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | \ // DEFINE: FileCheck %s -// -// RUN: %{command} +// RUN: %{compile} | %{run} // // Do the same run, but now with direct IR generation. // REDEFINE: %{option} = enable-runtime-library=false -// RUN: %{command} +// RUN: %{compile} | %{run} // // Do the same run, but now with parallelization strategy. // REDEFINE: %{option} = "enable-runtime-library=true parallelization-strategy=any-storage-any-loop" -// RUN: %{command} +// RUN: %{compile} | %{run} // // Do the same run, but now with direct IR generation and parallelization strategy. 
// REDEFINE: %{option} = "enable-runtime-library=false parallelization-strategy=any-storage-any-loop" -// RUN: %{command} +// RUN: %{compile} | %{run} // // Do the same run, but now with direct IR generation and vectorization. // REDEFINE: %{option} = "enable-runtime-library=false vl=2 reassociate-fp-reductions=true enable-index-optimizations=true" -// RUN: %{command} +// RUN: %{compile} | %{run} + +// If SVE is available, do the same run, but now with direct IR generation and VLA +// vectorization. +// REDEFINE: %{option} = "enable-runtime-library=false vl=4 enable-arm-sve=%ENABLE_VLA" +// REDEFINE: %{run} = TENSOR0="%mlir_src_dir/test/Integration/data/wide.mtx" \ +// REDEFINE: %lli \ +// REDEFINE: --entry-function=entry_lli \ +// REDEFINE: --extra-module=%S/Inputs/main_for_lli.ll \ +// REDEFINE: %VLA_ARCH_ATTR_OPTIONS \ +// REDEFINE: --dlopen=%mlir_native_utils_lib_dir/libmlir_c_runner_utils%shlibext | \ +// REDEFINE: FileCheck %s +// RUN: %{compile} | mlir-translate -mlir-to-llvmir | %{run} !Filename = !llvm.ptr diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_mttkrp.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_mttkrp.mlir index 375db9a..55db2ba 100644 --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_mttkrp.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_mttkrp.mlir @@ -1,20 +1,32 @@ // DEFINE: %{option} = enable-runtime-library=true -// DEFINE: %{command} = mlir-opt %s --sparse-compiler=%{option} | \ -// DEFINE: TENSOR0="%mlir_src_dir/test/Integration/data/mttkrp_b.tns" \ +// DEFINE: %{compile} = mlir-opt %s --sparse-compiler=%{option} +// DEFINE: %{run} = TENSOR0="%mlir_src_dir/test/Integration/data/mttkrp_b.tns" \ // DEFINE: mlir-cpu-runner \ // DEFINE: -e entry -entry-point-result=void \ // DEFINE: -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext,%mlir_lib_dir/libmlir_runner_utils%shlibext | \ // DEFINE: FileCheck %s // -// RUN: %{command} +// RUN: %{compile} | %{run} // // Do the same 
run, but now with direct IR generation. // REDEFINE: %{option} = enable-runtime-library=false -// RUN: %{command} +// RUN: %{compile} | %{run} // // Do the same run, but now with direct IR generation and vectorization. // REDEFINE: %{option} = "enable-runtime-library=false vl=2 reassociate-fp-reductions=true enable-index-optimizations=true" -// RUN: %{command} +// RUN: %{compile} | %{run} + +// If SVE is available, do the same run, but now with direct IR generation and VLA +// vectorization. +// REDEFINE: %{option} = "enable-runtime-library=false vl=4 enable-arm-sve=%ENABLE_VLA" +// REDEFINE: %{run} = TENSOR0="%mlir_src_dir/test/Integration/data/mttkrp_b.tns" \ +// REDEFINE: %lli \ +// REDEFINE: --entry-function=entry_lli \ +// REDEFINE: --extra-module=%S/Inputs/main_for_lli.ll \ +// REDEFINE: %VLA_ARCH_ATTR_OPTIONS \ +// REDEFINE: --dlopen=%mlir_native_utils_lib_dir/libmlir_c_runner_utils%shlibext -dlopen=%mlir_lib_dir/libmlir_runner_utils%shlibext| \ +// REDEFINE: FileCheck %s +// RUN: %{compile} | mlir-translate -mlir-to-llvmir | %{run} !Filename = !llvm.ptr diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_out_simple.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_out_simple.mlir index 630d1b7..8a36bdb 100644 --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_out_simple.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_out_simple.mlir @@ -1,20 +1,32 @@ // DEFINE: %{option} = enable-runtime-library=true -// DEFINE: %{command} = mlir-opt %s --sparse-compiler=%{option} | \ -// DEFINE: TENSOR0="%mlir_src_dir/test/Integration/data/test.mtx" \ +// DEFINE: %{compile} = mlir-opt %s --sparse-compiler=%{option} +// DEFINE: %{run} = TENSOR0="%mlir_src_dir/test/Integration/data/test.mtx" \ // DEFINE: mlir-cpu-runner \ // DEFINE: -e entry -entry-point-result=void \ // DEFINE: -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | \ // DEFINE: FileCheck %s // -// RUN: %{command} +// RUN: %{compile} | %{run} // // Do 
the same run, but now with direct IR generation. // REDEFINE: %{option} = enable-runtime-library=false -// RUN: %{command} +// RUN: %{compile} | %{run} // // Do the same run, but now with direct IR generation and vectorization. // REDEFINE: %{option} = "enable-runtime-library=false vl=2 reassociate-fp-reductions=true enable-index-optimizations=true" -// RUN: %{command} +// RUN: %{compile} | %{run} + +// If SVE is available, do the same run, but now with direct IR generation and VLA +// vectorization. +// REDEFINE: %{option} = "enable-runtime-library=false vl=4 enable-arm-sve=%ENABLE_VLA" +// REDEFINE: %{run} = TENSOR0="%mlir_src_dir/test/Integration/data/test.mtx" \ +// REDEFINE: %lli \ +// REDEFINE: --entry-function=entry_lli \ +// REDEFINE: --extra-module=%S/Inputs/main_for_lli.ll \ +// REDEFINE: %VLA_ARCH_ATTR_OPTIONS \ +// REDEFINE: --dlopen=%mlir_native_utils_lib_dir/libmlir_c_runner_utils%shlibext | \ +// REDEFINE: FileCheck %s +// RUN: %{compile} | mlir-translate -mlir-to-llvmir | %{run} !Filename = !llvm.ptr diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_quantized_matmul.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_quantized_matmul.mlir index 2c1200a..0bcd0db 100644 --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_quantized_matmul.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_quantized_matmul.mlir @@ -1,19 +1,30 @@ // DEFINE: %{option} = enable-runtime-library=true -// DEFINE: %{command} = mlir-opt %s --sparse-compiler=%{option} | \ -// DEFINE: mlir-cpu-runner \ +// DEFINE: %{compile} = mlir-opt %s --sparse-compiler=%{option} +// DEFINE: %{run} = mlir-cpu-runner \ // DEFINE: -e entry -entry-point-result=void \ // DEFINE: -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | \ // DEFINE: FileCheck %s // -// RUN: %{command} +// RUN: %{compile} | %{run} // // Do the same run, but now with direct IR generation. 
// REDEFINE: %{option} = enable-runtime-library=false -// RUN: %{command} +// RUN: %{compile} | %{run} // // Do the same run, but now with direct IR generation and vectorization. // REDEFINE: %{option} = "enable-runtime-library=false vl=2 reassociate-fp-reductions=true enable-index-optimizations=true" -// RUN: %{command} +// RUN: %{compile} | %{run} + +// If SVE is available, do the same run, but now with direct IR generation and VLA +// vectorization. +// REDEFINE: %{option} = "enable-runtime-library=false vl=4 enable-arm-sve=%ENABLE_VLA" +// REDEFINE: %{run} = %lli \ +// REDEFINE: --entry-function=entry_lli \ +// REDEFINE: --extra-module=%S/Inputs/main_for_lli.ll \ +// REDEFINE: %VLA_ARCH_ATTR_OPTIONS \ +// REDEFINE: --dlopen=%mlir_native_utils_lib_dir/libmlir_c_runner_utils%shlibext | \ +// REDEFINE: FileCheck %s +// RUN: %{compile} | mlir-translate -mlir-to-llvmir | %{run} #DCSR = #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "compressed" ] }> diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_reductions.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_reductions.mlir index 65f6805..b962ae4 100644 --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_reductions.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_reductions.mlir @@ -1,19 +1,31 @@ // DEFINE: %{option} = enable-runtime-library=true -// DEFINE: %{command} = mlir-opt %s --sparse-compiler=%{option} | \ -// DEFINE: mlir-cpu-runner \ +// DEFINE: %{compile} = mlir-opt %s --sparse-compiler=%{option} +// DEFINE: %{run} = mlir-cpu-runner \ // DEFINE: -e entry -entry-point-result=void \ // DEFINE: -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | \ // DEFINE: FileCheck %s // -// RUN: %{command} +// RUN: %{compile} | %{run} // // Do the same run, but now with direct IR generation. 
// REDEFINE: %{option} = enable-runtime-library=false -// RUN: %{command} +// RUN: %{compile} | %{run} // // Do the same run, but now with direct IR generation and vectorization. // REDEFINE: %{option} = "enable-runtime-library=false vl=2 reassociate-fp-reductions=true enable-index-optimizations=true" -// RUN: %{command} +// RUN: %{compile} | %{run} + +// If SVE is available, do the same run, but now with direct IR generation and VLA +// vectorization. +// REDEFINE: %{option} = "enable-runtime-library=false vl=4 enable-arm-sve=%ENABLE_VLA" +// REDEFINE: %{run} = %lli \ +// REDEFINE: --entry-function=entry_lli \ +// REDEFINE: --extra-module=%S/Inputs/main_for_lli.ll \ +// REDEFINE: %VLA_ARCH_ATTR_OPTIONS \ +// REDEFINE: --dlopen=%mlir_native_utils_lib_dir/libmlir_c_runner_utils%shlibext | \ +// REDEFINE: FileCheck %s + +// Reductions in this file are supported by the AArch64 SVE backend #SV = #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ] }> #DV = #sparse_tensor.encoding<{ dimLevelType = [ "dense" ] }> @@ -54,30 +66,6 @@ module { return %0 : tensor } - func.func @prod_reduction_i32(%arga: tensor<32xi32, #DV>, - %argx: tensor) -> tensor { - %0 = linalg.generic #trait_reduction - ins(%arga: tensor<32xi32, #DV>) - outs(%argx: tensor) { - ^bb(%a: i32, %x: i32): - %0 = arith.muli %x, %a : i32 - linalg.yield %0 : i32 - } -> tensor - return %0 : tensor - } - - func.func @prod_reduction_f32(%arga: tensor<32xf32, #DV>, - %argx: tensor) -> tensor { - %0 = linalg.generic #trait_reduction - ins(%arga: tensor<32xf32, #DV>) - outs(%argx: tensor) { - ^bb(%a: f32, %x: f32): - %0 = arith.mulf %x, %a : f32 - linalg.yield %0 : f32 - } -> tensor - return %0 : tensor - } - func.func @and_reduction_i32(%arga: tensor<32xi32, #DV>, %argx: tensor) -> tensor { %0 = linalg.generic #trait_reduction @@ -169,10 +157,6 @@ module { : (tensor<32xi32, #SV>, tensor) -> tensor %1 = call @sum_reduction_f32(%sparse_input_f32, %rf) : (tensor<32xf32, #SV>, tensor) -> tensor - %2 = call 
@prod_reduction_i32(%dense_input_i32, %ri) - : (tensor<32xi32, #DV>, tensor) -> tensor - %3 = call @prod_reduction_f32(%dense_input_f32, %rf) - : (tensor<32xf32, #DV>, tensor) -> tensor %4 = call @and_reduction_i32(%dense_input_i32, %ri) : (tensor<32xi32, #DV>, tensor) -> tensor %5 = call @or_reduction_i32(%sparse_input_i32, %ri) @@ -184,16 +168,12 @@ module { // // CHECK: 26 // CHECK: 27.5 - // CHECK: 3087 - // CHECK: 168 // CHECK: 1 // CHECK: 15 // CHECK: 10 // call @dump_i32(%0) : (tensor) -> () call @dump_f32(%1) : (tensor) -> () - call @dump_i32(%2) : (tensor) -> () - call @dump_f32(%3) : (tensor) -> () call @dump_i32(%4) : (tensor) -> () call @dump_i32(%5) : (tensor) -> () call @dump_i32(%6) : (tensor) -> () diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_reductions_prod.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_reductions_prod.mlir new file mode 100644 index 0000000..a4942b9 --- /dev/null +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_reductions_prod.mlir @@ -0,0 +1,115 @@ +// DEFINE: %{option} = enable-runtime-library=true +// DEFINE: %{command} = mlir-opt %s --sparse-compiler=%{option} | \ +// DEFINE: mlir-cpu-runner \ +// DEFINE: -e entry -entry-point-result=void \ +// DEFINE: -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | \ +// DEFINE: FileCheck %s +// +// RUN: %{command} +// +// Do the same run, but now with direct IR generation. +// REDEFINE: %{option} = enable-runtime-library=false +// RUN: %{command} +// +// Do the same run, but now with direct IR generation and vectorization. 
+// REDEFINE: %{option} = "enable-runtime-library=false vl=2 reassociate-fp-reductions=true enable-index-optimizations=true" +// RUN: %{command} + +// Product reductions - kept in a separate file as these are not supported by +// the AArch64 SVE backend (so the set-up is a bit different to +// sparse_reductions.mlir) + +#SV = #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ] }> +#DV = #sparse_tensor.encoding<{ dimLevelType = [ "dense" ] }> + +#trait_reduction = { + indexing_maps = [ + affine_map<(i) -> (i)>, // a + affine_map<(i) -> ()> // x (scalar out) + ], + iterator_types = ["reduction"], + doc = "x += OPER_i a(i)" +} + +// An example of vector reductions. +module { + + func.func @prod_reduction_i32(%arga: tensor<32xi32, #DV>, + %argx: tensor) -> tensor { + %0 = linalg.generic #trait_reduction + ins(%arga: tensor<32xi32, #DV>) + outs(%argx: tensor) { + ^bb(%a: i32, %x: i32): + %0 = arith.muli %x, %a : i32 + linalg.yield %0 : i32 + } -> tensor + return %0 : tensor + } + + func.func @prod_reduction_f32(%arga: tensor<32xf32, #DV>, + %argx: tensor) -> tensor { + %0 = linalg.generic #trait_reduction + ins(%arga: tensor<32xf32, #DV>) + outs(%argx: tensor) { + ^bb(%a: f32, %x: f32): + %0 = arith.mulf %x, %a : f32 + linalg.yield %0 : f32 + } -> tensor + return %0 : tensor + } + + func.func @dump_i32(%arg0 : tensor) { + %v = tensor.extract %arg0[] : tensor + vector.print %v : i32 + return + } + + func.func @dump_f32(%arg0 : tensor) { + %v = tensor.extract %arg0[] : tensor + vector.print %v : f32 + return + } + + func.func @entry() { + %ri = arith.constant dense< 7 > : tensor + %rf = arith.constant dense< 2.0 > : tensor + + %c_1_i32 = arith.constant dense<[ + 1, 1, 7, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 7, 3 + ]> : tensor<32xi32> + + %c_1_f32 = arith.constant dense<[ + 1.0, 1.0, 1.0, 3.5, 1.0, 1.0, 1.0, 1.0, + 1.0, 1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, + 1.0, 1.0, 1.0, 1.0, 3.0, 1.0, 1.0, 1.0, + 1.0, 1.0, 1.0, 1.0, 1.0, 
1.0, 1.0, 4.0 + ]> : tensor<32xf32> + + // Convert constants to annotated tensors. + %dense_input_i32 = sparse_tensor.convert %c_1_i32 + : tensor<32xi32> to tensor<32xi32, #DV> + %dense_input_f32 = sparse_tensor.convert %c_1_f32 + : tensor<32xf32> to tensor<32xf32, #DV> + + // Call the kernels. + %2 = call @prod_reduction_i32(%dense_input_i32, %ri) + : (tensor<32xi32, #DV>, tensor) -> tensor + %3 = call @prod_reduction_f32(%dense_input_f32, %rf) + : (tensor<32xf32, #DV>, tensor) -> tensor + + // Verify results. + // + // CHECK: 3087 + // CHECK: 168 + // + call @dump_i32(%2) : (tensor) -> () + call @dump_f32(%3) : (tensor) -> () + + // Release the resources. + bufferization.dealloc_tensor %dense_input_i32 : tensor<32xi32, #DV> + bufferization.dealloc_tensor %dense_input_f32 : tensor<32xf32, #DV> + + return + } +} diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sampled_matmul.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sampled_matmul.mlir index 76d23c1..dc1da5e 100644 --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sampled_matmul.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sampled_matmul.mlir @@ -1,20 +1,32 @@ // DEFINE: %{option} = enable-runtime-library=true -// DEFINE: %{command} = mlir-opt %s --sparse-compiler=%{option} | \ -// DEFINE: TENSOR0="%mlir_src_dir/test/Integration/data/test.mtx" \ +// DEFINE: %{compile} = mlir-opt %s --sparse-compiler=%{option} +// DEFINE: %{run} = TENSOR0="%mlir_src_dir/test/Integration/data/test.mtx" \ // DEFINE: mlir-cpu-runner \ // DEFINE: -e entry -entry-point-result=void \ // DEFINE: -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | \ // DEFINE: FileCheck %s // -// RUN: %{command} +// RUN: %{compile} | %{run} // // Do the same run, but now with direct IR generation. // REDEFINE: %{option} = enable-runtime-library=false -// RUN: %{command} +// RUN: %{compile} | %{run} // // Do the same run, but now with direct IR generation and vectorization. 
// REDEFINE: %{option} = "enable-runtime-library=false vl=2 reassociate-fp-reductions=true enable-index-optimizations=true" -// RUN: %{command} +// RUN: %{compile} | %{run} + +// If SVE is available, do the same run, but now with direct IR generation and VLA +// vectorization. +// REDEFINE: %{option} = "enable-runtime-library=false vl=4 enable-arm-sve=%ENABLE_VLA" +// REDEFINE: %{run} = TENSOR0="%mlir_src_dir/test/Integration/data/test.mtx" \ +// REDEFINE: %lli \ +// REDEFINE: --entry-function=entry_lli \ +// REDEFINE: --extra-module=%S/Inputs/main_for_lli.ll \ +// REDEFINE: %VLA_ARCH_ATTR_OPTIONS \ +// REDEFINE: --dlopen=%mlir_native_utils_lib_dir/libmlir_c_runner_utils%shlibext | \ +// REDEFINE: FileCheck %s +// RUN: %{compile} | mlir-translate -mlir-to-llvmir | %{run} !Filename = !llvm.ptr diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sampled_mm_fusion.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sampled_mm_fusion.mlir old mode 100755 new mode 100644 index 35cfe58..669840e --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sampled_mm_fusion.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sampled_mm_fusion.mlir @@ -1,19 +1,30 @@ // DEFINE: %{option} = enable-runtime-library=true -// DEFINE: %{command} = mlir-opt %s --sparse-compiler=%{option} | \ -// DEFINE: mlir-cpu-runner \ +// DEFINE: %{compile} = mlir-opt %s --sparse-compiler=%{option} +// DEFINE: %{run} = mlir-cpu-runner \ // DEFINE: -e entry -entry-point-result=void \ // DEFINE: -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | \ // DEFINE: FileCheck %s // -// RUN: %{command} +// RUN: %{compile} | %{run} // // Do the same run, but now with direct IR generation. // REDEFINE: %{option} = "enable-runtime-library=false enable-buffer-initialization=true" -// RUN: %{command} +// RUN: %{compile} | %{run} // // Do the same run, but now with direct IR generation and vectorization. 
// REDEFINE: %{option} = "enable-runtime-library=false enable-buffer-initialization=true vl=2 reassociate-fp-reductions=true enable-index-optimizations=true" -// RUN: %{command} +// RUN: %{compile} | %{run} + +// If SVE is available, do the same run, but now with direct IR generation and VLA +// vectorization. +// REDEFINE: %{option} = "enable-runtime-library=false enable-buffer-initialization=true vl=4 enable-arm-sve=%ENABLE_VLA" +// REDEFINE: %{run} = %lli \ +// REDEFINE: --entry-function=entry_lli \ +// REDEFINE: --extra-module=%S/Inputs/main_for_lli.ll \ +// REDEFINE: %VLA_ARCH_ATTR_OPTIONS \ +// REDEFINE: --dlopen=%mlir_native_utils_lib_dir/libmlir_c_runner_utils%shlibext | \ +// REDEFINE: FileCheck %s +// RUN: %{compile} | mlir-translate -mlir-to-llvmir | %{run} #SM = #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "compressed" ] }> diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_scale.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_scale.mlir index 182d7ae..2c872e3 100644 --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_scale.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_scale.mlir @@ -1,15 +1,26 @@ // DEFINE: %{option} = enable-runtime-library=true -// DEFINE: %{command} = mlir-opt %s --sparse-compiler=%{option} | \ -// DEFINE: mlir-cpu-runner \ +// DEFINE: %{compile} = mlir-opt %s --sparse-compiler=%{option} +// DEFINE: %{run} = mlir-cpu-runner \ // DEFINE: -e entry -entry-point-result=void \ // DEFINE: -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | \ // DEFINE: FileCheck %s // -// RUN: %{command} +// RUN: %{compile} | %{run} // // Do the same run, but now with direct IR generation. // REDEFINE: %{option} = enable-runtime-library=false -// RUN: %{command} +// RUN: %{compile} | %{run} + +// If SVE is available, do the same run, but now with direct IR generation and VLA +// vectorization.
+// REDEFINE: %{option} = "enable-runtime-library=false vl=4 enable-arm-sve=%ENABLE_VLA" +// REDEFINE: %{run} = %lli \ +// REDEFINE: --entry-function=entry_lli \ +// REDEFINE: --extra-module=%S/Inputs/main_for_lli.ll \ +// REDEFINE: %VLA_ARCH_ATTR_OPTIONS \ +// REDEFINE: --dlopen=%mlir_native_utils_lib_dir/libmlir_c_runner_utils%shlibext | \ +// REDEFINE: FileCheck %s +// RUN: %{compile} | mlir-translate -mlir-to-llvmir | %{run} #CSR = #sparse_tensor.encoding<{ dimLevelType = [ "dense", "compressed" ] }> diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_spmm.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_spmm.mlir index de44ae3..1bb3f39 100644 --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_spmm.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_spmm.mlir @@ -1,20 +1,32 @@ // DEFINE: %{option} = enable-runtime-library=true -// DEFINE: %{command} = mlir-opt %s --sparse-compiler=%{option} | \ -// DEFINE: TENSOR0="%mlir_src_dir/test/Integration/data/wide.mtx" \ +// DEFINE: %{compile} = mlir-opt %s --sparse-compiler=%{option} +// DEFINE: %{run} = TENSOR0="%mlir_src_dir/test/Integration/data/wide.mtx" \ // DEFINE: mlir-cpu-runner \ // DEFINE: -e entry -entry-point-result=void \ // DEFINE: -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | \ // DEFINE: FileCheck %s // -// RUN: %{command} +// RUN: %{compile} | %{run} // // Do the same run, but now with direct IR generation. // REDEFINE: %{option} = enable-runtime-library=false -// RUN: %{command} +// RUN: %{compile} | %{run} // // Do the same run, but now with direct IR generation and vectorization. // REDEFINE: %{option} = "enable-runtime-library=false vl=2 reassociate-fp-reductions=true enable-index-optimizations=true" -// RUN: %{command} +// RUN: %{compile} | %{run} + +// If SVE is available, do the same run, but now with direct IR generation and VLA +// vectorization. 
+// REDEFINE: %{option} = "enable-runtime-library=false vl=4 enable-arm-sve=%ENABLE_VLA" +// REDEFINE: %{run} = TENSOR0="%mlir_src_dir/test/Integration/data/wide.mtx" \ +// REDEFINE: %lli \ +// REDEFINE: --entry-function=entry_lli \ +// REDEFINE: --extra-module=%S/Inputs/main_for_lli.ll \ +// REDEFINE: %VLA_ARCH_ATTR_OPTIONS \ +// REDEFINE: --dlopen=%mlir_native_utils_lib_dir/libmlir_c_runner_utils%shlibext | \ +// REDEFINE: FileCheck %s +// RUN: %{compile} | mlir-translate -mlir-to-llvmir | %{run} !Filename = !llvm.ptr diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sum.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sum.mlir index 43014dd..588bcb0 100644 --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sum.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sum.mlir @@ -1,20 +1,32 @@ // DEFINE: %{option} = enable-runtime-library=true -// DEFINE: %{command} = mlir-opt %s --sparse-compiler=%{option} | \ -// DEFINE: TENSOR0="%mlir_src_dir/test/Integration/data/test_symmetric.mtx" \ +// DEFINE: %{compile} = mlir-opt %s --sparse-compiler=%{option} +// DEFINE: %{run} = TENSOR0="%mlir_src_dir/test/Integration/data/test_symmetric.mtx" \ // DEFINE: mlir-cpu-runner \ // DEFINE: -e entry -entry-point-result=void \ // DEFINE: -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | \ // DEFINE: FileCheck %s // -// RUN: %{command} +// RUN: %{compile} | %{run} // // Do the same run, but now with direct IR generation. // REDEFINE: %{option} = enable-runtime-library=false -// RUN: %{command} +// RUN: %{compile} | %{run} // // Do the same run, but now with direct IR generation and vectorization. // REDEFINE: %{option} = "enable-runtime-library=false vl=2 reassociate-fp-reductions=true enable-index-optimizations=true" -// RUN: %{command} +// RUN: %{compile} | %{run} + +// If SVE is available, do the same run, but now with direct IR generation and VLA +// vectorization.
+// REDEFINE: %{option} = "enable-runtime-library=false vl=4 enable-arm-sve=%ENABLE_VLA" +// REDEFINE: %{run} = TENSOR0="%mlir_src_dir/test/Integration/data/test_symmetric.mtx" \ +// REDEFINE: %lli \ +// REDEFINE: --entry-function=entry_lli \ +// REDEFINE: --extra-module=%S/Inputs/main_for_lli.ll \ +// REDEFINE: %VLA_ARCH_ATTR_OPTIONS \ +// REDEFINE: --dlopen=%mlir_native_utils_lib_dir/libmlir_c_runner_utils%shlibext | \ +// REDEFINE: FileCheck %s +// RUN: %{compile} | mlir-translate -mlir-to-llvmir | %{run} !Filename = !llvm.ptr -- 2.7.4