[mlir][sparse][ArmSVE] Enable sparse integration tests for ArmSVE
author: Javier Setoain <javier.setoain@gmail.com>
Mon, 12 Dec 2022 18:02:59 +0000 (18:02 +0000)
committer: Andrzej Warzynski <andrzej.warzynski@arm.com>
Tue, 24 Jan 2023 15:21:08 +0000 (15:21 +0000)
This patch adds the logic necessary to target the sparse-tensor dialect
integration tests for SVE. As the LLVM backend for AArch64 does not
currently support product reductions, the corresponding tests are
disabled for SVE.

Not all tests have been updated yet. The remaining tests will be
refactored in a separate patch shortly.

Differential Revision: https://reviews.llvm.org/D121304

Co-authored-by: Andrzej Warzynski <andrzej.warzynski@arm.com>
17 files changed:
mlir/test/Integration/Dialect/SparseTensor/CPU/Inputs/main_for_lli.ll [new file with mode: 0644]
mlir/test/Integration/Dialect/SparseTensor/CPU/lit.local.cfg
mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_cast.mlir
mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_filter_conv2d.mlir
mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_flatten.mlir
mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_index_dense.mlir
mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matvec.mlir
mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_mttkrp.mlir
mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_out_simple.mlir
mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_quantized_matmul.mlir
mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_reductions.mlir
mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_reductions_prod.mlir [new file with mode: 0644]
mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sampled_matmul.mlir
mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sampled_mm_fusion.mlir [changed mode: 0755->0644]
mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_scale.mlir
mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_spmm.mlir
mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sum.mlir

diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/Inputs/main_for_lli.ll b/mlir/test/Integration/Dialect/SparseTensor/CPU/Inputs/main_for_lli.ll
new file mode 100644 (file)
index 0000000..7b74618
--- /dev/null
@@ -0,0 +1,8 @@
+; Dummy wrapper required by lli, which does not support void functions (i.e.
+; it fails if non-zero code is returned)
+define i32 @entry_lli() {
+  call void @entry()
+  ret i32 0
+}
+
+declare void @entry()
index 83247d7..dba6373 100644 (file)
@@ -3,3 +3,31 @@ import sys
 # No JIT on win32.
 if sys.platform == 'win32':
     config.unsupported = True
+
+# ArmSVE tests must be enabled via build flag.
+if config.mlir_run_arm_sve_tests:
+    config.substitutions.append(('%ENABLE_VLA', 'true'))
+    config.substitutions.append(('%VLA_ARCH_ATTR_OPTIONS', '--march=aarch64 --mattr="+sve"'))
+    lli_cmd = 'lli'
+    if config.arm_emulator_lli_executable:
+        lli_cmd = config.arm_emulator_lli_executable
+
+    if config.arm_emulator_utils_lib_dir:
+        config.substitutions.append(('%mlir_native_utils_lib_dir', config.arm_emulator_utils_lib_dir))
+    else:
+        config.substitutions.append(('%mlir_native_utils_lib_dir', config.mlir_lib_dir))
+
+    if config.arm_emulator_executable:
+        # Run test in emulator (qemu or armie).
+        emulation_cmd = config.arm_emulator_executable
+        if config.arm_emulator_options:
+            emulation_cmd = emulation_cmd + ' ' + config.arm_emulator_options
+        emulation_cmd = emulation_cmd + ' ' + lli_cmd
+        config.substitutions.append(('%lli', emulation_cmd))
+    else:
+        config.substitutions.append(('%lli', lli_cmd))
+else:
+    config.substitutions.append(('%ENABLE_VLA', 'false'))
+    config.substitutions.append(('%VLA_ARCH_ATTR_OPTIONS', ''))
+    config.substitutions.append(('%lli', 'lli'))
+    config.substitutions.append(('%mlir_native_utils_lib_dir', config.mlir_lib_dir))
index 13a9dcc..4cc6126 100644 (file)
@@ -1,19 +1,30 @@
 // DEFINE: %{option} = enable-runtime-library=true
-// DEFINE: %{command} = mlir-opt %s --sparse-compiler=%{option} | \
-// DEFINE: mlir-cpu-runner \
+// DEFINE: %{compile} = mlir-opt %s --sparse-compiler=%{option}
+// DEFINE: %{run} = mlir-cpu-runner \
 // DEFINE:  -e entry -entry-point-result=void  \
 // DEFINE:  -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | \
 // DEFINE: FileCheck %s
 //
-// RUN: %{command}
+// RUN: %{compile} | %{run}
 //
 // Do the same run, but now with direct IR generation.
 // REDEFINE: %{option} = enable-runtime-library=false
-// RUN: %{command}
+// RUN: %{compile} | %{run}
 //
 // Do the same run, but now with direct IR generation and vectorization.
 // REDEFINE: %{option} = "enable-runtime-library=false vl=2 reassociate-fp-reductions=true enable-index-optimizations=true"
-// RUN: %{command}
+// RUN: %{compile} | %{run}
+
+// If SVE is available, do the same run, but now with direct IR generation and VLA
+// vectorization.
+// REDEFINE: %{option} = "enable-runtime-library=false vl=4 enable-arm-sve=%ENABLE_VLA"
+// REDEFINE: %{run} = %lli \
+// REDEFINE:   --entry-function=entry_lli \
+// REDEFINE:   --extra-module=%S/Inputs/main_for_lli.ll \
+// REDEFINE:   %VLA_ARCH_ATTR_OPTIONS \
+// REDEFINE:   --dlopen=%mlir_native_utils_lib_dir/libmlir_c_runner_utils%shlibext | \
+// REDEFINE: FileCheck %s
+// RUN: %{compile} | mlir-translate -mlir-to-llvmir | %{run}
 
 #SV = #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ] }>
 
index 5c1e993..952f01a 100644 (file)
@@ -1,19 +1,30 @@
 // DEFINE: %{option} = enable-runtime-library=true
-// DEFINE: %{command} = mlir-opt %s --sparse-compiler=%{option} | \
-// DEFINE: mlir-cpu-runner \
+// DEFINE: %{compile} = mlir-opt %s --sparse-compiler=%{option}
+// DEFINE: %{run} = mlir-cpu-runner \
 // DEFINE:  -e entry -entry-point-result=void  \
 // DEFINE:  -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | \
 // DEFINE: FileCheck %s
 //
-// RUN: %{command}
+// RUN: %{compile} | %{run}
 //
 // Do the same run, but now with direct IR generation.
 // REDEFINE: %{option} = enable-runtime-library=false
-// RUN: %{command}
+// RUN: %{compile} | %{run}
 //
 // Do the same run, but now with direct IR generation and vectorization.
 // REDEFINE: %{option} = "enable-runtime-library=false vl=2 reassociate-fp-reductions=true enable-index-optimizations=true"
-// RUN: %{command}
+// RUN: %{compile} | %{run}
+
+// If SVE is available, do the same run, but now with direct IR generation and VLA
+// vectorization.
+// REDEFINE: %{option} = "enable-runtime-library=false vl=4 enable-arm-sve=%ENABLE_VLA"
+// REDEFINE: %{run} = %lli \
+// REDEFINE:   --entry-function=entry_lli \
+// REDEFINE:   --extra-module=%S/Inputs/main_for_lli.ll \
+// REDEFINE:   %VLA_ARCH_ATTR_OPTIONS \
+// REDEFINE:   --dlopen=%mlir_native_utils_lib_dir/libmlir_c_runner_utils%shlibext | \
+// REDEFINE: FileCheck %s
+// RUN: %{compile} | mlir-translate -mlir-to-llvmir | %{run}
 
 #DCSR = #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "compressed" ] }>
 
index 6917274..28f9033 100644 (file)
@@ -1,20 +1,32 @@
 // DEFINE: %{option} = enable-runtime-library=true
-// DEFINE: %{command} = mlir-opt %s --sparse-compiler=%{option} | \
-// DEFINE: TENSOR0="%mlir_src_dir/test/Integration/data/test.tns" \
+// DEFINE: %{compile} = mlir-opt %s --sparse-compiler=%{option}
+// DEFINE: %{run} = TENSOR0="%mlir_src_dir/test/Integration/data/test.tns" \
 // DEFINE: mlir-cpu-runner \
 // DEFINE:  -e entry -entry-point-result=void  \
 // DEFINE:  -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext,%mlir_lib_dir/libmlir_runner_utils%shlibext | \
 // DEFINE: FileCheck %s
 //
-// RUN: %{command}
+// RUN: %{compile} | %{run}
 //
 // Do the same run, but now with direct IR generation.
 // REDEFINE: %{option} = enable-runtime-library=false
-// RUN: %{command}
+// RUN: %{compile} | %{run}
 //
 // Do the same run, but now with direct IR generation and vectorization.
 // REDEFINE: %{option} = "enable-runtime-library=false vl=2 reassociate-fp-reductions=true enable-index-optimizations=true"
-// RUN: %{command}
+// RUN: %{compile} | %{run}
+
+// If SVE is available, do the same run, but now with direct IR generation and VLA
+// vectorization.
+// REDEFINE: %{option} = "enable-runtime-library=false vl=4 enable-arm-sve=%ENABLE_VLA"
+// REDEFINE: %{run} = TENSOR0="%mlir_src_dir/test/Integration/data/test.tns" \
+// REDEFINE: %lli \
+// REDEFINE:   --entry-function=entry_lli \
+// REDEFINE:   --extra-module=%S/Inputs/main_for_lli.ll \
+// REDEFINE:   %VLA_ARCH_ATTR_OPTIONS \
+// REDEFINE:   --dlopen=%mlir_native_utils_lib_dir/libmlir_c_runner_utils%shlibext --dlopen=%mlir_native_utils_lib_dir/libmlir_runner_utils%shlibext | \
+// REDEFINE: FileCheck %s
+// RUN: %{compile} | mlir-translate -mlir-to-llvmir | %{run}
 
 !Filename = !llvm.ptr<i8>
 
index af64c09..c5ce259 100644 (file)
@@ -1,19 +1,30 @@
 // DEFINE: %{option} = enable-runtime-library=true
-// DEFINE: %{command} = mlir-opt %s --sparse-compiler=%{option} | \
-// DEFINE: mlir-cpu-runner \
+// DEFINE: %{compile} = mlir-opt %s --sparse-compiler=%{option}
+// DEFINE: %{run} = mlir-cpu-runner \
 // DEFINE:  -e entry -entry-point-result=void  \
 // DEFINE:  -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | \
 // DEFINE: FileCheck %s
 //
-// RUN: %{command}
+// RUN: %{compile} | %{run}
 //
 // Do the same run, but now with direct IR generation.
 // REDEFINE: %{option} = enable-runtime-library=false
-// RUN: %{command}
+// RUN: %{compile} | %{run}
 //
 // Do the same run, but now with direct IR generation and vectorization.
 // REDEFINE: %{option} = "enable-runtime-library=false vl=2 reassociate-fp-reductions=true enable-index-optimizations=true"
-// RUN: %{command}
+// RUN: %{compile} | %{run}
+
+// If SVE is available, do the same run, but now with direct IR generation and VLA
+// vectorization.
+// REDEFINE: %{option} = "enable-runtime-library=false vl=4 enable-arm-sve=%ENABLE_VLA"
+// REDEFINE: %{run} = %lli \
+// REDEFINE:   --entry-function=entry_lli \
+// REDEFINE:   --extra-module=%S/Inputs/main_for_lli.ll \
+// REDEFINE:   %VLA_ARCH_ATTR_OPTIONS \
+// REDEFINE:   --dlopen=%mlir_native_utils_lib_dir/libmlir_c_runner_utils%shlibext | \
+// REDEFINE: FileCheck %s
+// RUN: %{compile} | mlir-translate -mlir-to-llvmir | %{run}
 
 #SparseVector = #sparse_tensor.encoding<{
   dimLevelType = ["compressed"]
index 2c18c2b..d9e3143 100644 (file)
@@ -1,28 +1,39 @@
 // DEFINE: %{option} = enable-runtime-library=true
-// DEFINE: %{command} = mlir-opt %s --sparse-compiler=%{option} | \
-// DEFINE: TENSOR0="%mlir_src_dir/test/Integration/data/wide.mtx" \
+// DEFINE: %{compile} = mlir-opt %s --sparse-compiler=%{option}
+// DEFINE: %{run} = TENSOR0="%mlir_src_dir/test/Integration/data/wide.mtx" \
 // DEFINE: mlir-cpu-runner \
 // DEFINE:  -e entry -entry-point-result=void  \
 // DEFINE:  -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | \
 // DEFINE: FileCheck %s
-//
-// RUN: %{command}
+// RUN: %{compile} | %{run}
 //
 // Do the same run, but now with direct IR generation.
 // REDEFINE: %{option} = enable-runtime-library=false
-// RUN: %{command}
+// RUN: %{compile} | %{run}
 //
 // Do the same run, but now with parallelization strategy.
 // REDEFINE: %{option} = "enable-runtime-library=true parallelization-strategy=any-storage-any-loop"
-// RUN: %{command}
+// RUN: %{compile} | %{run}
 //
 // Do the same run, but now with direct IR generation and parallelization strategy.
 // REDEFINE: %{option} = "enable-runtime-library=false parallelization-strategy=any-storage-any-loop"
-// RUN: %{command}
+// RUN: %{compile} | %{run}
 //
 // Do the same run, but now with direct IR generation and vectorization.
 // REDEFINE: %{option} = "enable-runtime-library=false vl=2 reassociate-fp-reductions=true enable-index-optimizations=true"
-// RUN: %{command}
+// RUN: %{compile} | %{run}
+
+// If SVE is available, do the same run, but now with direct IR generation and VLA
+// vectorization.
+// REDEFINE: %{option} = "enable-runtime-library=false vl=4 enable-arm-sve=%ENABLE_VLA"
+// REDEFINE: %{run} = TENSOR0="%mlir_src_dir/test/Integration/data/wide.mtx" \
+// REDEFINE: %lli \
+// REDEFINE:   --entry-function=entry_lli \
+// REDEFINE:   --extra-module=%S/Inputs/main_for_lli.ll \
+// REDEFINE:   %VLA_ARCH_ATTR_OPTIONS \
+// REDEFINE:   --dlopen=%mlir_native_utils_lib_dir/libmlir_c_runner_utils%shlibext | \
+// REDEFINE: FileCheck %s
+// RUN: %{compile} | mlir-translate -mlir-to-llvmir | %{run}
 
 !Filename = !llvm.ptr<i8>
 
index 375db9a..55db2ba 100644 (file)
@@ -1,20 +1,32 @@
 // DEFINE: %{option} = enable-runtime-library=true
-// DEFINE: %{command} = mlir-opt %s --sparse-compiler=%{option} | \
-// DEFINE: TENSOR0="%mlir_src_dir/test/Integration/data/mttkrp_b.tns" \
+// DEFINE: %{compile} = mlir-opt %s --sparse-compiler=%{option}
+// DEFINE: %{run} = TENSOR0="%mlir_src_dir/test/Integration/data/mttkrp_b.tns"  \
 // DEFINE: mlir-cpu-runner \
 // DEFINE:  -e entry -entry-point-result=void  \
 // DEFINE:  -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext,%mlir_lib_dir/libmlir_runner_utils%shlibext | \
 // DEFINE: FileCheck %s
 //
-// RUN: %{command}
+// RUN: %{compile} | %{run}
 //
 // Do the same run, but now with direct IR generation.
 // REDEFINE: %{option} = enable-runtime-library=false
-// RUN: %{command}
+// RUN: %{compile} | %{run}
 //
 // Do the same run, but now with direct IR generation and vectorization.
 // REDEFINE: %{option} = "enable-runtime-library=false vl=2 reassociate-fp-reductions=true enable-index-optimizations=true"
-// RUN: %{command}
+// RUN: %{compile} | %{run}
+
+// If SVE is available, do the same run, but now with direct IR generation and VLA
+// vectorization.
+// REDEFINE: %{option} = "enable-runtime-library=false vl=4  enable-arm-sve=%ENABLE_VLA"
+// REDEFINE: %{run} = TENSOR0="%mlir_src_dir/test/Integration/data/mttkrp_b.tns" \
+// REDEFINE: %lli \
+// REDEFINE:   --entry-function=entry_lli \
+// REDEFINE:   --extra-module=%S/Inputs/main_for_lli.ll \
+// REDEFINE:   %VLA_ARCH_ATTR_OPTIONS \
+// REDEFINE:   --dlopen=%mlir_native_utils_lib_dir/libmlir_c_runner_utils%shlibext --dlopen=%mlir_native_utils_lib_dir/libmlir_runner_utils%shlibext | \
+// REDEFINE: FileCheck %s
+// RUN: %{compile} | mlir-translate -mlir-to-llvmir | %{run}
 
 !Filename = !llvm.ptr<i8>
 
index 630d1b7..8a36bdb 100644 (file)
@@ -1,20 +1,32 @@
 // DEFINE: %{option} = enable-runtime-library=true
-// DEFINE: %{command} = mlir-opt %s --sparse-compiler=%{option} | \
-// DEFINE: TENSOR0="%mlir_src_dir/test/Integration/data/test.mtx" \
+// DEFINE: %{compile} = mlir-opt %s --sparse-compiler=%{option}
+// DEFINE: %{run} = TENSOR0="%mlir_src_dir/test/Integration/data/test.mtx" \
 // DEFINE: mlir-cpu-runner \
 // DEFINE:  -e entry -entry-point-result=void  \
 // DEFINE:  -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | \
 // DEFINE: FileCheck %s
 //
-// RUN: %{command}
+// RUN: %{compile} | %{run}
 //
 // Do the same run, but now with direct IR generation.
 // REDEFINE: %{option} = enable-runtime-library=false
-// RUN: %{command}
+// RUN: %{compile} | %{run}
 //
 // Do the same run, but now with direct IR generation and vectorization.
 // REDEFINE: %{option} = "enable-runtime-library=false vl=2 reassociate-fp-reductions=true enable-index-optimizations=true"
-// RUN: %{command}
+// RUN: %{compile} | %{run}
+
+// If SVE is available, do the same run, but now with direct IR generation and VLA
+// vectorization.
+// REDEFINE: %{option} = "enable-runtime-library=false vl=4  enable-arm-sve=%ENABLE_VLA"
+// REDEFINE: %{run} = TENSOR0="%mlir_src_dir/test/Integration/data/test.mtx" \
+// REDEFINE: %lli \
+// REDEFINE:   --entry-function=entry_lli \
+// REDEFINE:   --extra-module=%S/Inputs/main_for_lli.ll \
+// REDEFINE:   %VLA_ARCH_ATTR_OPTIONS \
+// REDEFINE:   --dlopen=%mlir_native_utils_lib_dir/libmlir_c_runner_utils%shlibext | \
+// REDEFINE: FileCheck %s
+// RUN: %{compile} | mlir-translate -mlir-to-llvmir | %{run}
 
 !Filename = !llvm.ptr<i8>
 
index 2c1200a..0bcd0db 100644 (file)
@@ -1,19 +1,30 @@
 // DEFINE: %{option} = enable-runtime-library=true
-// DEFINE: %{command} = mlir-opt %s --sparse-compiler=%{option} | \
-// DEFINE: mlir-cpu-runner \
+// DEFINE: %{compile} = mlir-opt %s --sparse-compiler=%{option}
+// DEFINE: %{run} = mlir-cpu-runner \
 // DEFINE:  -e entry -entry-point-result=void  \
 // DEFINE:  -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | \
 // DEFINE: FileCheck %s
 //
-// RUN: %{command}
+// RUN: %{compile} | %{run}
 //
 // Do the same run, but now with direct IR generation.
 // REDEFINE: %{option} = enable-runtime-library=false
-// RUN: %{command}
+// RUN: %{compile} | %{run}
 //
 // Do the same run, but now with direct IR generation and vectorization.
 // REDEFINE: %{option} = "enable-runtime-library=false vl=2 reassociate-fp-reductions=true enable-index-optimizations=true"
-// RUN: %{command}
+// RUN: %{compile} | %{run}
+
+// If SVE is available, do the same run, but now with direct IR generation and VLA
+// vectorization.
+// REDEFINE: %{option} = "enable-runtime-library=false vl=4 enable-arm-sve=%ENABLE_VLA"
+// REDEFINE: %{run} = %lli \
+// REDEFINE:   --entry-function=entry_lli \
+// REDEFINE:   --extra-module=%S/Inputs/main_for_lli.ll \
+// REDEFINE:   %VLA_ARCH_ATTR_OPTIONS \
+// REDEFINE:   --dlopen=%mlir_native_utils_lib_dir/libmlir_c_runner_utils%shlibext | \
+// REDEFINE: FileCheck %s
+// RUN: %{compile} | mlir-translate -mlir-to-llvmir | %{run}
 
 #DCSR = #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "compressed" ] }>
 
index 65f6805..b962ae4 100644 (file)
@@ -1,19 +1,31 @@
 // DEFINE: %{option} = enable-runtime-library=true
-// DEFINE: %{command} = mlir-opt %s --sparse-compiler=%{option} | \
-// DEFINE: mlir-cpu-runner \
+// DEFINE: %{compile} = mlir-opt %s --sparse-compiler=%{option}
+// DEFINE: %{run} = mlir-cpu-runner \
 // DEFINE:  -e entry -entry-point-result=void  \
 // DEFINE:  -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | \
 // DEFINE: FileCheck %s
 //
-// RUN: %{command}
+// RUN: %{compile} | %{run}
 //
 // Do the same run, but now with direct IR generation.
 // REDEFINE: %{option} = enable-runtime-library=false
-// RUN: %{command}
+// RUN: %{compile} | %{run}
 //
 // Do the same run, but now with direct IR generation and vectorization.
 // REDEFINE: %{option} = "enable-runtime-library=false vl=2 reassociate-fp-reductions=true enable-index-optimizations=true"
-// RUN: %{command}
+// RUN: %{compile} | %{run}
+
+// If SVE is available, do the same run, but now with direct IR generation and VLA
+// vectorization.
+// REDEFINE: %{option} = "enable-runtime-library=false vl=4 enable-arm-sve=%ENABLE_VLA"
+// REDEFINE: %{run} = %lli \
+// REDEFINE:   --entry-function=entry_lli \
+// REDEFINE:   --extra-module=%S/Inputs/main_for_lli.ll \
+// REDEFINE:   %VLA_ARCH_ATTR_OPTIONS \
+// REDEFINE:   --dlopen=%mlir_native_utils_lib_dir/libmlir_c_runner_utils%shlibext | \
+// REDEFINE: FileCheck %s
+// RUN: %{compile} | mlir-translate -mlir-to-llvmir | %{run}
+// Reductions in this file are supported by the AArch64 SVE backend.
 
 #SV = #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ] }>
 #DV = #sparse_tensor.encoding<{ dimLevelType = [ "dense"      ] }>
@@ -54,30 +66,6 @@ module {
     return %0 : tensor<f32>
   }
 
-  func.func @prod_reduction_i32(%arga: tensor<32xi32, #DV>,
-                           %argx: tensor<i32>) -> tensor<i32> {
-    %0 = linalg.generic #trait_reduction
-      ins(%arga: tensor<32xi32, #DV>)
-      outs(%argx: tensor<i32>) {
-        ^bb(%a: i32, %x: i32):
-          %0 = arith.muli %x, %a : i32
-          linalg.yield %0 : i32
-    } -> tensor<i32>
-    return %0 : tensor<i32>
-  }
-
-  func.func @prod_reduction_f32(%arga: tensor<32xf32, #DV>,
-                           %argx: tensor<f32>) -> tensor<f32> {
-    %0 = linalg.generic #trait_reduction
-      ins(%arga: tensor<32xf32, #DV>)
-      outs(%argx: tensor<f32>) {
-        ^bb(%a: f32, %x: f32):
-          %0 = arith.mulf %x, %a : f32
-          linalg.yield %0 : f32
-    } -> tensor<f32>
-    return %0 : tensor<f32>
-  }
-
   func.func @and_reduction_i32(%arga: tensor<32xi32, #DV>,
                           %argx: tensor<i32>) -> tensor<i32> {
     %0 = linalg.generic #trait_reduction
@@ -169,10 +157,6 @@ module {
        : (tensor<32xi32, #SV>, tensor<i32>) -> tensor<i32>
     %1 = call @sum_reduction_f32(%sparse_input_f32, %rf)
        : (tensor<32xf32, #SV>, tensor<f32>) -> tensor<f32>
-    %2 = call @prod_reduction_i32(%dense_input_i32, %ri)
-       : (tensor<32xi32, #DV>, tensor<i32>) -> tensor<i32>
-    %3 = call @prod_reduction_f32(%dense_input_f32, %rf)
-       : (tensor<32xf32, #DV>, tensor<f32>) -> tensor<f32>
     %4 = call @and_reduction_i32(%dense_input_i32, %ri)
        : (tensor<32xi32, #DV>, tensor<i32>) -> tensor<i32>
     %5 = call @or_reduction_i32(%sparse_input_i32, %ri)
@@ -184,16 +168,12 @@ module {
     //
     // CHECK: 26
     // CHECK: 27.5
-    // CHECK: 3087
-    // CHECK: 168
     // CHECK: 1
     // CHECK: 15
     // CHECK: 10
     //
     call @dump_i32(%0) : (tensor<i32>) -> ()
     call @dump_f32(%1) : (tensor<f32>) -> ()
-    call @dump_i32(%2) : (tensor<i32>) -> ()
-    call @dump_f32(%3) : (tensor<f32>) -> ()
     call @dump_i32(%4) : (tensor<i32>) -> ()
     call @dump_i32(%5) : (tensor<i32>) -> ()
     call @dump_i32(%6) : (tensor<i32>) -> ()
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_reductions_prod.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_reductions_prod.mlir
new file mode 100644 (file)
index 0000000..a4942b9
--- /dev/null
@@ -0,0 +1,115 @@
+// DEFINE: %{option} = enable-runtime-library=true
+// DEFINE: %{command} = mlir-opt %s --sparse-compiler=%{option} | \
+// DEFINE: mlir-cpu-runner \
+// DEFINE:  -e entry -entry-point-result=void  \
+// DEFINE:  -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | \
+// DEFINE: FileCheck %s
+//
+// RUN: %{command}
+//
+// Do the same run, but now with direct IR generation.
+// REDEFINE: %{option} = enable-runtime-library=false
+// RUN: %{command}
+//
+// Do the same run, but now with direct IR generation and vectorization.
+// REDEFINE: %{option} = "enable-runtime-library=false vl=2 reassociate-fp-reductions=true enable-index-optimizations=true"
+// RUN: %{command}
+
+// Product reductions - kept in a separate file as these are not supported by
+// the AArch64 SVE backend (so the set-up is a bit different to
+// sparse_reductions.mlir)
+
+#SV = #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ] }>
+#DV = #sparse_tensor.encoding<{ dimLevelType = [ "dense"      ] }>
+
+#trait_reduction = {
+  indexing_maps = [
+    affine_map<(i) -> (i)>,  // a
+    affine_map<(i) -> ()>    // x (scalar out)
+  ],
+  iterator_types = ["reduction"],
+  doc = "x += OPER_i a(i)"
+}
+
+// An example of vector reductions.
+module {
+
+  func.func @prod_reduction_i32(%arga: tensor<32xi32, #DV>,
+                           %argx: tensor<i32>) -> tensor<i32> {
+    %0 = linalg.generic #trait_reduction
+      ins(%arga: tensor<32xi32, #DV>)
+      outs(%argx: tensor<i32>) {
+        ^bb(%a: i32, %x: i32):
+          %0 = arith.muli %x, %a : i32
+          linalg.yield %0 : i32
+    } -> tensor<i32>
+    return %0 : tensor<i32>
+  }
+
+  func.func @prod_reduction_f32(%arga: tensor<32xf32, #DV>,
+                           %argx: tensor<f32>) -> tensor<f32> {
+    %0 = linalg.generic #trait_reduction
+      ins(%arga: tensor<32xf32, #DV>)
+      outs(%argx: tensor<f32>) {
+        ^bb(%a: f32, %x: f32):
+          %0 = arith.mulf %x, %a : f32
+          linalg.yield %0 : f32
+    } -> tensor<f32>
+    return %0 : tensor<f32>
+  }
+
+  func.func @dump_i32(%arg0 : tensor<i32>) {
+    %v = tensor.extract %arg0[] : tensor<i32>
+    vector.print %v : i32
+    return
+  }
+
+  func.func @dump_f32(%arg0 : tensor<f32>) {
+    %v = tensor.extract %arg0[] : tensor<f32>
+    vector.print %v : f32
+    return
+  }
+
+  func.func @entry() {
+    %ri = arith.constant dense< 7   > : tensor<i32>
+    %rf = arith.constant dense< 2.0 > : tensor<f32>
+
+    %c_1_i32 = arith.constant dense<[
+      1, 1, 7, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+      1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 7, 3
+    ]> : tensor<32xi32>
+
+    %c_1_f32 = arith.constant dense<[
+      1.0, 1.0, 1.0, 3.5, 1.0, 1.0, 1.0, 1.0,
+      1.0, 1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+      1.0, 1.0, 1.0, 1.0, 3.0, 1.0, 1.0, 1.0,
+      1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 4.0
+    ]> : tensor<32xf32>
+
+    // Convert constants to annotated tensors.
+    %dense_input_i32 = sparse_tensor.convert %c_1_i32
+      : tensor<32xi32> to tensor<32xi32, #DV>
+    %dense_input_f32 = sparse_tensor.convert %c_1_f32
+      : tensor<32xf32> to tensor<32xf32, #DV>
+
+    // Call the kernels.
+    %2 = call @prod_reduction_i32(%dense_input_i32, %ri)
+       : (tensor<32xi32, #DV>, tensor<i32>) -> tensor<i32>
+    %3 = call @prod_reduction_f32(%dense_input_f32, %rf)
+       : (tensor<32xf32, #DV>, tensor<f32>) -> tensor<f32>
+
+    // Verify results.
+    //
+    // CHECK: 3087
+    // CHECK: 168
+    //
+    call @dump_i32(%2) : (tensor<i32>) -> ()
+    call @dump_f32(%3) : (tensor<f32>) -> ()
+
+    // Release the resources.
+    bufferization.dealloc_tensor %dense_input_i32  : tensor<32xi32, #DV>
+    bufferization.dealloc_tensor %dense_input_f32  : tensor<32xf32, #DV>
+
+    return
+  }
+}
index 76d23c1..dc1da5e 100644 (file)
@@ -1,20 +1,32 @@
 // DEFINE: %{option} = enable-runtime-library=true
-// DEFINE: %{command} = mlir-opt %s --sparse-compiler=%{option} | \
-// DEFINE: TENSOR0="%mlir_src_dir/test/Integration/data/test.mtx" \
+// DEFINE: %{compile} = mlir-opt %s --sparse-compiler=%{option}
+// DEFINE: %{run} = TENSOR0="%mlir_src_dir/test/Integration/data/test.mtx" \
 // DEFINE: mlir-cpu-runner \
 // DEFINE:  -e entry -entry-point-result=void  \
 // DEFINE:  -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | \
 // DEFINE: FileCheck %s
 //
-// RUN: %{command}
+// RUN: %{compile} | %{run}
 //
 // Do the same run, but now with direct IR generation.
 // REDEFINE: %{option} = enable-runtime-library=false
-// RUN: %{command}
+// RUN: %{compile} | %{run}
 //
 // Do the same run, but now with direct IR generation and vectorization.
 // REDEFINE: %{option} = "enable-runtime-library=false vl=2 reassociate-fp-reductions=true enable-index-optimizations=true"
-// RUN: %{command}
+// RUN: %{compile} | %{run}
+
+// If SVE is available, do the same run, but now with direct IR generation and VLA
+// vectorization.
+// REDEFINE: %{option} = "enable-runtime-library=false vl=4  enable-arm-sve=%ENABLE_VLA"
+// REDEFINE: %{run} = TENSOR0="%mlir_src_dir/test/Integration/data/test.mtx" \
+// REDEFINE: %lli \
+// REDEFINE:   --entry-function=entry_lli \
+// REDEFINE:   --extra-module=%S/Inputs/main_for_lli.ll \
+// REDEFINE:   %VLA_ARCH_ATTR_OPTIONS \
+// REDEFINE:   --dlopen=%mlir_native_utils_lib_dir/libmlir_c_runner_utils%shlibext | \
+// REDEFINE: FileCheck %s
+// RUN: %{compile} | mlir-translate -mlir-to-llvmir | %{run}
 
 !Filename = !llvm.ptr<i8>
 
old mode 100755 (executable)
new mode 100644 (file)
index 35cfe58..669840e
@@ -1,19 +1,30 @@
 // DEFINE: %{option} = enable-runtime-library=true
-// DEFINE: %{command} = mlir-opt %s --sparse-compiler=%{option} | \
-// DEFINE: mlir-cpu-runner \
+// DEFINE: %{compile} = mlir-opt %s --sparse-compiler=%{option}
+// DEFINE: %{run} = mlir-cpu-runner \
 // DEFINE:  -e entry -entry-point-result=void  \
 // DEFINE:  -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | \
 // DEFINE: FileCheck %s
 //
-// RUN: %{command}
+// RUN: %{compile} | %{run}
 //
 // Do the same run, but now with direct IR generation.
 // REDEFINE: %{option} = "enable-runtime-library=false enable-buffer-initialization=true"
-// RUN: %{command}
+// RUN: %{compile} | %{run}
 //
 // Do the same run, but now with direct IR generation and vectorization.
 // REDEFINE: %{option} = "enable-runtime-library=false enable-buffer-initialization=true vl=2 reassociate-fp-reductions=true enable-index-optimizations=true"
-// RUN: %{command}
+// RUN: %{compile} | %{run}
+
+// If SVE is available, do the same run, but now with direct IR generation and VLA
+// vectorization.
+// REDEFINE: %{option} = "enable-runtime-library=false enable-buffer-initialization=true vl=4 enable-arm-sve=%ENABLE_VLA"
+// REDEFINE: %{run} = %lli \
+// REDEFINE:   --entry-function=entry_lli \
+// REDEFINE:   --extra-module=%S/Inputs/main_for_lli.ll \
+// REDEFINE:   %VLA_ARCH_ATTR_OPTIONS \
+// REDEFINE:   --dlopen=%mlir_native_utils_lib_dir/libmlir_c_runner_utils%shlibext | \
+// REDEFINE: FileCheck %s
+// RUN: %{compile} | mlir-translate -mlir-to-llvmir | %{run}
 
 #SM = #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "compressed" ] }>
 
index 182d7ae..2c872e3 100644 (file)
@@ -1,15 +1,26 @@
 // DEFINE: %{option} = enable-runtime-library=true
-// DEFINE: %{command} = mlir-opt %s --sparse-compiler=%{option} | \
-// DEFINE: mlir-cpu-runner \
+// DEFINE: %{compile} = mlir-opt %s --sparse-compiler=%{option}
+// DEFINE: %{run} = mlir-cpu-runner \
 // DEFINE:  -e entry -entry-point-result=void  \
 // DEFINE:  -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | \
 // DEFINE: FileCheck %s
 //
-// RUN: %{command}
+// RUN: %{compile} | %{run}
 //
 // Do the same run, but now with direct IR generation.
 // REDEFINE: %{option} = enable-runtime-library=false
-// RUN: %{command}
+// RUN: %{compile} | %{run}
+
+// If SVE is available, do the same run, but now with direct IR generation and VLA
+// vectorization.
+// REDEFINE: %{option} = "enable-runtime-library=false vl=4 enable-arm-sve=%ENABLE_VLA"
+// REDEFINE: %{run} = %lli \
+// REDEFINE:   --entry-function=entry_lli \
+// REDEFINE:   --extra-module=%S/Inputs/main_for_lli.ll \
+// REDEFINE:   %VLA_ARCH_ATTR_OPTIONS \
+// REDEFINE:   --dlopen=%mlir_native_utils_lib_dir/libmlir_c_runner_utils%shlibext | \
+// REDEFINE: FileCheck %s
+// RUN: %{compile} | mlir-translate -mlir-to-llvmir | %{run}
 
 #CSR = #sparse_tensor.encoding<{ dimLevelType = [ "dense", "compressed" ] }>
 
index de44ae3..1bb3f39 100644 (file)
@@ -1,20 +1,32 @@
 // DEFINE: %{option} = enable-runtime-library=true
-// DEFINE: %{command} = mlir-opt %s --sparse-compiler=%{option} | \
-// DEFINE: TENSOR0="%mlir_src_dir/test/Integration/data/wide.mtx" \
+// DEFINE: %{compile} = mlir-opt %s --sparse-compiler=%{option}
+// DEFINE: %{run} = TENSOR0="%mlir_src_dir/test/Integration/data/wide.mtx" \
 // DEFINE: mlir-cpu-runner \
 // DEFINE:  -e entry -entry-point-result=void  \
 // DEFINE:  -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | \
 // DEFINE: FileCheck %s
 //
-// RUN: %{command}
+// RUN: %{compile} | %{run}
 //
 // Do the same run, but now with direct IR generation.
 // REDEFINE: %{option} = enable-runtime-library=false
-// RUN: %{command}
+// RUN: %{compile} | %{run}
 //
 // Do the same run, but now with direct IR generation and vectorization.
 // REDEFINE: %{option} = "enable-runtime-library=false vl=2 reassociate-fp-reductions=true enable-index-optimizations=true"
-// RUN: %{command}
+// RUN: %{compile} | %{run}
+
+// If SVE is available, do the same run, but now with direct IR generation and VLA
+// vectorization.
+// REDEFINE: %{option} = "enable-runtime-library=false vl=4  enable-arm-sve=%ENABLE_VLA"
+// REDEFINE: %{run} = TENSOR0="%mlir_src_dir/test/Integration/data/wide.mtx" \
+// REDEFINE: %lli \
+// REDEFINE:   --entry-function=entry_lli \
+// REDEFINE:   --extra-module=%S/Inputs/main_for_lli.ll \
+// REDEFINE:   %VLA_ARCH_ATTR_OPTIONS \
+// REDEFINE:   --dlopen=%mlir_native_utils_lib_dir/libmlir_c_runner_utils%shlibext | \
+// REDEFINE: FileCheck %s
+// RUN: %{compile} | mlir-translate -mlir-to-llvmir | %{run}
 
 !Filename = !llvm.ptr<i8>
 
index 43014dd..588bcb0 100644 (file)
@@ -1,20 +1,32 @@
 // DEFINE: %{option} = enable-runtime-library=true
-// DEFINE: %{command} = mlir-opt %s --sparse-compiler=%{option} | \
-// DEFINE: TENSOR0="%mlir_src_dir/test/Integration/data/test_symmetric.mtx" \
+// DEFINE: %{compile} = mlir-opt %s --sparse-compiler=%{option}
+// DEFINE: %{run} = TENSOR0="%mlir_src_dir/test/Integration/data/test_symmetric.mtx" \
 // DEFINE: mlir-cpu-runner \
 // DEFINE:  -e entry -entry-point-result=void  \
 // DEFINE:  -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | \
 // DEFINE: FileCheck %s
 //
-// RUN: %{command}
+// RUN: %{compile} | %{run}
 //
 // Do the same run, but now with direct IR generation.
 // REDEFINE: %{option} = enable-runtime-library=false
-// RUN: %{command}
+// RUN: %{compile} | %{run}
 //
 // Do the same run, but now with direct IR generation and vectorization.
 // REDEFINE: %{option} = "enable-runtime-library=false vl=2 reassociate-fp-reductions=true enable-index-optimizations=true"
-// RUN: %{command}
+// RUN: %{compile} | %{run}
+
+// If SVE is available, do the same run, but now with direct IR generation and VLA
+// vectorization.
+// REDEFINE: %{option} = "enable-runtime-library=false vl=4  enable-arm-sve=%ENABLE_VLA"
+// REDEFINE: %{run} = TENSOR0="%mlir_src_dir/test/Integration/data/test_symmetric.mtx" \
+// REDEFINE: %lli \
+// REDEFINE:   --entry-function=entry_lli \
+// REDEFINE:   --extra-module=%S/Inputs/main_for_lli.ll \
+// REDEFINE:   %VLA_ARCH_ATTR_OPTIONS \
+// REDEFINE:   --dlopen=%mlir_native_utils_lib_dir/libmlir_c_runner_utils%shlibext | \
+// REDEFINE: FileCheck %s
+// RUN: %{compile} | mlir-translate -mlir-to-llvmir | %{run}
 
 !Filename = !llvm.ptr<i8>