From 66d555aa33516f26adb833ef3ab3754926fc97cd Mon Sep 17 00:00:00 2001
From: Javier Setoain <javier.setoain@gmail.com>
Date: Mon, 12 Dec 2022 18:02:59 +0000
Subject: [PATCH] [mlir][sparse][ArmSVE] Enable sparse integration tests for
 ArmSVE

This patch adds the logic necessary to target the sparse-tensor dialect
integration tests for SVE. As the LLVM backend for AArch64 does not
currently support product reductions, the corresponding tests are
disabled for SVE.

Not all tests have been updated yet. The remaining tests will be
refactored in a separate patch shortly.

Differential Revision: https://reviews.llvm.org/D121304

Co-authored-by: Andrzej Warzynski <andrzej.warzynski@arm.com>
---
 .../SparseTensor/CPU/Inputs/main_for_lli.ll        |   8 ++
 .../Dialect/SparseTensor/CPU/lit.local.cfg         |  28 +++++
 .../Dialect/SparseTensor/CPU/sparse_cast.mlir      |  21 +++-
 .../SparseTensor/CPU/sparse_filter_conv2d.mlir     |  21 +++-
 .../Dialect/SparseTensor/CPU/sparse_flatten.mlir   |  22 +++-
 .../SparseTensor/CPU/sparse_index_dense.mlir       |  21 +++-
 .../Dialect/SparseTensor/CPU/sparse_matvec.mlir    |  27 +++--
 .../Dialect/SparseTensor/CPU/sparse_mttkrp.mlir    |  22 +++-
 .../SparseTensor/CPU/sparse_out_simple.mlir        |  22 +++-
 .../SparseTensor/CPU/sparse_quantized_matmul.mlir  |  21 +++-
 .../SparseTensor/CPU/sparse_reductions.mlir        |  54 +++-------
 .../SparseTensor/CPU/sparse_reductions_prod.mlir   | 115 +++++++++++++++++++++
 .../SparseTensor/CPU/sparse_sampled_matmul.mlir    |  22 +++-
 .../SparseTensor/CPU/sparse_sampled_mm_fusion.mlir |  21 +++-
 .../Dialect/SparseTensor/CPU/sparse_scale.mlir     |  19 +++-
 .../Dialect/SparseTensor/CPU/sparse_spmm.mlir      |  22 +++-
 .../Dialect/SparseTensor/CPU/sparse_sum.mlir       |  22 +++-
 17 files changed, 384 insertions(+), 104 deletions(-)
 create mode 100644 mlir/test/Integration/Dialect/SparseTensor/CPU/Inputs/main_for_lli.ll
 create mode 100644 mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_reductions_prod.mlir
 mode change 100755 => 100644 mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sampled_mm_fusion.mlir

diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/Inputs/main_for_lli.ll b/mlir/test/Integration/Dialect/SparseTensor/CPU/Inputs/main_for_lli.ll
new file mode 100644
index 0000000..7b74618
--- /dev/null
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/Inputs/main_for_lli.ll
@@ -0,0 +1,8 @@
+; Dummy wrapper required by lli, which does not support void functions (i.e.
+; it fails if non-zero code is returned)
+define i32 @entry_lli() {
+  call void @entry()
+  ret i32 0
+}
+
+declare void @entry()
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/lit.local.cfg b/mlir/test/Integration/Dialect/SparseTensor/CPU/lit.local.cfg
index 83247d7..dba6373 100644
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/lit.local.cfg
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/lit.local.cfg
@@ -3,3 +3,31 @@ import sys
 # No JIT on win32.
 if sys.platform == 'win32':
     config.unsupported = True
+
+# ArmSVE tests must be enabled via build flag.
+if config.mlir_run_arm_sve_tests:
+    config.substitutions.append(('%ENABLE_VLA', 'true'))
+    config.substitutions.append(('%VLA_ARCH_ATTR_OPTIONS', '--march=aarch64 --mattr="+sve"'))
+    lli_cmd = 'lli'
+    if config.arm_emulator_lli_executable:
+       lli_cmd = config.arm_emulator_lli_executable
+
+    if config.arm_emulator_utils_lib_dir:
+        config.substitutions.append(('%mlir_native_utils_lib_dir', config.arm_emulator_utils_lib_dir))
+    else:
+        config.substitutions.append(('%mlir_native_utils_lib_dir', config.mlir_lib_dir))
+
+    if config.arm_emulator_executable:
+        # Run test in emulator (qemu or armie).
+        emulation_cmd = config.arm_emulator_executable
+        if config.arm_emulator_options:
+            emulation_cmd = emulation_cmd + ' ' + config.arm_emulator_options
+        emulation_cmd = emulation_cmd + ' ' + lli_cmd
+        config.substitutions.append(('%lli', emulation_cmd))
+    else:
+        config.substitutions.append(('%lli', lli_cmd))
+else:
+    config.substitutions.append(('%ENABLE_VLA', 'false'))
+    config.substitutions.append(('%VLA_ARCH_ATTR_OPTIONS', ''))
+    config.substitutions.append(('%lli', 'lli'))
+    config.substitutions.append(('%mlir_native_utils_lib_dir', config.mlir_lib_dir))
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_cast.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_cast.mlir
index 13a9dcc..4cc6126 100644
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_cast.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_cast.mlir
@@ -1,19 +1,30 @@
 // DEFINE: %{option} = enable-runtime-library=true
-// DEFINE: %{command} = mlir-opt %s --sparse-compiler=%{option} | \
-// DEFINE: mlir-cpu-runner \
+// DEFINE: %{compile} = mlir-opt %s --sparse-compiler=%{option}
+// DEFINE: %{run} = mlir-cpu-runner \
 // DEFINE:  -e entry -entry-point-result=void  \
 // DEFINE:  -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | \
 // DEFINE: FileCheck %s
 //
-// RUN: %{command}
+// RUN: %{compile} | %{run}
 //
 // Do the same run, but now with direct IR generation.
 // REDEFINE: %{option} = enable-runtime-library=false
-// RUN: %{command}
+// RUN: %{compile} | %{run}
 //
 // Do the same run, but now with direct IR generation and vectorization.
 // REDEFINE: %{option} = "enable-runtime-library=false vl=2 reassociate-fp-reductions=true enable-index-optimizations=true"
-// RUN: %{command}
+// RUN: %{compile} | %{run}
+
+// If SVE is available, do the same run, but now with direct IR generation and VLA
+// vectorization.
+// REDEFINE: %{option} = "enable-runtime-library=false vl=4 enable-arm-sve=%ENABLE_VLA"
+// REDEFINE: %{run} = %lli \
+// REDEFINE:   --entry-function=entry_lli \
+// REDEFINE:   --extra-module=%S/Inputs/main_for_lli.ll \
+// REDEFINE:   %VLA_ARCH_ATTR_OPTIONS \
+// REDEFINE:   --dlopen=%mlir_native_utils_lib_dir/libmlir_c_runner_utils%shlibext | \
+// REDEFINE: FileCheck %s
+// RUN: %{compile} | mlir-translate -mlir-to-llvmir | %{run}
 
 #SV = #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ] }>
 
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_filter_conv2d.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_filter_conv2d.mlir
index 5c1e993..952f01a 100644
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_filter_conv2d.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_filter_conv2d.mlir
@@ -1,19 +1,30 @@
 // DEFINE: %{option} = enable-runtime-library=true
-// DEFINE: %{command} = mlir-opt %s --sparse-compiler=%{option} | \
-// DEFINE: mlir-cpu-runner \
+// DEFINE: %{compile} = mlir-opt %s --sparse-compiler=%{option}
+// DEFINE: %{run} = mlir-cpu-runner \
 // DEFINE:  -e entry -entry-point-result=void  \
 // DEFINE:  -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | \
 // DEFINE: FileCheck %s
 //
-// RUN: %{command}
+// RUN: %{compile} | %{run}
 //
 // Do the same run, but now with direct IR generation.
 // REDEFINE: %{option} = enable-runtime-library=false
-// RUN: %{command}
+// RUN: %{compile} | %{run}
 //
 // Do the same run, but now with direct IR generation and vectorization.
 // REDEFINE: %{option} = "enable-runtime-library=false vl=2 reassociate-fp-reductions=true enable-index-optimizations=true"
-// RUN: %{command}
+// RUN: %{compile} | %{run}
+
+// If SVE is available, do the same run, but now with direct IR generation and VLA
+// vectorization.
+// REDEFINE: %{option} = "enable-runtime-library=false vl=4 enable-arm-sve=%ENABLE_VLA"
+// REDEFINE: %{run} = %lli \
+// REDEFINE:   --entry-function=entry_lli \
+// REDEFINE:   --extra-module=%S/Inputs/main_for_lli.ll \
+// REDEFINE:   %VLA_ARCH_ATTR_OPTIONS \
+// REDEFINE:   --dlopen=%mlir_native_utils_lib_dir/libmlir_c_runner_utils%shlibext | \
+// REDEFINE: FileCheck %s
+// RUN: %{compile} | mlir-translate -mlir-to-llvmir | %{run}
 
 #DCSR = #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "compressed" ] }>
 
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_flatten.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_flatten.mlir
index 6917274..28f9033 100644
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_flatten.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_flatten.mlir
@@ -1,20 +1,32 @@
 // DEFINE: %{option} = enable-runtime-library=true
-// DEFINE: %{command} = mlir-opt %s --sparse-compiler=%{option} | \
-// DEFINE: TENSOR0="%mlir_src_dir/test/Integration/data/test.tns" \
+// DEFINE: %{compile} = mlir-opt %s --sparse-compiler=%{option}
+// DEFINE: %{run} = TENSOR0="%mlir_src_dir/test/Integration/data/test.tns" \
 // DEFINE: mlir-cpu-runner \
 // DEFINE:  -e entry -entry-point-result=void  \
 // DEFINE:  -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext,%mlir_lib_dir/libmlir_runner_utils%shlibext | \
 // DEFINE: FileCheck %s
 //
-// RUN: %{command}
+// RUN: %{compile} | %{run}
 //
 // Do the same run, but now with direct IR generation.
 // REDEFINE: %{option} = enable-runtime-library=false
-// RUN: %{command}
+// RUN: %{compile} | %{run}
 //
 // Do the same run, but now with direct IR generation and vectorization.
 // REDEFINE: %{option} = "enable-runtime-library=false vl=2 reassociate-fp-reductions=true enable-index-optimizations=true"
-// RUN: %{command}
+// RUN: %{compile} | %{run}
+
+// If SVE is available, do the same run, but now with direct IR generation and VLA
+// vectorization.
+// REDEFINE: %{option} = "enable-runtime-library=false vl=4 enable-arm-sve=%ENABLE_VLA"
+// REDEFINE: %{run} = TENSOR0="%mlir_src_dir/test/Integration/data/test.tns" \
+// REDEFINE: %lli \
+// REDEFINE:   --entry-function=entry_lli \
+// REDEFINE:   --extra-module=%S/Inputs/main_for_lli.ll \
+// REDEFINE:   %VLA_ARCH_ATTR_OPTIONS \
+// REDEFINE:   --dlopen=%mlir_native_utils_lib_dir/libmlir_c_runner_utils%shlibext --dlopen=%mlir_lib_dir/libmlir_runner_utils%shlibext | \
+// REDEFINE: FileCheck %s
+// RUN: %{compile} | mlir-translate -mlir-to-llvmir | %{run}
 
 !Filename = !llvm.ptr<i8>
 
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_index_dense.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_index_dense.mlir
index af64c09..c5ce259 100644
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_index_dense.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_index_dense.mlir
@@ -1,19 +1,30 @@
 // DEFINE: %{option} = enable-runtime-library=true
-// DEFINE: %{command} = mlir-opt %s --sparse-compiler=%{option} | \
-// DEFINE: mlir-cpu-runner \
+// DEFINE: %{compile} = mlir-opt %s --sparse-compiler=%{option}
+// DEFINE: %{run} = mlir-cpu-runner \
 // DEFINE:  -e entry -entry-point-result=void  \
 // DEFINE:  -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | \
 // DEFINE: FileCheck %s
 //
-// RUN: %{command}
+// RUN: %{compile} | %{run}
 //
 // Do the same run, but now with direct IR generation.
 // REDEFINE: %{option} = enable-runtime-library=false
-// RUN: %{command}
+// RUN: %{compile} | %{run}
 //
 // Do the same run, but now with direct IR generation and vectorization.
 // REDEFINE: %{option} = "enable-runtime-library=false vl=2 reassociate-fp-reductions=true enable-index-optimizations=true"
-// RUN: %{command}
+// RUN: %{compile} | %{run}
+
+// If SVE is available, do the same run, but now with direct IR generation and VLA
+// vectorization.
+// REDEFINE: %{option} = "enable-runtime-library=false vl=4 enable-arm-sve=%ENABLE_VLA"
+// REDEFINE: %{run} = %lli \
+// REDEFINE:   --entry-function=entry_lli \
+// REDEFINE:   --extra-module=%S/Inputs/main_for_lli.ll \
+// REDEFINE:   %VLA_ARCH_ATTR_OPTIONS \
+// REDEFINE:   --dlopen=%mlir_native_utils_lib_dir/libmlir_c_runner_utils%shlibext | \
+// REDEFINE: FileCheck %s
+// RUN: %{compile} | mlir-translate -mlir-to-llvmir | %{run}
 
 #SparseVector = #sparse_tensor.encoding<{
   dimLevelType = ["compressed"]
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matvec.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matvec.mlir
index 2c18c2b..d9e3143 100644
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matvec.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matvec.mlir
@@ -1,28 +1,39 @@
 // DEFINE: %{option} = enable-runtime-library=true
-// DEFINE: %{command} = mlir-opt %s --sparse-compiler=%{option} | \
-// DEFINE: TENSOR0="%mlir_src_dir/test/Integration/data/wide.mtx" \
+// DEFINE: %{compile} = mlir-opt %s --sparse-compiler=%{option}
+// DEFINE: %{run} = TENSOR0="%mlir_src_dir/test/Integration/data/wide.mtx" \
 // DEFINE: mlir-cpu-runner \
 // DEFINE:  -e entry -entry-point-result=void  \
 // DEFINE:  -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | \
 // DEFINE: FileCheck %s
-//
-// RUN: %{command}
+// RUN: %{compile} | %{run}
 //
 // Do the same run, but now with direct IR generation.
 // REDEFINE: %{option} = enable-runtime-library=false
-// RUN: %{command}
+// RUN: %{compile} | %{run}
 //
 // Do the same run, but now with parallelization strategy.
 // REDEFINE: %{option} = "enable-runtime-library=true parallelization-strategy=any-storage-any-loop"
-// RUN: %{command}
+// RUN: %{compile} | %{run}
 //
 // Do the same run, but now with direct IR generation and parallelization strategy.
 // REDEFINE: %{option} = "enable-runtime-library=false parallelization-strategy=any-storage-any-loop"
-// RUN: %{command}
+// RUN: %{compile} | %{run}
 //
 // Do the same run, but now with direct IR generation and vectorization.
 // REDEFINE: %{option} = "enable-runtime-library=false vl=2 reassociate-fp-reductions=true enable-index-optimizations=true"
-// RUN: %{command}
+// RUN: %{compile} | %{run}
+
+// If SVE is available, do the same run, but now with direct IR generation and VLA
+// vectorization.
+// REDEFINE: %{option} = "enable-runtime-library=false vl=4 enable-arm-sve=%ENABLE_VLA"
+// REDEFINE: %{run} = TENSOR0="%mlir_src_dir/test/Integration/data/wide.mtx" \
+// REDEFINE: %lli \
+// REDEFINE:   --entry-function=entry_lli \
+// REDEFINE:   --extra-module=%S/Inputs/main_for_lli.ll \
+// REDEFINE:   %VLA_ARCH_ATTR_OPTIONS \
+// REDEFINE:   --dlopen=%mlir_native_utils_lib_dir/libmlir_c_runner_utils%shlibext | \
+// REDEFINE: FileCheck %s
+// RUN: %{compile} | mlir-translate -mlir-to-llvmir | %{run}
 
 !Filename = !llvm.ptr<i8>
 
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_mttkrp.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_mttkrp.mlir
index 375db9a..55db2ba 100644
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_mttkrp.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_mttkrp.mlir
@@ -1,20 +1,32 @@
 // DEFINE: %{option} = enable-runtime-library=true
-// DEFINE: %{command} = mlir-opt %s --sparse-compiler=%{option} | \
-// DEFINE: TENSOR0="%mlir_src_dir/test/Integration/data/mttkrp_b.tns" \
+// DEFINE: %{compile} = mlir-opt %s --sparse-compiler=%{option}
+// DEFINE: %{run} = TENSOR0="%mlir_src_dir/test/Integration/data/mttkrp_b.tns"  \
 // DEFINE: mlir-cpu-runner \
 // DEFINE:  -e entry -entry-point-result=void  \
 // DEFINE:  -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext,%mlir_lib_dir/libmlir_runner_utils%shlibext | \
 // DEFINE: FileCheck %s
 //
-// RUN: %{command}
+// RUN: %{compile} | %{run}
 //
 // Do the same run, but now with direct IR generation.
 // REDEFINE: %{option} = enable-runtime-library=false
-// RUN: %{command}
+// RUN: %{compile} | %{run}
 //
 // Do the same run, but now with direct IR generation and vectorization.
 // REDEFINE: %{option} = "enable-runtime-library=false vl=2 reassociate-fp-reductions=true enable-index-optimizations=true"
-// RUN: %{command}
+// RUN: %{compile} | %{run}
+
+// If SVE is available, do the same run, but now with direct IR generation and VLA
+// vectorization.
+// REDEFINE: %{option} = "enable-runtime-library=false vl=4  enable-arm-sve=%ENABLE_VLA"
+// REDEFINE: %{run} = TENSOR0="%mlir_src_dir/test/Integration/data/mttkrp_b.tns" \
+// REDEFINE: %lli \
+// REDEFINE:   --entry-function=entry_lli \
+// REDEFINE:   --extra-module=%S/Inputs/main_for_lli.ll \
+// REDEFINE:   %VLA_ARCH_ATTR_OPTIONS \
+// REDEFINE:   --dlopen=%mlir_native_utils_lib_dir/libmlir_c_runner_utils%shlibext -dlopen=%mlir_lib_dir/libmlir_runner_utils%shlibext| \
+// REDEFINE: FileCheck %s
+// RUN: %{compile} | mlir-translate -mlir-to-llvmir | %{run}
 
 !Filename = !llvm.ptr<i8>
 
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_out_simple.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_out_simple.mlir
index 630d1b7..8a36bdb 100644
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_out_simple.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_out_simple.mlir
@@ -1,20 +1,32 @@
 // DEFINE: %{option} = enable-runtime-library=true
-// DEFINE: %{command} = mlir-opt %s --sparse-compiler=%{option} | \
-// DEFINE: TENSOR0="%mlir_src_dir/test/Integration/data/test.mtx" \
+// DEFINE: %{compile} = mlir-opt %s --sparse-compiler=%{option}
+// DEFINE: %{run} = TENSOR0="%mlir_src_dir/test/Integration/data/test.mtx" \
 // DEFINE: mlir-cpu-runner \
 // DEFINE:  -e entry -entry-point-result=void  \
 // DEFINE:  -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | \
 // DEFINE: FileCheck %s
 //
-// RUN: %{command}
+// RUN: %{compile} | %{run}
 //
 // Do the same run, but now with direct IR generation.
 // REDEFINE: %{option} = enable-runtime-library=false
-// RUN: %{command}
+// RUN: %{compile} | %{run}
 //
 // Do the same run, but now with direct IR generation and vectorization.
 // REDEFINE: %{option} = "enable-runtime-library=false vl=2 reassociate-fp-reductions=true enable-index-optimizations=true"
-// RUN: %{command}
+// RUN: %{compile} | %{run}
+
+// If SVE is available, do the same run, but now with direct IR generation and VLA
+// vectorization.
+// REDEFINE: %{option} = "enable-runtime-library=false vl=4  enable-arm-sve=%ENABLE_VLA"
+// REDEFINE: %{run} = TENSOR0="%mlir_src_dir/test/Integration/data/test.mtx" \
+// REDEFINE: %lli \
+// REDEFINE:   --entry-function=entry_lli \
+// REDEFINE:   --extra-module=%S/Inputs/main_for_lli.ll \
+// REDEFINE:   %VLA_ARCH_ATTR_OPTIONS \
+// REDEFINE:   --dlopen=%mlir_native_utils_lib_dir/libmlir_c_runner_utils%shlibext | \
+// REDEFINE: FileCheck %s
+// RUN: %{compile} | mlir-translate -mlir-to-llvmir | %{run}
 
 !Filename = !llvm.ptr<i8>
 
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_quantized_matmul.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_quantized_matmul.mlir
index 2c1200a..0bcd0db 100644
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_quantized_matmul.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_quantized_matmul.mlir
@@ -1,19 +1,30 @@
 // DEFINE: %{option} = enable-runtime-library=true
-// DEFINE: %{command} = mlir-opt %s --sparse-compiler=%{option} | \
-// DEFINE: mlir-cpu-runner \
+// DEFINE: %{compile} = mlir-opt %s --sparse-compiler=%{option}
+// DEFINE: %{run} = mlir-cpu-runner \
 // DEFINE:  -e entry -entry-point-result=void  \
 // DEFINE:  -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | \
 // DEFINE: FileCheck %s
 //
-// RUN: %{command}
+// RUN: %{compile} | %{run}
 //
 // Do the same run, but now with direct IR generation.
 // REDEFINE: %{option} = enable-runtime-library=false
-// RUN: %{command}
+// RUN: %{compile} | %{run}
 //
 // Do the same run, but now with direct IR generation and vectorization.
 // REDEFINE: %{option} = "enable-runtime-library=false vl=2 reassociate-fp-reductions=true enable-index-optimizations=true"
-// RUN: %{command}
+// RUN: %{compile} | %{run}
+
+// If SVE is available, do the same run, but now with direct IR generation and VLA
+// vectorization.
+// REDEFINE: %{option} = "enable-runtime-library=false vl=4 enable-arm-sve=%ENABLE_VLA"
+// REDEFINE: %{run} = %lli \
+// REDEFINE:   --entry-function=entry_lli \
+// REDEFINE:   --extra-module=%S/Inputs/main_for_lli.ll \
+// REDEFINE:   %VLA_ARCH_ATTR_OPTIONS \
+// REDEFINE:   --dlopen=%mlir_native_utils_lib_dir/libmlir_c_runner_utils%shlibext | \
+// REDEFINE: FileCheck %s
+// RUN: %{compile} | mlir-translate -mlir-to-llvmir | %{run}
 
 #DCSR = #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "compressed" ] }>
 
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_reductions.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_reductions.mlir
index 65f6805..b962ae4 100644
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_reductions.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_reductions.mlir
@@ -1,19 +1,31 @@
 // DEFINE: %{option} = enable-runtime-library=true
-// DEFINE: %{command} = mlir-opt %s --sparse-compiler=%{option} | \
-// DEFINE: mlir-cpu-runner \
+// DEFINE: %{compile} = mlir-opt %s --sparse-compiler=%{option}
+// DEFINE: %{run} = mlir-cpu-runner \
 // DEFINE:  -e entry -entry-point-result=void  \
 // DEFINE:  -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | \
 // DEFINE: FileCheck %s
 //
-// RUN: %{command}
+// RUN: %{compile} | %{run}
 //
 // Do the same run, but now with direct IR generation.
 // REDEFINE: %{option} = enable-runtime-library=false
-// RUN: %{command}
+// RUN: %{compile} | %{run}
 //
 // Do the same run, but now with direct IR generation and vectorization.
 // REDEFINE: %{option} = "enable-runtime-library=false vl=2 reassociate-fp-reductions=true enable-index-optimizations=true"
-// RUN: %{command}
+// RUN: %{compile} | %{run}
+
+// If SVE is available, do the same run, but now with direct IR generation and VLA
+// vectorization.
+// REDEFINE: %{option} = "enable-runtime-library=false vl=4 enable-arm-sve=%ENABLE_VLA"
+// REDEFINE: %{run} = %lli \
+// REDEFINE:   --entry-function=entry_lli \
+// REDEFINE:   --extra-module=%S/Inputs/main_for_lli.ll \
+// REDEFINE:   %VLA_ARCH_ATTR_OPTIONS \
+// REDEFINE:   --dlopen=%mlir_native_utils_lib_dir/libmlir_c_runner_utils%shlibext | \
+// REDEFINE: FileCheck %s
+
+// Reduction in this file are supported by the AArch64 SVE backend
 
 #SV = #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ] }>
 #DV = #sparse_tensor.encoding<{ dimLevelType = [ "dense"      ] }>
@@ -54,30 +66,6 @@ module {
     return %0 : tensor<f32>
   }
 
-  func.func @prod_reduction_i32(%arga: tensor<32xi32, #DV>,
-                           %argx: tensor<i32>) -> tensor<i32> {
-    %0 = linalg.generic #trait_reduction
-      ins(%arga: tensor<32xi32, #DV>)
-      outs(%argx: tensor<i32>) {
-        ^bb(%a: i32, %x: i32):
-          %0 = arith.muli %x, %a : i32
-          linalg.yield %0 : i32
-    } -> tensor<i32>
-    return %0 : tensor<i32>
-  }
-
-  func.func @prod_reduction_f32(%arga: tensor<32xf32, #DV>,
-                           %argx: tensor<f32>) -> tensor<f32> {
-    %0 = linalg.generic #trait_reduction
-      ins(%arga: tensor<32xf32, #DV>)
-      outs(%argx: tensor<f32>) {
-        ^bb(%a: f32, %x: f32):
-          %0 = arith.mulf %x, %a : f32
-          linalg.yield %0 : f32
-    } -> tensor<f32>
-    return %0 : tensor<f32>
-  }
-
   func.func @and_reduction_i32(%arga: tensor<32xi32, #DV>,
                           %argx: tensor<i32>) -> tensor<i32> {
     %0 = linalg.generic #trait_reduction
@@ -169,10 +157,6 @@ module {
        : (tensor<32xi32, #SV>, tensor<i32>) -> tensor<i32>
     %1 = call @sum_reduction_f32(%sparse_input_f32, %rf)
        : (tensor<32xf32, #SV>, tensor<f32>) -> tensor<f32>
-    %2 = call @prod_reduction_i32(%dense_input_i32, %ri)
-       : (tensor<32xi32, #DV>, tensor<i32>) -> tensor<i32>
-    %3 = call @prod_reduction_f32(%dense_input_f32, %rf)
-       : (tensor<32xf32, #DV>, tensor<f32>) -> tensor<f32>
     %4 = call @and_reduction_i32(%dense_input_i32, %ri)
        : (tensor<32xi32, #DV>, tensor<i32>) -> tensor<i32>
     %5 = call @or_reduction_i32(%sparse_input_i32, %ri)
@@ -184,16 +168,12 @@ module {
     //
     // CHECK: 26
     // CHECK: 27.5
-    // CHECK: 3087
-    // CHECK: 168
     // CHECK: 1
     // CHECK: 15
     // CHECK: 10
     //
     call @dump_i32(%0) : (tensor<i32>) -> ()
     call @dump_f32(%1) : (tensor<f32>) -> ()
-    call @dump_i32(%2) : (tensor<i32>) -> ()
-    call @dump_f32(%3) : (tensor<f32>) -> ()
     call @dump_i32(%4) : (tensor<i32>) -> ()
     call @dump_i32(%5) : (tensor<i32>) -> ()
     call @dump_i32(%6) : (tensor<i32>) -> ()
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_reductions_prod.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_reductions_prod.mlir
new file mode 100644
index 0000000..a4942b9
--- /dev/null
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_reductions_prod.mlir
@@ -0,0 +1,115 @@
+// DEFINE: %{option} = enable-runtime-library=true
+// DEFINE: %{command} = mlir-opt %s --sparse-compiler=%{option} | \
+// DEFINE: mlir-cpu-runner \
+// DEFINE:  -e entry -entry-point-result=void  \
+// DEFINE:  -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | \
+// DEFINE: FileCheck %s
+//
+// RUN: %{command}
+//
+// Do the same run, but now with direct IR generation.
+// REDEFINE: %{option} = enable-runtime-library=false
+// RUN: %{command}
+//
+// Do the same run, but now with direct IR generation and vectorization.
+// REDEFINE: %{option} = "enable-runtime-library=false vl=2 reassociate-fp-reductions=true enable-index-optimizations=true"
+// RUN: %{command}
+
+// Product reductions - kept in a seperate file as these are not supported by
+// the AArch64 SVE backend (so the set-up is a bit different to
+// sparse_reducitons.mlir)
+
+#SV = #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ] }>
+#DV = #sparse_tensor.encoding<{ dimLevelType = [ "dense"      ] }>
+
+#trait_reduction = {
+  indexing_maps = [
+    affine_map<(i) -> (i)>,  // a
+    affine_map<(i) -> ()>    // x (scalar out)
+  ],
+  iterator_types = ["reduction"],
+  doc = "x += OPER_i a(i)"
+}
+
+// An example of vector reductions.
+module {
+
+  func.func @prod_reduction_i32(%arga: tensor<32xi32, #DV>,
+                           %argx: tensor<i32>) -> tensor<i32> {
+    %0 = linalg.generic #trait_reduction
+      ins(%arga: tensor<32xi32, #DV>)
+      outs(%argx: tensor<i32>) {
+        ^bb(%a: i32, %x: i32):
+          %0 = arith.muli %x, %a : i32
+          linalg.yield %0 : i32
+    } -> tensor<i32>
+    return %0 : tensor<i32>
+  }
+
+  func.func @prod_reduction_f32(%arga: tensor<32xf32, #DV>,
+                           %argx: tensor<f32>) -> tensor<f32> {
+    %0 = linalg.generic #trait_reduction
+      ins(%arga: tensor<32xf32, #DV>)
+      outs(%argx: tensor<f32>) {
+        ^bb(%a: f32, %x: f32):
+          %0 = arith.mulf %x, %a : f32
+          linalg.yield %0 : f32
+    } -> tensor<f32>
+    return %0 : tensor<f32>
+  }
+
+  func.func @dump_i32(%arg0 : tensor<i32>) {
+    %v = tensor.extract %arg0[] : tensor<i32>
+    vector.print %v : i32
+    return
+  }
+
+  func.func @dump_f32(%arg0 : tensor<f32>) {
+    %v = tensor.extract %arg0[] : tensor<f32>
+    vector.print %v : f32
+    return
+  }
+
+  func.func @entry() {
+    %ri = arith.constant dense< 7   > : tensor<i32>
+    %rf = arith.constant dense< 2.0 > : tensor<f32>
+
+    %c_1_i32 = arith.constant dense<[
+      1, 1, 7, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+      1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 7, 3
+    ]> : tensor<32xi32>
+
+    %c_1_f32 = arith.constant dense<[
+      1.0, 1.0, 1.0, 3.5, 1.0, 1.0, 1.0, 1.0,
+      1.0, 1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+      1.0, 1.0, 1.0, 1.0, 3.0, 1.0, 1.0, 1.0,
+      1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 4.0
+    ]> : tensor<32xf32>
+
+    // Convert constants to annotated tensors.
+    %dense_input_i32 = sparse_tensor.convert %c_1_i32
+      : tensor<32xi32> to tensor<32xi32, #DV>
+    %dense_input_f32 = sparse_tensor.convert %c_1_f32
+      : tensor<32xf32> to tensor<32xf32, #DV>
+
+    // Call the kernels.
+    %2 = call @prod_reduction_i32(%dense_input_i32, %ri)
+       : (tensor<32xi32, #DV>, tensor<i32>) -> tensor<i32>
+    %3 = call @prod_reduction_f32(%dense_input_f32, %rf)
+       : (tensor<32xf32, #DV>, tensor<f32>) -> tensor<f32>
+
+    // Verify results.
+    //
+    // CHECK: 3087
+    // CHECK: 168
+    //
+    call @dump_i32(%2) : (tensor<i32>) -> ()
+    call @dump_f32(%3) : (tensor<f32>) -> ()
+
+    // Release the resources.
+    bufferization.dealloc_tensor %dense_input_i32  : tensor<32xi32, #DV>
+    bufferization.dealloc_tensor %dense_input_f32  : tensor<32xf32, #DV>
+
+    return
+  }
+}
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sampled_matmul.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sampled_matmul.mlir
index 76d23c1..dc1da5e 100644
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sampled_matmul.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sampled_matmul.mlir
@@ -1,20 +1,32 @@
 // DEFINE: %{option} = enable-runtime-library=true
-// DEFINE: %{command} = mlir-opt %s --sparse-compiler=%{option} | \
-// DEFINE: TENSOR0="%mlir_src_dir/test/Integration/data/test.mtx" \
+// DEFINE: %{compile} = mlir-opt %s --sparse-compiler=%{option}
+// DEFINE: %{run} = TENSOR0="%mlir_src_dir/test/Integration/data/test.mtx" \
 // DEFINE: mlir-cpu-runner \
 // DEFINE:  -e entry -entry-point-result=void  \
 // DEFINE:  -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | \
 // DEFINE: FileCheck %s
 //
-// RUN: %{command}
+// RUN: %{compile} | %{run}
 //
 // Do the same run, but now with direct IR generation.
 // REDEFINE: %{option} = enable-runtime-library=false
-// RUN: %{command}
+// RUN: %{compile} | %{run}
 //
 // Do the same run, but now with direct IR generation and vectorization.
 // REDEFINE: %{option} = "enable-runtime-library=false vl=2 reassociate-fp-reductions=true enable-index-optimizations=true"
-// RUN: %{command}
+// RUN: %{compile} | %{run}
+
+// If SVE is available, do the same run, but now with direct IR generation and VLA
+// vectorization.
+// REDEFINE: %{option} = "enable-runtime-library=false vl=4  enable-arm-sve=%ENABLE_VLA"
+// REDEFINE: %{run} = TENSOR0="%mlir_src_dir/test/Integration/data/test.mtx" \
+// REDEFINE: %lli \
+// REDEFINE:   --entry-function=entry_lli \
+// REDEFINE:   --extra-module=%S/Inputs/main_for_lli.ll \
+// REDEFINE:   %VLA_ARCH_ATTR_OPTIONS \
+// REDEFINE:   --dlopen=%mlir_native_utils_lib_dir/libmlir_c_runner_utils%shlibext | \
+// REDEFINE: FileCheck %s
+// RUN: %{compile} | mlir-translate -mlir-to-llvmir | %{run}
 
 !Filename = !llvm.ptr<i8>
 
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sampled_mm_fusion.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sampled_mm_fusion.mlir
old mode 100755
new mode 100644
index 35cfe58..669840e
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sampled_mm_fusion.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sampled_mm_fusion.mlir
@@ -1,19 +1,30 @@
 // DEFINE: %{option} = enable-runtime-library=true
-// DEFINE: %{command} = mlir-opt %s --sparse-compiler=%{option} | \
-// DEFINE: mlir-cpu-runner \
+// DEFINE: %{compile} = mlir-opt %s --sparse-compiler=%{option}
+// DEFINE: %{run} = mlir-cpu-runner \
 // DEFINE:  -e entry -entry-point-result=void  \
 // DEFINE:  -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | \
 // DEFINE: FileCheck %s
 //
-// RUN: %{command}
+// RUN: %{compile} | %{run}
 //
 // Do the same run, but now with direct IR generation.
 // REDEFINE: %{option} = "enable-runtime-library=false enable-buffer-initialization=true"
-// RUN: %{command}
+// RUN: %{compile} | %{run}
 //
 // Do the same run, but now with direct IR generation and vectorization.
 // REDEFINE: %{option} = "enable-runtime-library=false enable-buffer-initialization=true vl=2 reassociate-fp-reductions=true enable-index-optimizations=true"
-// RUN: %{command}
+// RUN: %{compile} | %{run}
+
+// If SVE is available, do the same run, but now with direct IR generation and VLA
+// vectorization.
+// REDEFINE: %{option} = "enable-runtime-library=false enable-buffer-initialization=true vl=4 enable-arm-sve=%ENABLE_VLA"
+// REDEFINE: %{run} = %lli \
+// REDEFINE:   --entry-function=entry_lli \
+// REDEFINE:   --extra-module=%S/Inputs/main_for_lli.ll \
+// REDEFINE:   %VLA_ARCH_ATTR_OPTIONS \
+// REDEFINE:   --dlopen=%mlir_native_utils_lib_dir/libmlir_c_runner_utils%shlibext | \
+// REDEFINE: FileCheck %s
+// RUN: %{compile} | mlir-translate -mlir-to-llvmir | %{run}
 
 #SM = #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "compressed" ] }>
 
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_scale.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_scale.mlir
index 182d7ae..2c872e3 100644
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_scale.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_scale.mlir
@@ -1,15 +1,26 @@
 // DEFINE: %{option} = enable-runtime-library=true
-// DEFINE: %{command} = mlir-opt %s --sparse-compiler=%{option} | \
-// DEFINE: mlir-cpu-runner \
+// DEFINE: %{compile} = mlir-opt %s --sparse-compiler=%{option}
+// DEFINE: %{run} = mlir-cpu-runner \
 // DEFINE:  -e entry -entry-point-result=void  \
 // DEFINE:  -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | \
 // DEFINE: FileCheck %s
 //
-// RUN: %{command}
+// RUN: %{compile} | %{run}
 //
 // Do the same run, but now with direct IR generation.
 // REDEFINE: %{option} = enable-runtime-library=false
-// RUN: %{command}
+// RUN: %{compile} | %{run}
+
+// If SVE is available, do the same run, but now with direct IR generation and VLA
+// vectorization.
+// REDEFINE: %{option} = "enable-runtime-library=false vl=4 enable-arm-sve=%ENABLE_VLA"
+// REDEFINE: %{run} = %lli \
+// REDEFINE:   --entry-function=entry_lli \
+// REDEFINE:   --extra-module=%S/Inputs/main_for_lli.ll \
+// REDEFINE:   %VLA_ARCH_ATTR_OPTIONS \
+// REDEFINE:   --dlopen=%mlir_native_utils_lib_dir/libmlir_c_runner_utils%shlibext | \
+// REDEFINE: FileCheck %s
+// RUN: %{compile} | mlir-translate -mlir-to-llvmir | %{run}
 
 #CSR = #sparse_tensor.encoding<{ dimLevelType = [ "dense", "compressed" ] }>
 
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_spmm.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_spmm.mlir
index de44ae3..1bb3f39 100644
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_spmm.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_spmm.mlir
@@ -1,20 +1,32 @@
 // DEFINE: %{option} = enable-runtime-library=true
-// DEFINE: %{command} = mlir-opt %s --sparse-compiler=%{option} | \
-// DEFINE: TENSOR0="%mlir_src_dir/test/Integration/data/wide.mtx" \
+// DEFINE: %{compile} = mlir-opt %s --sparse-compiler=%{option}
+// DEFINE: %{run} = TENSOR0="%mlir_src_dir/test/Integration/data/wide.mtx" \
 // DEFINE: mlir-cpu-runner \
 // DEFINE:  -e entry -entry-point-result=void  \
 // DEFINE:  -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | \
 // DEFINE: FileCheck %s
 //
-// RUN: %{command}
+// RUN: %{compile} | %{run}
 //
 // Do the same run, but now with direct IR generation.
 // REDEFINE: %{option} = enable-runtime-library=false
-// RUN: %{command}
+// RUN: %{compile} | %{run}
 //
 // Do the same run, but now with direct IR generation and vectorization.
 // REDEFINE: %{option} = "enable-runtime-library=false vl=2 reassociate-fp-reductions=true enable-index-optimizations=true"
-// RUN: %{command}
+// RUN: %{compile} | %{run}
+
+// If SVE is available, do the same run, but now with direct IR generation and VLA
+// vectorization.
+// REDEFINE: %{option} = "enable-runtime-library=false vl=4  enable-arm-sve=%ENABLE_VLA"
+// REDEFINE: %{run} = TENSOR0="%mlir_src_dir/test/Integration/data/wide.mtx" \
+// REDEFINE: lli \
+// REDEFINE:   --entry-function=entry_lli \
+// REDEFINE:   --extra-module=%S/Inputs/main_for_lli.ll \
+// REDEFINE:   %VLA_ARCH_ATTR_OPTIONS \
+// REDEFINE:   --dlopen=%mlir_native_utils_lib_dir/libmlir_c_runner_utils%shlibext | \
+// REDEFINE: FileCheck %s
+// RUN: %{compile} | mlir-translate -mlir-to-llvmir | %{run}
 
 !Filename = !llvm.ptr<i8>
 
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sum.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sum.mlir
index 43014dd..588bcb0 100644
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sum.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sum.mlir
@@ -1,20 +1,32 @@
 // DEFINE: %{option} = enable-runtime-library=true
-// DEFINE: %{command} = mlir-opt %s --sparse-compiler=%{option} | \
-// DEFINE: TENSOR0="%mlir_src_dir/test/Integration/data/test_symmetric.mtx" \
+// DEFINE: %{compile} = mlir-opt %s --sparse-compiler=%{option}
+// DEFINE: %{run} = TENSOR0="%mlir_src_dir/test/Integration/data/test_symmetric.mtx" \
 // DEFINE: mlir-cpu-runner \
 // DEFINE:  -e entry -entry-point-result=void  \
 // DEFINE:  -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | \
 // DEFINE: FileCheck %s
 //
-// RUN: %{command}
+// RUN: %{compile} | %{run}
 //
 // Do the same run, but now with direct IR generation.
 // REDEFINE: %{option} = enable-runtime-library=false
-// RUN: %{command}
+// RUN: %{compile} | %{run}
 //
 // Do the same run, but now with direct IR generation and vectorization.
 // REDEFINE: %{option} = "enable-runtime-library=false vl=2 reassociate-fp-reductions=true enable-index-optimizations=true"
-// RUN: %{command}
+// RUN: %{compile} | %{run}
+
+// If SVE is available, do the same run, but now with direct IR generation and VLA
+// vectorization.
+// REDEFINE: %{option} = "enable-runtime-library=false vl=4  enable-arm-sve=%ENABLE_VLA"
+// REDEFINE: %{run} = TENSOR0="%mlir_src_dir/test/Integration/data/test_symmetric.mtx" \
+// REDEFINE: %lli \
+// REDEFINE:   --entry-function=entry_lli \
+// REDEFINE:   --extra-module=%S/Inputs/main_for_lli.ll \
+// REDEFINE:   %VLA_ARCH_ATTR_OPTIONS \
+// REDEFINE:   --dlopen=%mlir_native_utils_lib_dir/libmlir_c_runner_utils%shlibext | \
+// REDEFINE: FileCheck %s
+// RUN: %{compile} | mlir-translate -mlir-to-llvmir | %{run}
 
 !Filename = !llvm.ptr<i8>
 
-- 
2.7.4