From fb0b035e35ba1203912c7a756697d30c271fedb3 Mon Sep 17 00:00:00 2001
From: Andrzej Warzynski <andrzej.warzynski@gmail.com>
Date: Sat, 25 Mar 2023 20:01:34 +0000
Subject: [PATCH] [mlir-cpu-runner] Add support for `-mattr` and `-march` flags

This patch adds support for `-mattr` and `-march` in mlir-cpu-runner.
With this change, one should be able to consistently use mlir-cpu-runner
for MLIR's integration tests (instead of e.g. resorting to lli when some
additional flags are needed). This is demonstrated in
concatenate_dim_1.mlir.

In order to support the new flags, this patch makes sure that MLIR's
ExecutionEngine/JitRunner (on top of which mlir-cpu-runner is built):
  * takes into account the new command line flags when creating
    TargetMachine,
  * avoids recreating TargetMachine if one is already available,
  * creates LLVM's DataLayout based on the previously configured
    TargetMachine.
This is necessary in order to make sure that the command line
configuration is propagated correctly to the backend code generator.

A few additional updates are made in order to facilitate this change,
including support for debug dumps from JitRunner (`--debug-only=jit-runner`).

Differential Revision: https://reviews.llvm.org/D146917
---
 mlir/examples/toy/Ch6/toyc.cpp                     | 16 ++++-
 mlir/examples/toy/Ch7/toyc.cpp                     | 16 ++++-
 .../include/mlir/ExecutionEngine/ExecutionEngine.h | 15 ++--
 mlir/lib/ExecutionEngine/ExecutionEngine.cpp       | 66 ++++++++----------
 mlir/lib/ExecutionEngine/JitRunner.cpp             | 79 ++++++++++++++++------
 .../SparseTensor/CPU/concatenate_dim_1.mlir        | 20 ++----
 mlir/test/mlir-cpu-runner/verify-flags.mlir        | 14 ++++
 7 files changed, 147 insertions(+), 79 deletions(-)
 create mode 100644 mlir/test/mlir-cpu-runner/verify-flags.mlir

diff --git a/mlir/examples/toy/Ch6/toyc.cpp b/mlir/examples/toy/Ch6/toyc.cpp
index 830df1c..60b7da0 100644
--- a/mlir/examples/toy/Ch6/toyc.cpp
+++ b/mlir/examples/toy/Ch6/toyc.cpp
@@ -215,7 +215,21 @@ int dumpLLVMIR(mlir::ModuleOp module) {
   // Initialize LLVM targets.
   llvm::InitializeNativeTarget();
   llvm::InitializeNativeTargetAsmPrinter();
-  mlir::ExecutionEngine::setupTargetTriple(llvmModule.get());
+
+  // Create target machine and configure the LLVM Module
+  auto tmBuilderOrError = llvm::orc::JITTargetMachineBuilder::detectHost();
+  if (!tmBuilderOrError) {
+    llvm::errs() << "Could not create JITTargetMachineBuilder\n";
+    return -1;
+  }
+
+  auto tmOrError = tmBuilderOrError->createTargetMachine();
+  if (!tmOrError) {
+    llvm::errs() << "Could not create TargetMachine\n";
+    return -1;
+  }
+  mlir::ExecutionEngine::setupTargetTripleAndDataLayout(llvmModule.get(),
+                                                        tmOrError.get().get());
 
   /// Optionally run an optimization pipeline over the llvm module.
   auto optPipeline = mlir::makeOptimizingTransformer(
diff --git a/mlir/examples/toy/Ch7/toyc.cpp b/mlir/examples/toy/Ch7/toyc.cpp
index 056f2a8..f074bf6 100644
--- a/mlir/examples/toy/Ch7/toyc.cpp
+++ b/mlir/examples/toy/Ch7/toyc.cpp
@@ -216,7 +216,21 @@ int dumpLLVMIR(mlir::ModuleOp module) {
   // Initialize LLVM targets.
   llvm::InitializeNativeTarget();
   llvm::InitializeNativeTargetAsmPrinter();
-  mlir::ExecutionEngine::setupTargetTriple(llvmModule.get());
+
+  // Create target machine and configure the LLVM Module
+  auto tmBuilderOrError = llvm::orc::JITTargetMachineBuilder::detectHost();
+  if (!tmBuilderOrError) {
+    llvm::errs() << "Could not create JITTargetMachineBuilder\n";
+    return -1;
+  }
+
+  auto tmOrError = tmBuilderOrError->createTargetMachine();
+  if (!tmOrError) {
+    llvm::errs() << "Could not create TargetMachine\n";
+    return -1;
+  }
+  mlir::ExecutionEngine::setupTargetTripleAndDataLayout(llvmModule.get(),
+                                                        tmOrError.get().get());
 
   /// Optionally run an optimization pipeline over the llvm module.
   auto optPipeline = mlir::makeOptimizingTransformer(
diff --git a/mlir/include/mlir/ExecutionEngine/ExecutionEngine.h b/mlir/include/mlir/ExecutionEngine/ExecutionEngine.h
index 13d3c0f6..c62a18d 100644
--- a/mlir/include/mlir/ExecutionEngine/ExecutionEngine.h
+++ b/mlir/include/mlir/ExecutionEngine/ExecutionEngine.h
@@ -108,9 +108,12 @@ public:
   ExecutionEngine(bool enableObjectDump, bool enableGDBNotificationListener,
                   bool enablePerfNotificationListener);
 
-  /// Creates an execution engine for the given MLIR IR.
+  /// Creates an execution engine for the given MLIR IR. If no TargetMachine
+  /// is provided, a default one is created for the host (i.e. any command
+  /// line flags that could affect the set-up are ignored).
   static llvm::Expected<std::unique_ptr<ExecutionEngine>>
-  create(Operation *op, const ExecutionEngineOptions &options = {});
+  create(Operation *op, const ExecutionEngineOptions &options = {},
+         std::unique_ptr<llvm::TargetMachine> tm = nullptr);
 
   /// Looks up a packed-argument function wrapping the function with the given
   /// name and returns a pointer to it. Propagates errors in case of failure.
@@ -180,9 +183,11 @@ public:
     return invokePacked(adapterName, argsArray);
   }
 
-  /// Set the target triple on the module. This is implicitly done when creating
-  /// the engine.
-  static bool setupTargetTriple(llvm::Module *llvmModule);
+  /// Set the target triple and the data layout for the input module based on
+  /// the input TargetMachine. This is implicitly done when creating the
+  /// engine.
+  static void setupTargetTripleAndDataLayout(llvm::Module *llvmModule,
+                                             llvm::TargetMachine *tm);
 
   /// Dump object code to output file `filename`.
   void dumpToObjectFile(StringRef filename);
diff --git a/mlir/lib/ExecutionEngine/ExecutionEngine.cpp b/mlir/lib/ExecutionEngine/ExecutionEngine.cpp
index b122fdb..655d093 100644
--- a/mlir/lib/ExecutionEngine/ExecutionEngine.cpp
+++ b/mlir/lib/ExecutionEngine/ExecutionEngine.cpp
@@ -128,35 +128,10 @@ void ExecutionEngine::registerSymbols(
           mainJitDylib.getExecutionSession(), jit->getDataLayout())))));
 }
 
-// Setup LLVM target triple from the current machine.
-bool ExecutionEngine::setupTargetTriple(Module *llvmModule) {
-  // Setup the machine properties from the current architecture.
-  auto targetTriple = llvm::sys::getDefaultTargetTriple();
-  std::string errorMessage;
-  const auto *target =
-      llvm::TargetRegistry::lookupTarget(targetTriple, errorMessage);
-  if (!target) {
-    errs() << "NO target: " << errorMessage << "\n";
-    return true;
-  }
-
-  std::string cpu(llvm::sys::getHostCPUName());
-  llvm::SubtargetFeatures features;
-  llvm::StringMap<bool> hostFeatures;
-
-  if (llvm::sys::getHostCPUFeatures(hostFeatures))
-    for (const auto &[feature, isEnabled] : hostFeatures)
-      features.AddFeature(feature, isEnabled);
-
-  std::unique_ptr<llvm::TargetMachine> machine(target->createTargetMachine(
-      targetTriple, cpu, features.getString(), {}, {}));
-  if (!machine) {
-    errs() << "Unable to create target machine\n";
-    return true;
-  }
-  llvmModule->setDataLayout(machine->createDataLayout());
-  llvmModule->setTargetTriple(targetTriple);
-  return false;
+void ExecutionEngine::setupTargetTripleAndDataLayout(Module *llvmModule,
+                                                     llvm::TargetMachine *tm) {
+  llvmModule->setDataLayout(tm->createDataLayout());
+  llvmModule->setTargetTriple(tm->getTargetTriple().getTriple());
 }
 
 static std::string makePackedFunctionName(StringRef name) {
@@ -248,7 +223,8 @@ ExecutionEngine::ExecutionEngine(bool enableObjectDump,
 }
 
 Expected<std::unique_ptr<ExecutionEngine>>
-ExecutionEngine::create(Operation *m, const ExecutionEngineOptions &options) {
+ExecutionEngine::create(Operation *m, const ExecutionEngineOptions &options,
+                        std::unique_ptr<llvm::TargetMachine> tm) {
   auto engine = std::make_unique<ExecutionEngine>(
       options.enableObjectDump, options.enableGDBNotificationListener,
       options.enablePerfNotificationListener);
@@ -267,10 +243,26 @@ ExecutionEngine::create(Operation *m, const ExecutionEngineOptions &options) {
                         : translateModuleToLLVMIR(m, *ctx);
   if (!llvmModule)
     return makeStringError("could not convert to LLVM IR");
-  // FIXME: the triple should be passed to the translation or dialect conversion
-  // instead of this.  Currently, the LLVM module created above has no triple
-  // associated with it.
-  setupTargetTriple(llvmModule.get());
+
+  // If no valid TargetMachine was passed, create a default TM ignoring any
+  // input arguments from the user.
+  if (!tm) {
+    auto tmBuilderOrError = llvm::orc::JITTargetMachineBuilder::detectHost();
+    if (!tmBuilderOrError)
+      return tmBuilderOrError.takeError();
+
+    auto tmOrError = tmBuilderOrError->createTargetMachine();
+    if (!tmOrError)
+      return tmOrError.takeError();
+    tm = std::move(tmOrError.get());
+  }
+
+  // TODO: Currently, the LLVM module created above has no triple associated
+  // with it. Instead, the triple is extracted from the TargetMachine, which is
+  // either based on the host defaults or command line arguments when specified
+  // (set-up by callers of this method). It could also be passed to the
+  // translation or dialect conversion instead of this.
+  setupTargetTripleAndDataLayout(llvmModule.get(), tm.get());
   packFunctionArguments(llvmModule.get());
 
   auto dataLayout = llvmModule->getDataLayout();
@@ -328,10 +320,7 @@ ExecutionEngine::create(Operation *m, const ExecutionEngineOptions &options) {
       -> Expected<std::unique_ptr<IRCompileLayer::IRCompiler>> {
     if (options.jitCodeGenOptLevel)
       jtmb.setCodeGenOptLevel(*options.jitCodeGenOptLevel);
-    auto tm = jtmb.createTargetMachine();
-    if (!tm)
-      return tm.takeError();
-    return std::make_unique<TMOwningSimpleCompiler>(std::move(*tm),
+    return std::make_unique<TMOwningSimpleCompiler>(std::move(tm),
                                                     engine->cache.get());
   };
 
@@ -340,6 +329,7 @@ ExecutionEngine::create(Operation *m, const ExecutionEngineOptions &options) {
       cantFail(llvm::orc::LLJITBuilder()
                    .setCompileFunctionCreator(compileFunctionCreator)
                    .setObjectLinkingLayerCreator(objectLinkingLayerCreator)
+                   .setDataLayout(dataLayout)
                    .create());
 
   // Add a ThreadSafemodule to the engine and return.
diff --git a/mlir/lib/ExecutionEngine/JitRunner.cpp b/mlir/lib/ExecutionEngine/JitRunner.cpp
index e1c5d95..18405a9 100644
--- a/mlir/lib/ExecutionEngine/JitRunner.cpp
+++ b/mlir/lib/ExecutionEngine/JitRunner.cpp
@@ -32,14 +32,17 @@
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/LegacyPassNameParser.h"
 #include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
 #include "llvm/Support/FileUtilities.h"
 #include "llvm/Support/SourceMgr.h"
 #include "llvm/Support/StringSaver.h"
 #include "llvm/Support/ToolOutputFile.h"
 #include <cstdint>
 #include <numeric>
-#include <utility>
 #include <optional>
+#include <utility>
+
+#define DEBUG_TYPE "jit-runner"
 
 using namespace mlir;
 using llvm::Error;
@@ -75,6 +78,15 @@ struct Options {
                             llvm::cl::desc("Run opt passes and codegen at O3"),
                             llvm::cl::cat(optFlags)};
 
+  llvm::cl::list<std::string> mAttrs{
+      "mattr", llvm::cl::MiscFlags::CommaSeparated,
+      llvm::cl::desc("Target specific attributes (-mattr=help for details)"),
+      llvm::cl::value_desc("a1,+a2,-a3,..."), llvm::cl::cat(optFlags)};
+
+  llvm::cl::opt<std::string> mArch{
+      "march",
+      llvm::cl::desc("Architecture to generate code for (see --version)")};
+
   llvm::cl::OptionCategory clOptionsCategory{"linking options"};
   llvm::cl::list<std::string> clSharedLibs{
       "shared-libs", llvm::cl::desc("Libraries to link dynamically"),
@@ -165,9 +177,10 @@ static std::optional<unsigned> getCommandLineOptLevel(Options &options) {
 }
 
 // JIT-compile the given module and run "entryPoint" with "args" as arguments.
-static Error compileAndExecute(Options &options, Operation *module,
-                               StringRef entryPoint,
-                               CompileAndExecuteConfig config, void **args) {
+static Error
+compileAndExecute(Options &options, Operation *module, StringRef entryPoint,
+                  CompileAndExecuteConfig config, void **args,
+                  std::unique_ptr<llvm::TargetMachine> tm = nullptr) {
   std::optional<llvm::CodeGenOpt::Level> jitCodeGenOptLevel;
   if (auto clOptLevel = getCommandLineOptLevel(options))
     jitCodeGenOptLevel = static_cast<llvm::CodeGenOpt::Level>(*clOptLevel);
@@ -232,7 +245,8 @@ static Error compileAndExecute(Options &options, Operation *module,
   engineOptions.jitCodeGenOptLevel = jitCodeGenOptLevel;
   engineOptions.sharedLibPaths = executionEngineLibs;
   engineOptions.enableObjectDump = true;
-  auto expectedEngine = mlir::ExecutionEngine::create(module, engineOptions);
+  auto expectedEngine =
+      mlir::ExecutionEngine::create(module, engineOptions, std::move(tm));
   if (!expectedEngine)
     return expectedEngine.takeError();
 
@@ -258,16 +272,16 @@ static Error compileAndExecute(Options &options, Operation *module,
   return Error::success();
 }
 
-static Error compileAndExecuteVoidFunction(Options &options, Operation *module,
-                                           StringRef entryPoint,
-                                           CompileAndExecuteConfig config) {
+static Error compileAndExecuteVoidFunction(
+    Options &options, Operation *module, StringRef entryPoint,
+    CompileAndExecuteConfig config, std::unique_ptr<llvm::TargetMachine> tm) {
   auto mainFunction = dyn_cast_or_null<LLVM::LLVMFuncOp>(
       SymbolTable::lookupSymbolIn(module, entryPoint));
   if (!mainFunction || mainFunction.empty())
     return makeStringError("entry point not found");
   void *empty = nullptr;
   return compileAndExecute(options, module, entryPoint, std::move(config),
-                           &empty);
+                           &empty, std::move(tm));
 }
 
 template <typename Type>
@@ -302,9 +316,9 @@ Error checkCompatibleReturnType<float>(LLVM::LLVMFuncOp mainFunction) {
   return Error::success();
 }
 template <typename Type>
-Error compileAndExecuteSingleReturnFunction(Options &options, Operation *module,
-                                            StringRef entryPoint,
-                                            CompileAndExecuteConfig config) {
+Error compileAndExecuteSingleReturnFunction(
+    Options &options, Operation *module, StringRef entryPoint,
+    CompileAndExecuteConfig config, std::unique_ptr<llvm::TargetMachine> tm) {
   auto mainFunction = dyn_cast_or_null<LLVM::LLVMFuncOp>(
       SymbolTable::lookupSymbolIn(module, entryPoint));
   if (!mainFunction || mainFunction.isExternal())
@@ -323,8 +337,9 @@ Error compileAndExecuteSingleReturnFunction(Options &options, Operation *module,
     void *data;
   } data;
   data.data = &res;
-  if (auto error = compileAndExecute(options, module, entryPoint,
-                                     std::move(config), (void **)&data))
+  if (auto error =
+          compileAndExecute(options, module, entryPoint, std::move(config),
+                            (void **)&data, std::move(tm)))
     return error;
 
   // Intentional printing of the output so we can test.
@@ -337,6 +352,8 @@ Error compileAndExecuteSingleReturnFunction(Options &options, Operation *module,
 /// standard C++ main functions.
 int mlir::JitRunnerMain(int argc, char **argv, const DialectRegistry &registry,
                         JitRunnerConfig config) {
+  llvm::ExitOnError exitOnErr;
+
   // Create the options struct containing the command line options for the
   // runner. This must come before the command line options are parsed.
   Options options;
@@ -347,8 +364,8 @@ int mlir::JitRunnerMain(int argc, char **argv, const DialectRegistry &registry,
     if (j)
       llvm::outs() << "true\n";
     else {
-      llvm::consumeError(j.takeError());
       llvm::outs() << "false\n";
+      exitOnErr(j.takeError());
     }
     return 0;
   }
@@ -376,12 +393,33 @@ int mlir::JitRunnerMain(int argc, char **argv, const DialectRegistry &registry,
     llvm::errs() << "Failed to create a JITTargetMachineBuilder for the host\n";
     return EXIT_FAILURE;
   }
+
+  // Configure TargetMachine builder based on the command line options
+  llvm::SubtargetFeatures features;
+  if (!options.mAttrs.empty()) {
+    for (unsigned i = 0; i != options.mAttrs.size(); ++i)
+      features.AddFeature(options.mAttrs[i]);
+    tmBuilderOrError->addFeatures(features.getFeatures());
+  }
+
+  if (!options.mArch.empty()) {
+    tmBuilderOrError->getTargetTriple().setArchName(options.mArch);
+  }
+
+  // Build TargetMachine
   auto tmOrError = tmBuilderOrError->createTargetMachine();
+
   if (!tmOrError) {
     llvm::errs() << "Failed to create a TargetMachine for the host\n";
-    return EXIT_FAILURE;
+    exitOnErr(tmOrError.takeError());
   }
 
+  LLVM_DEBUG({
+    llvm::dbgs() << "  JITTargetMachineBuilder is "
+                 << llvm::orc::JITTargetMachineBuilderPrinter(*tmBuilderOrError,
+                                                              "\n");
+  });
+
   CompileAndExecuteConfig compileAndExecuteConfig;
   if (optLevel) {
     compileAndExecuteConfig.transformer = mlir::makeOptimizingTransformer(
@@ -392,7 +430,8 @@ int mlir::JitRunnerMain(int argc, char **argv, const DialectRegistry &registry,
 
   // Get the function used to compile and execute the module.
   using CompileAndExecuteFnT =
-      Error (*)(Options &, Operation *, StringRef, CompileAndExecuteConfig);
+      Error (*)(Options &, Operation *, StringRef, CompileAndExecuteConfig,
+                std::unique_ptr<llvm::TargetMachine> tm);
   auto compileAndExecuteFn =
       StringSwitch<CompileAndExecuteFnT>(options.mainFuncType.getValue())
           .Case("i32", compileAndExecuteSingleReturnFunction<int32_t>)
@@ -402,9 +441,9 @@ int mlir::JitRunnerMain(int argc, char **argv, const DialectRegistry &registry,
           .Default(nullptr);
 
   Error error = compileAndExecuteFn
-                    ? compileAndExecuteFn(options, m.get(),
-                                          options.mainFuncName.getValue(),
-                                          compileAndExecuteConfig)
+                    ? compileAndExecuteFn(
+                          options, m.get(), options.mainFuncName.getValue(),
+                          compileAndExecuteConfig, std::move(tmOrError.get()))
                     : makeStringError("unsupported function type");
 
   int exitCode = EXIT_SUCCESS;
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/concatenate_dim_1.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/concatenate_dim_1.mlir
index 17a5446..15116e4 100644
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/concatenate_dim_1.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/concatenate_dim_1.mlir
@@ -1,8 +1,9 @@
 // DEFINE: %{option} = enable-runtime-library=true
+// DEFINE: %{run_option} =
 // DEFINE: %{compile} = mlir-opt %s --sparse-compiler=%{option}
 // DEFINE: %{run} = mlir-cpu-runner \
 // DEFINE:  -e entry -entry-point-result=void  \
-// DEFINE:  -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext,%mlir_lib_dir/libmlir_runner_utils%shlibext | \
+// DEFINE:  -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext,%mlir_lib_dir/libmlir_runner_utils%shlibext %{run_option} | \
 // DEFINE: FileCheck %s
 //
 // RUN: %{compile} | %{run}
@@ -11,21 +12,12 @@
 // REDEFINE: %{option} = "enable-runtime-library=false enable-buffer-initialization=true"
 // RUN: %{compile} | %{run}
 //
-// Do the same run, but now with direct IR generation and vectorization.
-// REDEFINE: %{option} = "enable-runtime-library=false enable-buffer-initialization=true vl=2 reassociate-fp-reductions=true enable-index-optimizations=true"
+// Do the same run, but now with direct IR generation and vectorization. Enable
+// Arm SVE if supported.
+// REDEFINE: %{option} = "enable-runtime-library=false enable-buffer-initialization=true vl=4 enable-arm-sve=%ENABLE_VLA reassociate-fp-reductions=true enable-index-optimizations=true"
+// REDEFINE: %{run_option} = "%VLA_ARCH_ATTR_OPTIONS"
 // RUN: %{compile} | %{run}
 
-// Do the same run, but now with direct IR generation and, if available, VLA
-// vectorization.
-// REDEFINE: %{option} = "enable-runtime-library=false vl=4 enable-arm-sve=%ENABLE_VLA"
-// REDEFINE: %{run} = %lli \
-// REDEFINE:   --entry-function=entry_lli \
-// REDEFINE:   --extra-module=%S/Inputs/main_for_lli.ll \
-// REDEFINE:   %VLA_ARCH_ATTR_OPTIONS \
-// REDEFINE:   --dlopen=%mlir_native_utils_lib_dir/libmlir_c_runner_utils%shlibext --dlopen=%mlir_lib_dir/libmlir_runner_utils%shlibext | \
-// REDEFINE: FileCheck %s
-// RUN: %{compile} | mlir-translate -mlir-to-llvmir | %{run}
-
 #MAT_C_C = #sparse_tensor.encoding<{dimLevelType = ["compressed", "compressed"]}>
 #MAT_D_C = #sparse_tensor.encoding<{dimLevelType = ["dense", "compressed"]}>
 #MAT_C_D = #sparse_tensor.encoding<{dimLevelType = ["compressed", "dense"]}>
diff --git a/mlir/test/mlir-cpu-runner/verify-flags.mlir b/mlir/test/mlir-cpu-runner/verify-flags.mlir
new file mode 100644
index 0000000..8788122
--- /dev/null
+++ b/mlir/test/mlir-cpu-runner/verify-flags.mlir
@@ -0,0 +1,14 @@
+// RUN: mlir-cpu-runner %s --debug-only=jit-runner -mattr=+foo_bar -e entry -entry-point-result=void 2>&1 | FileCheck %s --check-prefixes=MATTR
+// RUN: not mlir-cpu-runner %s --debug-only=jit-runner -march=bar_foo -e entry -entry-point-result=void 2>&1 | FileCheck %s --check-prefixes=MARCH
+
+// Verify that command line args do affect the configuration
+
+// MATTR: Features = 
+// MATTR-SAME: +foo_bar
+
+// MARCH: Failed to create a TargetMachine for the host
+// MARCH-NEXT: No available targets are compatible with triple "bar_foo-{{.*}}"
+
+llvm.func @entry() -> () {
+  llvm.return
+}
-- 
2.7.4