[mlgo] Introduce an "InteractiveModelRunner"
authorMircea Trofin <mtrofin@google.com>
Thu, 26 Jan 2023 15:58:17 +0000 (07:58 -0800)
committerMircea Trofin <mtrofin@google.com>
Sat, 28 Jan 2023 01:03:28 +0000 (17:03 -0800)
This is a model runner for ML researchers using environments like
CompilerGym. In such environments, researchers host the compiler and
want to be able to observe the problem space (features) at each decision
step of some optimization pass, at which point the compiler is stopped,
waiting for the host to make a decision and provide advice back to
the compiler, which then continues its normal operation, and so on.

The InteractiveModelRunner supports this scenario for the feature set
exposed by the compiler at a given time. It uses 2 files - ideally FIFO
pipes - one to pass data to the host, the other to get advice back from
the host. This means the scenario is supported with no special
dependencies. File creation and deletion are the responsibility of the
host. Hooking up this model evaluator to an MLGO-enabled pass is the
responsibility of the pass author, and subsequent patches will do so for
the current set of mlgo passes, and offer an API to easily "just opt in"
by default when enabling MLGO in a new pass.
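
To illustrate the hookup, here is a minimal sketch of what a pass author
might write to construct the runner, using the constructor added in this
patch. The helper name, feature names, and pipe paths are placeholders,
not part of this patch:

  // Hypothetical hookup inside an MLGO-enabled pass. The feature names,
  // shapes, and pipe paths are placeholders; the host must have created
  // both files (ideally as FIFOs) beforehand.
  #include "llvm/Analysis/InteractiveModelRunner.h"
  #include "llvm/Analysis/MLModelRunner.h"
  #include "llvm/Analysis/TensorSpec.h"
  #include "llvm/IR/LLVMContext.h"
  #include <memory>
  #include <vector>

  using namespace llvm;

  static std::unique_ptr<MLModelRunner>
  makeInteractiveRunner(LLVMContext &Ctx) {
    std::vector<TensorSpec> Inputs{
        TensorSpec::createSpec<int64_t>("some_feature", {1}),
        TensorSpec::createSpec<int64_t>("another_feature", {1}),
    };
    TensorSpec Advice = TensorSpec::createSpec<float>("advice", {1});
    // Outbound file: compiler -> host; inbound file: host -> compiler.
    return std::make_unique<InteractiveModelRunner>(
        Ctx, Inputs, Advice, "/tmp/compiler-to-host.pipe",
        "/tmp/host-to-compiler.pipe");
  }

The pass then fills its feature tensors via getTensor<>() and calls
evaluate<>() at each decision point, just like with the other model
runners.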

The data protocol is that of the training logger: the host sees a training
log doled out observation by observation by reading from one of the files,
and passes back its advice as a serialized tensor (i.e., a raw memory dump
of the tensor value) via the other file.
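
For a concrete feel of the protocol, here is a rough host-side sketch, not
part of this patch. The shapes (three scalar int64 features in, one scalar
float advice out) and the newline framing mirror the unit test added below;
the pipe paths are placeholders:

  // Hypothetical host loop. It reads the JSON header and context lines,
  // then, per observation: a JSON line, the raw feature tensor bytes, a
  // newline - and replies with the advice tensor's raw bytes.
  #include <cstdint>
  #include <cstdio>
  #include <string>

  int main() {
    // The host creates the two files (e.g. with mkfifo) and owns cleanup.
    std::FILE *FromCompiler = std::fopen("/tmp/compiler-to-host.pipe", "rb");
    std::FILE *ToCompiler = std::fopen("/tmp/host-to-compiler.pipe", "wb");
    if (!FromCompiler || !ToCompiler)
      return 1;

    auto ReadLine = [&] {
      std::string Line;
      for (int C = std::fgetc(FromCompiler); C != EOF && C != '\n';
           C = std::fgetc(FromCompiler))
        Line.push_back(static_cast<char>(C));
      return Line;
    };

    ReadLine(); // JSON header describing the feature tensors.
    ReadLine(); // JSON context line.
    while (true) {
      if (ReadLine().empty()) // JSON observation marker.
        break;
      int64_t Features[3];
      if (std::fread(Features, sizeof(Features), 1, FromCompiler) != 1)
        break;                  // raw tensor value dump
      std::fgetc(FromCompiler); // newline terminating the observation
      float Advice = 0.0f;      // ... compute a decision from Features ...
      std::fwrite(&Advice, sizeof(Advice), 1, ToCompiler);
      std::fflush(ToCompiler);
    }
    std::fclose(FromCompiler);
    std::fclose(ToCompiler);
    return 0;
  }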

There are some differences with respect to the log seen during training:
the interactive model runner doesn't currently include the outcome (because
it should be identical to the decision, and it's also not present in
"release" mode); and partial rewards aren't currently communicated back.

The assumption - just like with the training logger - is that the host
is co-located, thus avoiding any endianness concerns. In a distributed
environment, it is up to the hosting infrastructure to mediate that.

Differential Revision: https://reviews.llvm.org/D142642

llvm/include/llvm/Analysis/InteractiveModelRunner.h [new file with mode: 0644]
llvm/include/llvm/Analysis/MLModelRunner.h
llvm/include/llvm/Analysis/TensorSpec.h
llvm/include/llvm/Analysis/Utils/TrainingLogger.h
llvm/lib/Analysis/CMakeLists.txt
llvm/lib/Analysis/InteractiveModelRunner.cpp [new file with mode: 0644]
llvm/lib/Analysis/TensorSpec.cpp
llvm/unittests/Analysis/MLModelRunnerTest.cpp
llvm/unittests/Analysis/TensorSpecTest.cpp

diff --git a/llvm/include/llvm/Analysis/InteractiveModelRunner.h b/llvm/include/llvm/Analysis/InteractiveModelRunner.h
new file mode 100644 (file)
index 0000000..a9324f1
--- /dev/null
@@ -0,0 +1,61 @@
+//===- InteractiveModelRunner.h ---- "gym" ML model runner  -----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+
+#ifndef LLVM_ANALYSIS_INTERACTIVEMODELRUNNER_H
+#define LLVM_ANALYSIS_INTERACTIVEMODELRUNNER_H
+
+#include "llvm/Analysis/MLModelRunner.h"
+#include "llvm/Analysis/TensorSpec.h"
+#include "llvm/Analysis/Utils/TrainingLogger.h"
+#include "llvm/Config/llvm-config.h"
+#include "llvm/Support/raw_ostream.h"
+#include <system_error>
+
+namespace llvm {
+
+/// A MLModelRunner that asks for advice from an external agent, or host. It
+/// uses 2 files - ideally named pipes - one to send data to that agent, and
+/// one to receive advice.
+/// The data exchange uses the training logger (Utils/TrainingLogger.h) format.
+/// Specifically, the compiler will send the log header, set the context, and
+/// send observations; the host is expected to reply with a tensor value after
+/// each observation as a binary buffer that's conforming to the shape of the
+/// advice. Interleaved, the data closely resembles a training log where we
+/// don't capture the reward signal.
+///
+/// Note that the correctness of the received data is the responsibility of the
+/// host. In particular, if insufficient data were sent, the compiler will block
+/// when waiting for advice.
+class InteractiveModelRunner : public MLModelRunner {
+public:
+  InteractiveModelRunner(LLVMContext &Ctx,
+                         const std::vector<TensorSpec> &Inputs,
+                         const TensorSpec &Advice, StringRef OutboundName,
+                         StringRef InboundName);
+
+  static bool classof(const MLModelRunner *R) {
+    return R->getKind() == MLModelRunner::Kind::Interactive;
+  }
+  void switchContext(StringRef Name) {
+    Log.switchContext(Name);
+    Log.flush();
+  }
+
+private:
+  void *evaluateUntyped() override;
+  const std::vector<TensorSpec> InputSpecs;
+  const TensorSpec OutputSpec;
+  std::error_code OutEC;
+  std::error_code InEC;
+  raw_fd_stream Inbound;
+  std::vector<char> OutputBuffer;
+  Logger Log;
+};
+} // namespace llvm
+#endif // LLVM_ANALYSIS_INTERACTIVEMODELRUNNER_H
index 872c0e37f00e6b920138e5457326c68bb364cfdc..6fcccf7e565a3bfcf7b917436c6b77b0c01d6f56 100644 (file)
@@ -47,7 +47,7 @@ public:
     return (const_cast<MLModelRunner *>(this))->getTensorUntyped(Index);
   }
 
-  enum class Kind : int { Unknown, Release, Development, NoOp };
+  enum class Kind : int { Unknown, Release, Development, NoOp, Interactive };
   Kind getKind() const { return Type; }
 
 protected:
index 3e0db32a220479f3e39d5b39f5cbf01cdbe0ab68..c50507b7a6b11445276f452c3da4362cc9af9107 100644 (file)
@@ -103,6 +103,9 @@ private:
   size_t ElementSize = 0;
 };
 
+/// For debugging: returns the tensor's values as a comma-separated string.
+std::string tensorValueToString(const char *Buffer, const TensorSpec &Spec);
+
 /// Construct a TensorSpec from a JSON dictionary of the form:
 /// { "name": <string>,
 ///   "port": <int>,
index b7db58f67436aa8b22f5a1ada10ad280a1363e97..75b79e622a56e40e5fe88905deff7ffd6158eefc 100644 (file)
@@ -116,6 +116,7 @@ public:
   void switchContext(StringRef Name);
   void startObservation();
   void endObservation();
+  void flush() { OS->flush(); }
 
   const std::string &currentContext() const { return CurrentContext; }
 
index 7e53c0ca64e3eee9641fec553dd2928dad408523..d25eb5c702a7ed14e938f95c418715e78c3b489e 100644 (file)
@@ -76,6 +76,7 @@ add_llvm_component_library(LLVMAnalysis
   InstCount.cpp
   InstructionPrecedenceTracking.cpp
   InstructionSimplify.cpp
+  InteractiveModelRunner.cpp
   Interval.cpp
   IntervalPartition.cpp
   LazyBranchProbabilityInfo.cpp
diff --git a/llvm/lib/Analysis/InteractiveModelRunner.cpp b/llvm/lib/Analysis/InteractiveModelRunner.cpp
new file mode 100644 (file)
index 0000000..40a4b50
--- /dev/null
@@ -0,0 +1,77 @@
+//===- InteractiveModelRunner.cpp - "gym" ML model runner -----------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// A runner that communicates with an external agent via 2 file descriptors.
+//===----------------------------------------------------------------------===//
+#include "llvm/Analysis/InteractiveModelRunner.h"
+#include "llvm/Analysis/MLModelRunner.h"
+#include "llvm/Analysis/TensorSpec.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+#define _IMR_CL_VALS(T, N) clEnumValN(TensorType::N, #T, #T),
+
+static cl::opt<TensorType> DebugReply(
+    "interactive-model-runner-echo-type", cl::init(TensorType::Invalid),
+    cl::Hidden,
+    cl::desc("The InteractiveModelRunner will echo back to stderr "
+             "the data received "
+             "from the host as the specified type (for debugging purposes)."),
+    cl::values(SUPPORTED_TENSOR_TYPES(_IMR_CL_VALS)
+                   clEnumValN(TensorType::Invalid, "disable", "Don't echo")));
+
+#undef _IMR_CL_VALS
+
+InteractiveModelRunner::InteractiveModelRunner(
+    LLVMContext &Ctx, const std::vector<TensorSpec> &Inputs,
+    const TensorSpec &Advice, StringRef OutboundName, StringRef InboundName)
+    : MLModelRunner(Ctx, MLModelRunner::Kind::Interactive, Inputs.size()),
+      InputSpecs(Inputs), OutputSpec(Advice), Inbound(InboundName, InEC),
+      OutputBuffer(OutputSpec.getTotalTensorBufferSize()),
+      Log(std::make_unique<raw_fd_ostream>(OutboundName, OutEC), InputSpecs,
+          Advice, /*IncludeReward=*/false) {
+  if (InEC) {
+    Ctx.emitError("Cannot open inbound file: " + InEC.message());
+    return;
+  }
+  if (OutEC) {
+    Ctx.emitError("Cannot open outbound file: " + OutEC.message());
+    return;
+  }
+  // Just like in the no-inference case, this will allocate an appropriately
+  // sized buffer.
+  for (size_t I = 0; I < InputSpecs.size(); ++I)
+    setUpBufferForTensor(I, InputSpecs[I], nullptr);
+  Log.flush();
+}
+
+void *InteractiveModelRunner::evaluateUntyped() {
+  Log.startObservation();
+  for (size_t I = 0; I < InputSpecs.size(); ++I)
+    Log.logTensorValue(I, reinterpret_cast<const char *>(getTensorUntyped(I)));
+  Log.endObservation();
+  Log.flush();
+
+  size_t InsPoint = 0;
+  char *Buff = OutputBuffer.data();
+  const size_t Limit = OutputBuffer.size();
+  while (InsPoint < Limit) {
+    auto Read = Inbound.read(Buff + InsPoint, OutputBuffer.size() - InsPoint);
+    if (Read < 0) {
+      Ctx.emitError("Failed reading from inbound file");
+      break;
+    }
+    InsPoint += Read;
+  }
+  if (DebugReply != TensorType::Invalid)
+    dbgs() << tensorValueToString(OutputBuffer.data(), OutputSpec);
+  return OutputBuffer.data();
+}
\ No newline at end of file
index 4f7428ded85e0fc4fb1d1d625ac4445cbd4fe61a..d4dc5570f8e4a8616bc98b0d8e53b5f25bee1b56 100644 (file)
 // utils.
 //
 //===----------------------------------------------------------------------===//
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/Config/config.h"
 
+#include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/Twine.h"
 #include "llvm/Analysis/TensorSpec.h"
 #include "llvm/Support/CommandLine.h"
@@ -102,4 +104,21 @@ std::optional<TensorSpec> getTensorSpecFromJSON(LLVMContext &Ctx,
   return std::nullopt;
 }
 
+std::string tensorValueToString(const char *Buffer, const TensorSpec &Spec) {
+  switch (Spec.type()) {
+#define _IMR_DBG_PRINTER(T, N)                                                 \
+  case TensorType::N: {                                                        \
+    const T *TypedBuff = reinterpret_cast<const T *>(Buffer);                  \
+    auto R = llvm::make_range(TypedBuff, TypedBuff + Spec.getElementCount());  \
+    return llvm::join(                                                         \
+        llvm::map_range(R, [](T V) { return std::to_string(V); }), ",");       \
+  }
+    SUPPORTED_TENSOR_TYPES(_IMR_DBG_PRINTER)
+#undef _IMR_DBG_PRINTER
+  case TensorType::Total:
+  case TensorType::Invalid:
+    llvm_unreachable("invalid tensor type");
+  }
+}
+
 } // namespace llvm
index 79cd77a1f558e4e970cc8e2224673e3c3d3df23c..3750516ef7d58b79383e60168275847008684b1a 100644 (file)
@@ -7,10 +7,18 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Analysis/MLModelRunner.h"
+#include "llvm/Analysis/InteractiveModelRunner.h"
 #include "llvm/Analysis/NoInferenceModelRunner.h"
 #include "llvm/Analysis/ReleaseModeModelRunner.h"
+#include "llvm/Support/BinaryByteStream.h"
+#include "llvm/Support/FileUtilities.h"
+#include "llvm/Support/JSON.h"
+#include "llvm/Support/raw_ostream.h"
 #include "gtest/gtest.h"
 
+#include <atomic>
+#include <thread>
+
 using namespace llvm;
 
 namespace llvm {
@@ -116,4 +124,135 @@ TEST(ReleaseModeRunner, ExtraFeaturesOutOfOrder) {
   EXPECT_EQ(*Evaluator->getTensor<int64_t>(0), 1);
   EXPECT_EQ(*Evaluator->getTensor<int64_t>(1), 2);
   EXPECT_EQ(*Evaluator->getTensor<int64_t>(2), -3);
+}
+
+TEST(InteractiveModelRunner, Evaluation) {
+  LLVMContext Ctx;
+  // Test the interaction with an external advisor by asking for advice twice.
+  // Use simple values, since we use the Logger underneath, which is tested
+  // more extensively elsewhere.
+  std::vector<TensorSpec> Inputs{
+      TensorSpec::createSpec<int64_t>("a", {1}),
+      TensorSpec::createSpec<int64_t>("b", {1}),
+      TensorSpec::createSpec<int64_t>("c", {1}),
+  };
+  TensorSpec AdviceSpec = TensorSpec::createSpec<float>("advice", {1});
+
+  // Create the 2 files. Ideally we'd create them as named pipes, but that's not
+  // quite supported by the generic API.
+  std::error_code EC;
+  SmallString<64> FromCompilerName;
+  SmallString<64> ToCompilerName;
+  int FromCompilerFD = 0;
+  int ToCompilerFD = 0;
+  ASSERT_EQ(sys::fs::createTemporaryFile("InteractiveModelRunner_Evaluation",
+                                         "temp", FromCompilerFD,
+                                         FromCompilerName),
+            std::error_code());
+
+  ASSERT_EQ(sys::fs::createTemporaryFile("InteractiveModelRunner_Evaluation",
+                                         "temp", ToCompilerFD, ToCompilerName),
+            std::error_code());
+
+  raw_fd_stream FromCompiler(FromCompilerName, EC);
+  EXPECT_FALSE(EC);
+  raw_fd_ostream ToCompiler(ToCompilerName, EC);
+  EXPECT_FALSE(EC);
+  FileRemover Cleanup1(FromCompilerName);
+  FileRemover Cleanup2(ToCompilerName);
+  InteractiveModelRunner Evaluator(Ctx, Inputs, AdviceSpec, FromCompilerName,
+                                   ToCompilerName);
+
+  Evaluator.switchContext("hi");
+
+  // Helper to read headers and other json lines.
+  SmallVector<char, 1024> Buffer;
+  auto ReadLn = [&]() {
+    Buffer.clear();
+    while (true) {
+      char Chr = 0;
+      auto Read = FromCompiler.read(&Chr, 1);
+      EXPECT_GE(Read, 0);
+      if (!Read)
+        continue;
+      if (Chr == '\n')
+        return StringRef(Buffer.data(), Buffer.size());
+      Buffer.push_back(Chr);
+    }
+  };
+  // See include/llvm/Analysis/Utils/TrainingLogger.h
+  // First comes the header
+  auto Header = json::parse(ReadLn());
+  EXPECT_FALSE(Header.takeError());
+  EXPECT_NE(Header->getAsObject()->getArray("features"), nullptr);
+  // Then comes the context
+  EXPECT_FALSE(json::parse(ReadLn()).takeError());
+
+  // Since the evaluator sends the features over and then blocks waiting for
+  // an answer, we must spawn a thread playing the role of the advisor / host:
+  std::atomic<int> SeenObservations = 0;
+  std::thread Advisor([&]() {
+    EXPECT_EQ(SeenObservations, 0);
+    int64_t Features[3] = {0};
+    auto FullyRead = [&]() {
+      size_t InsPt = 0;
+      const size_t ToRead = 3 * Inputs[0].getTotalTensorBufferSize();
+      char *Buff = reinterpret_cast<char *>(Features);
+      while (InsPt < ToRead) {
+        auto Read = FromCompiler.read(Buff + InsPt, ToRead - InsPt);
+        EXPECT_GE(Read, 0);
+        InsPt += Read;
+      }
+    };
+    // Observation
+    EXPECT_FALSE(json::parse(ReadLn()).takeError());
+    // Tensor values
+    FullyRead();
+    // a "\n"
+    char Chr = 0;
+    while (FromCompiler.read(&Chr, 1) == 0) {
+    }
+    EXPECT_EQ(Chr, '\n');
+    EXPECT_EQ(Features[0], 42);
+    EXPECT_EQ(Features[1], 43);
+    EXPECT_EQ(Features[2], 100);
+    ++SeenObservations;
+
+    // Send the advice
+    float Advice = 42.0012;
+    ToCompiler.write(reinterpret_cast<const char *>(&Advice),
+                     AdviceSpec.getTotalTensorBufferSize());
+    ToCompiler.flush();
+
+    // Second observation, and same idea as above
+    EXPECT_FALSE(json::parse(ReadLn()).takeError());
+    FullyRead();
+    while (FromCompiler.read(&Chr, 1) == 0) {
+    }
+    EXPECT_EQ(Chr, '\n');
+    EXPECT_EQ(Features[0], 10);
+    EXPECT_EQ(Features[1], -2);
+    EXPECT_EQ(Features[2], 1);
+    ++SeenObservations;
+    Advice = 50.30;
+    ToCompiler.write(reinterpret_cast<const char *>(&Advice),
+                     AdviceSpec.getTotalTensorBufferSize());
+    ToCompiler.flush();
+  });
+
+  EXPECT_EQ(SeenObservations, 0);
+  *Evaluator.getTensor<int64_t>(0) = 42;
+  *Evaluator.getTensor<int64_t>(1) = 43;
+  *Evaluator.getTensor<int64_t>(2) = 100;
+  float Ret = Evaluator.evaluate<float>();
+  EXPECT_EQ(SeenObservations, 1);
+  EXPECT_FLOAT_EQ(Ret, 42.0012);
+
+  *Evaluator.getTensor<int64_t>(0) = 10;
+  *Evaluator.getTensor<int64_t>(1) = -2;
+  *Evaluator.getTensor<int64_t>(2) = 1;
+  Ret = Evaluator.evaluate<float>();
+  EXPECT_EQ(SeenObservations, 2);
+  EXPECT_FLOAT_EQ(Ret, 50.30);
+  Advisor.join();
 }
\ No newline at end of file
index 09f4ada005e2fb7d079780f1b20cc57ede699be6..0e3cfb10c4a03833add703db3b69ce68e9b351a3 100644 (file)
@@ -59,3 +59,10 @@ TEST(TensorSpecTest, TensorSpecSizesAndTypes) {
   EXPECT_EQ(Spec3DLarge.getElementByteSize(), sizeof(float));
   EXPECT_EQ(Spec1D.getElementByteSize(), sizeof(int16_t));
 }
+
+TEST(TensorSpecTest, PrintValueForDebug) {
+  std::vector<int32_t> Values{1, 3};
+  EXPECT_EQ(tensorValueToString(reinterpret_cast<const char *>(Values.data()),
+                                TensorSpec::createSpec<int32_t>("name", {2})),
+            "1,3");
+}
\ No newline at end of file