From: Aiden Grossman <agrossman154@yahoo.com>
Date: Wed, 28 Sep 2022 18:18:50 +0000 (+0000)
Subject: [MLGO] Add per-instruction MBB frequencies to regalloc dev features
X-Git-Tag: upstream/17.0.6~32227
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=8d77f8fde7074639e1146f92e1c1e872316776ed;p=platform%2Fupstream%2Fllvm.git

[MLGO] Add per-instruction MBB frequencies to regalloc dev features

This commit adds two new features to the ML regalloc eviction
analysis that can be used in ML models: a vector of MBB frequencies and
a vector of indices mapping instructions to their corresponding basic
blocks. This will allow for further experimentation with per-instruction
features and give a lot more flexibility for future experimentation
compared to how we currently extract MBB frequency data.

Reviewed By: mtrofin, jacobhegna

Differential Revision: https://reviews.llvm.org/D134166
---

diff --git a/llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp b/llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp
index e48aac4..b9270f4 100644
--- a/llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp
+++ b/llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp
@@ -198,7 +198,11 @@ static const std::vector<int64_t> PerLiveRangeShape{1, NumberOfInterferences};
 
 #define RA_EVICT_REST_DEVELOPMENT_FEATURES(M)                                  \
   M(int64_t, instructions_mapping, InstructionsMappingShape,                   \
-    "A binary matrix mapping LRs to instruction opcodes")
+    "A binary matrix mapping LRs to instruction opcodes")                      \
+  M(float, mbb_frequencies, MBBFrequencyShape,                                 \
+    "A vector of machine basic block frequencies")                             \
+  M(int64_t, mbb_mapping, InstructionsShape,                                   \
+    "A vector of indicies mapping instructions to MBBs")
 #else
 #define RA_EVICT_FIRST_DEVELOPMENT_FEATURE(M)
 #define RA_EVICT_REST_DEVELOPMENT_FEATURES(M)
@@ -729,7 +733,19 @@ MCRegister MLEvictAdvisor::tryFindEvictionCandidate(
           }
           return CurrentMachineInstruction->getOpcode();
         },
+        [this](SlotIndex InputIndex) -> float {
+          auto *CurrentMachineInstruction =
+              LIS->getInstructionFromIndex(InputIndex);
+          return MBFI.getBlockFreqRelativeToEntryBlock(
+              CurrentMachineInstruction->getParent());
+        },
+        [this](SlotIndex InputIndex) -> MachineBasicBlock * {
+          auto *CurrentMachineInstruction =
+              LIS->getInstructionFromIndex(InputIndex);
+          return CurrentMachineInstruction->getParent();
+        },
         FeatureIDs::instructions, FeatureIDs::instructions_mapping,
+        FeatureIDs::mbb_frequencies, FeatureIDs::mbb_mapping,
         LIS->getSlotIndexes()->getLastIndex());
   }
 #endif // #ifdef LLVM_HAVE_TF_API
@@ -914,12 +930,14 @@ void MLEvictAdvisor::extractFeatures(
 #undef SET
 }
 
-void extractInstructionFeatures(SmallVectorImpl<LRStartEndInfo> &LRPosInfo,
-                                MLModelRunner *RegallocRunner,
-                                function_ref<int(SlotIndex)> GetOpcode,
-                                const int InstructionsIndex,
-                                const int InstructionsMappingIndex,
-                                const SlotIndex LastIndex) {
+void extractInstructionFeatures(
+    SmallVectorImpl<LRStartEndInfo> &LRPosInfo, MLModelRunner *RegallocRunner,
+    function_ref<int(SlotIndex)> GetOpcode,
+    function_ref<float(SlotIndex)> GetMBBFreq,
+    function_ref<MachineBasicBlock *(SlotIndex)> GetMBBReference,
+    const int InstructionsIndex, const int InstructionsMappingIndex,
+    const int MBBFreqIndex, const int MBBMappingIndex,
+    const SlotIndex LastIndex) {
   // This function extracts instruction based features relevant to the eviction
   // problem currently being solved. This function ends up extracting two
   // tensors.
@@ -929,6 +947,10 @@ void extractInstructionFeatures(SmallVectorImpl<LRStartEndInfo> &LRPosInfo,
   // 2 - A binary mapping matrix of size (LR count * max
   // instruction count) which maps where the LRs are live to the actual opcodes
   // for which they are live.
+  // 3 - A vector of size max supported MBB count storing MBB frequencies,
+  // encompassing all of the MBBs covered by the eviction problem.
+  // 4 - A vector of size max instruction count of indices to members of the MBB
+  // frequency vector, mapping each instruction to its associated MBB.
 
   // Start off by sorting the segments based on the beginning slot index.
   std::sort(
@@ -937,6 +959,8 @@ void extractInstructionFeatures(SmallVectorImpl<LRStartEndInfo> &LRPosInfo,
   size_t InstructionIndex = 0;
   size_t CurrentSegmentIndex = 0;
   SlotIndex CurrentIndex = LRPosInfo[0].Begin;
+  std::map<MachineBasicBlock *, size_t> VisitedMBBs;
+  size_t CurrentMBBIndex = 0;
   // This loop processes all the segments sequentially by starting at the
   // beginning slot index of the first segment, iterating through all the slot
   // indices before the end slot index of that segment (while checking for
@@ -961,6 +985,14 @@ void extractInstructionFeatures(SmallVectorImpl<LRStartEndInfo> &LRPosInfo,
         CurrentIndex = CurrentIndex.getNextIndex();
         continue;
       }
+      MachineBasicBlock *CurrentMBBReference = GetMBBReference(CurrentIndex);
+      if (VisitedMBBs.count(CurrentMBBReference) == 0) {
+        VisitedMBBs[CurrentMBBReference] = CurrentMBBIndex;
+        ++CurrentMBBIndex;
+      }
+      extractMBBFrequency(CurrentIndex, InstructionIndex, VisitedMBBs,
+                          GetMBBFreq, CurrentMBBReference, RegallocRunner,
+                          MBBFreqIndex, MBBMappingIndex);
       // Current code assumes we're not going to get any disjointed segments
       assert(LRPosInfo[CurrentSegmentIndex].Begin <= CurrentIndex);
       RegallocRunner->getTensor<int64_t>(InstructionsIndex)[InstructionIndex] =
@@ -1015,6 +1047,23 @@ void extractInstructionFeatures(SmallVectorImpl<LRStartEndInfo> &LRPosInfo,
   }
 }
 
+void extractMBBFrequency(const SlotIndex CurrentIndex, // Records one instruction's MBB frequency and MBB index in the model tensors.
+                         const size_t CurrentInstructionIndex,
+                         std::map<MachineBasicBlock *, size_t> &VisitedMBBs,
+                         function_ref<float(SlotIndex)> GetMBBFreq,
+                         MachineBasicBlock *CurrentMBBReference,
+                         MLModelRunner *RegallocRunner, const int MBBFreqIndex,
+                         const int MBBMappingIndex) {
+  size_t CurrentMBBIndex = VisitedMBBs[CurrentMBBReference]; // Index assigned to this MBB; operator[] inserts 0 if the MBB is absent.
+  float CurrentMBBFreq = GetMBBFreq(CurrentIndex); // Frequency reported by the callback for this slot index.
+  if (CurrentMBBIndex < ModelMaxSupportedMBBCount) { // MBBs at or past the limit are skipped: neither tensor is written.
+    RegallocRunner->getTensor<float>(MBBFreqIndex)[CurrentMBBIndex] =
+        CurrentMBBFreq;
+    RegallocRunner->getTensor<int64_t>(
+        MBBMappingIndex)[CurrentInstructionIndex] = CurrentMBBIndex;
+  }
+}
+
 // Development mode-specific implementations
 #ifdef LLVM_HAVE_TF_API
 
diff --git a/llvm/lib/CodeGen/MLRegallocEvictAdvisor.h b/llvm/lib/CodeGen/MLRegallocEvictAdvisor.h
index cf9107b..e36a411 100644
--- a/llvm/lib/CodeGen/MLRegallocEvictAdvisor.h
+++ b/llvm/lib/CodeGen/MLRegallocEvictAdvisor.h
@@ -15,6 +15,7 @@
 #define LLVM_CODEGEN_MLREGALLOCEVICTIONADVISOR_H
 
 #include "llvm/Analysis/MLModelRunner.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/SlotIndexes.h"
 
 using namespace llvm;
@@ -33,9 +34,20 @@ struct LRStartEndInfo {
 void extractInstructionFeatures(
     llvm::SmallVectorImpl<LRStartEndInfo> &LRPosInfo,
     MLModelRunner *RegallocRunner, function_ref<int(SlotIndex)> GetOpcode,
+    function_ref<float(SlotIndex)> GetMBBFreq,
+    function_ref<MachineBasicBlock *(SlotIndex)> GetMBBReference,
     const int InstructionsIndex, const int InstructionsMappingIndex,
+    const int MBBFreqIndex, const int MBBMappingIndex,
     const SlotIndex LastIndex);
 
+void extractMBBFrequency(const SlotIndex CurrentIndex,
+                         const size_t CurrentInstructionIndex,
+                         std::map<MachineBasicBlock *, size_t> &VisitedMBBs,
+                         function_ref<float(SlotIndex)> GetMBBFreq,
+                         MachineBasicBlock *CurrentMBBReference,
+                         MLModelRunner *RegallocRunner, const int MBBFreqIndex,
+                         const int MBBMappingIndex);
+
 // This is the maximum number of interfererring ranges. That's the number of
 // distinct AllocationOrder values, which comes from MCRegisterClass::RegsSize.
 // For X86, that's 32.
@@ -69,4 +81,13 @@ static const std::vector<int64_t> InstructionsShape{
 static const std::vector<int64_t> InstructionsMappingShape{
     1, NumberOfInterferences, ModelMaxSupportedInstructionCount};
 
+// When extracting mappings between MBBs and individual instructions, we create
+// a vector of MBB frequencies, currently of size 100, which was a value
+// determined through experimentation to encompass the vast majority of eviction
+// problems. The actual mapping is the same shape as the instruction opcodes
+// vector.
+static const int64_t ModelMaxSupportedMBBCount = 100;
+static const std::vector<int64_t> MBBFrequencyShape{1,
+                                                    ModelMaxSupportedMBBCount};
+
 #endif // LLVM_CODEGEN_MLREGALLOCEVICTIONADVISOR_H
diff --git a/llvm/test/CodeGen/MLRegalloc/dev-mode-extra-features-logging.ll b/llvm/test/CodeGen/MLRegalloc/dev-mode-extra-features-logging.ll
index 9c97314..7ab0115 100644
--- a/llvm/test/CodeGen/MLRegalloc/dev-mode-extra-features-logging.ll
+++ b/llvm/test/CodeGen/MLRegalloc/dev-mode-extra-features-logging.ll
@@ -50,3 +50,16 @@
 ; There are a total of 23 eviction problems with this test.
 ; CHECK-COUNT-22: int64_list
 ; CHECK: key: \"is_free\"
+; Make sure that we're exporting the mbb_frequencies. Don't actually check
+; values due to all values being floating point/liable to change very easily.
+; CHECK: key: \"mbb_frequencies\"
+; Make sure that we have the mbb_mapping feature, and that the first couple
+; of values are correct.
+; CHECK: key: \"mbb_mapping\"
+; CHECK-NEXT: 0
+; CHECK-SAME: 0
+; CHECK-SAME: 0
+; CHECK-SAME: 0
+; CHECK-SAME: 0
+; CHECK-SAME: 1
+; CHECK-SAME: 1
diff --git a/llvm/unittests/CodeGen/MLRegallocDevelopmentFeatures.cpp b/llvm/unittests/CodeGen/MLRegallocDevelopmentFeatures.cpp
index 022cfab..4a3e432 100644
--- a/llvm/unittests/CodeGen/MLRegallocDevelopmentFeatures.cpp
+++ b/llvm/unittests/CodeGen/MLRegallocDevelopmentFeatures.cpp
@@ -7,19 +7,35 @@
 //===----------------------------------------------------------------------===//
 
 #include "../../lib/CodeGen/MLRegallocEvictAdvisor.h"
+#include "llvm/ADT/Triple.h"
 #include "llvm/Analysis/NoInferenceModelRunner.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
 #include "llvm/IR/LLVMContext.h"
+#include "llvm/MC/TargetRegistry.h"
 #include "llvm/Support/Allocator.h"
 #include "llvm/Support/CodeGen.h"
+#include "llvm/Support/TargetSelect.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
 #include "gmock/gmock.h"
 #include "gtest/gtest.h"
 
+#include <string>
 #include <vector>
 
 using testing::ContainerEq;
 using testing::Test;
 
+namespace {
+
+#include "MFCommon.inc"
+
 struct LRPosInfoIndexes {
   size_t StartIndex;
   size_t EndIndex;
@@ -68,7 +84,9 @@ protected:
     const std::vector<TensorSpec> Inputs{
         TensorSpec::createSpec<int64_t>("instructions", InstructionsShape),
         TensorSpec::createSpec<int64_t>("instructions_mapping",
-                                        InstructionsMappingShape)};
+                                        InstructionsMappingShape),
+        TensorSpec::createSpec<float>("mbb_frequencies", MBBFrequencyShape),
+        TensorSpec::createSpec<int64_t>("mbb_mapping", InstructionsShape)};
     LLVMContext Ctx;
     return NoInferenceModelRunner(Ctx, Inputs);
   }
@@ -103,7 +121,10 @@ protected:
     SlotIndex LastIndex = OverlapProblem[MaxIndex].End;
     extractInstructionFeatures(
         OverlapProblem, &ModelRunner,
-        [](SlotIndex InputSlot) -> int { return 0; }, 0, 1, LastIndex);
+        [](SlotIndex InputSlot) -> int { return 0; },
+        [](SlotIndex InputSlot) -> float { return 0.0f; },
+        [](SlotIndex InputSlot) -> MachineBasicBlock * { return nullptr; }, 0,
+        1, 2, 3, LastIndex);
     std::vector<int64_t> MappingMatrix(
         ModelRunner.getTensor<int64_t>(1),
         ModelRunner.getTensor<int64_t>(1) +
@@ -154,7 +175,9 @@ TEST_F(RegallocDevelopmentFeaturesTest, InstructionOpcodesAreCorrect) {
       [FirstIndex](SlotIndex InputSlot) -> int {
         return FirstIndex.distance(InputSlot) / SlotIndex::InstrDist;
       },
-      0, 1, LastIndex);
+      [](SlotIndex InputSlot) -> float { return 0.0f; },
+      [](SlotIndex InputSlot) -> MachineBasicBlock * { return nullptr; }, 0, 1,
+      2, 3, LastIndex);
   for (size_t CurrentInstructionIndex = 0;
        CurrentInstructionIndex < ModelMaxSupportedInstructionCount;
        ++CurrentInstructionIndex) {
@@ -207,3 +230,62 @@ TEST_F(RegallocDevelopmentFeaturesTest, InternalMultiOverlap) {
   OverlapSetup.push_back({35, 60, 2});
   runOverlapTest(OverlapSetup);
 }
+
+TEST_F(RegallocDevelopmentFeaturesTest, SingleMBBTest) {
+  NoInferenceModelRunner ModelRunner = setupModelRunner();
+  SlotIndex CurrentIndex;
+  // Use MBB index 1 rather than 0 so we can ensure the mapping actually gets set.
+  std::map<MachineBasicBlock *, size_t> VisitedMBBs = {{nullptr, 1}};
+  extractMBBFrequency(
+      CurrentIndex, 0, VisitedMBBs,
+      [](SlotIndex InputSlot) -> float { return 1.0f; }, nullptr, &ModelRunner,
+      2, 3);
+  ASSERT_FLOAT_EQ(ModelRunner.getTensor<float>(2)[1], 1.0f); // Frequency 1.0 is stored at MBB index 1.
+  ASSERT_EQ(ModelRunner.getTensor<int64_t>(3)[0], 1); // Instruction 0 maps to MBB index 1.
+}
+
+TEST_F(RegallocDevelopmentFeaturesTest, MBBFullTruncated) { // Each instruction gets its own MBB; checks only the first ModelMaxSupportedMBBCount are recorded.
+  SmallVector<LRPosInfoIndexes, 1> OverlapSetup;
+  OverlapSetup.push_back({0, ModelMaxSupportedInstructionCount - 1, 0});
+  ilist<IndexListEntry> IndexList;
+  auto OverlapProblem = setupOverlapProblem(OverlapSetup, IndexList);
+  NoInferenceModelRunner ModelRunner = setupModelRunner();
+  SlotIndex LastIndex = OverlapProblem[0].End;
+  SlotIndex FirstIndex = OverlapProblem[0].Begin;
+
+  LLVMContext Ctx;
+  Module Mod("Module", Ctx);
+  auto MF = createMachineFunction(Ctx, Mod);
+  std::array<MachineBasicBlock *, ModelMaxSupportedInstructionCount>
+      MBBsForTest;
+  for (size_t I = 0; I < ModelMaxSupportedInstructionCount; ++I) { // One newly created MBB per array slot.
+    MBBsForTest[I] = MF->CreateMachineBasicBlock();
+  }
+
+  extractInstructionFeatures(
+      OverlapProblem, &ModelRunner,
+      [](SlotIndex InputSlot) -> int { return 0; },
+      [FirstIndex](SlotIndex InputSlot) -> float { // Frequency callback: distance from FirstIndex in units of InstrDist.
+        return static_cast<float>(FirstIndex.distance(InputSlot) /
+                                  SlotIndex::InstrDist);
+      },
+      [FirstIndex, MBBsForTest](SlotIndex InputSlot) -> MachineBasicBlock * { // MBB callback: picks the MBB at that same offset.
+        return MBBsForTest[FirstIndex.distance(InputSlot) /
+                           SlotIndex::InstrDist];
+      },
+      0, 1, 2, 3, LastIndex);
+  for (size_t MBBIndex = 0; MBBIndex < ModelMaxSupportedMBBCount; ++MBBIndex) { // Within the limit: entry I holds frequency I and maps to MBB I.
+    ASSERT_FLOAT_EQ(ModelRunner.getTensor<float>(2)[MBBIndex],
+                    static_cast<float>(MBBIndex));
+    ASSERT_EQ(ModelRunner.getTensor<int64_t>(3)[MBBIndex],
+              static_cast<int64_t>(MBBIndex));
+  }
+  // Mapping entries past ModelMaxSupportedMBBCount should be zero (truncated).
+  for (size_t MBBIndex = ModelMaxSupportedMBBCount;
+       MBBIndex < ModelMaxSupportedInstructionCount; ++MBBIndex) {
+    ASSERT_EQ(ModelRunner.getTensor<int64_t>(3)[MBBIndex],
+              static_cast<int64_t>(0));
+  }
+}
+
+} // end namespace