[RFC][X86][MemFold] Upgrade the mechanism of auto-generated Memory Folding Table
authorBing1 Yu <bing1.yu@intel.com>
Mon, 20 Mar 2023 05:48:01 +0000 (13:48 +0800)
committerBing1 Yu <bing1.yu@intel.com>
Mon, 20 Mar 2023 06:42:52 +0000 (14:42 +0800)
1. Align ManualMapSet with X86MemoryFoldTableEntry instead of using UnfoldStrategy
2. ManualMapSet is now able to update existing records in the auto-generated MemFold table

Reviewed By: skan

Differential Revision: https://reviews.llvm.org/D142084

llvm/include/llvm/Support/X86FoldTablesUtils.h [new file with mode: 0644]
llvm/lib/Target/X86/X86InstrFoldTables.h
llvm/test/TableGen/x86-auto-memfold.td [new file with mode: 0644]
llvm/utils/TableGen/X86FoldTablesEmitter.cpp
llvm/utils/TableGen/X86FoldTablesEmitterManualMapSet.inc [new file with mode: 0644]

diff --git a/llvm/include/llvm/Support/X86FoldTablesUtils.h b/llvm/include/llvm/Support/X86FoldTablesUtils.h
new file mode 100644 (file)
index 0000000..4829700
--- /dev/null
@@ -0,0 +1,59 @@
+//===-- X86FoldTablesUtils.h ------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_X86FOLDTABLESUTILS_H
+#define LLVM_SUPPORT_X86FOLDTABLESUTILS_H
+
+namespace {
+enum {
+  // Select which memory operand is being unfolded.
+  // (stored in bits 0 - 2)
+  TB_INDEX_0    = 0,
+  TB_INDEX_1    = 1,
+  TB_INDEX_2    = 2,
+  TB_INDEX_3    = 3,
+  TB_INDEX_4    = 4,
+  TB_INDEX_MASK = 0x7,
+
+  // Do not insert the reverse map (MemOp -> RegOp) into the table.
+  // This may be needed because there is a many -> one mapping.
+  TB_NO_REVERSE   = 1 << 3,
+
+  // Do not insert the forward map (RegOp -> MemOp) into the table.
+  // This is needed for Native Client, which prohibits branch
+  // instructions from using a memory operand.
+  TB_NO_FORWARD   = 1 << 4,
+
+  TB_FOLDED_LOAD  = 1 << 5,
+  TB_FOLDED_STORE = 1 << 6,
+  TB_FOLDED_BCAST = 1 << 7,
+
+  // Minimum alignment required for load/store.
+  // Used for RegOp->MemOp conversion. Encoded as Log2(Align) + 1 to allow 0
+  // to mean align of 0.
+  // (stored in bits 8 - 11)
+  TB_ALIGN_SHIFT = 8,
+  TB_ALIGN_NONE  =   0 << TB_ALIGN_SHIFT,
+  TB_ALIGN_16    =   5 << TB_ALIGN_SHIFT,
+  TB_ALIGN_32    =   6 << TB_ALIGN_SHIFT,
+  TB_ALIGN_64    =   7 << TB_ALIGN_SHIFT,
+  TB_ALIGN_MASK  = 0xf << TB_ALIGN_SHIFT,
+
+  // Broadcast type.
+  // (stored in bits 12 - 13)
+  TB_BCAST_TYPE_SHIFT = 12,
+  TB_BCAST_D    =   0 << TB_BCAST_TYPE_SHIFT,
+  TB_BCAST_Q    =   1 << TB_BCAST_TYPE_SHIFT,
+  TB_BCAST_SS   =   2 << TB_BCAST_TYPE_SHIFT,
+  TB_BCAST_SD   =   3 << TB_BCAST_TYPE_SHIFT,
+  TB_BCAST_MASK = 0x3 << TB_BCAST_TYPE_SHIFT,
+
+  // Unused bits 14-15
+};
+}
+#endif // LLVM_SUPPORT_X86FOLDTABLESUTILS_H
\ No newline at end of file
index b7aca27..384369b 100644 (file)
 #define LLVM_LIB_TARGET_X86_X86INSTRFOLDTABLES_H
 
 #include <cstdint>
+#include "llvm/Support/X86FoldTablesUtils.h"
 
 namespace llvm {
 
-enum {
-  // Select which memory operand is being unfolded.
-  // (stored in bits 0 - 2)
-  TB_INDEX_0    = 0,
-  TB_INDEX_1    = 1,
-  TB_INDEX_2    = 2,
-  TB_INDEX_3    = 3,
-  TB_INDEX_4    = 4,
-  TB_INDEX_MASK = 0x7,
-
-  // Do not insert the reverse map (MemOp -> RegOp) into the table.
-  // This may be needed because there is a many -> one mapping.
-  TB_NO_REVERSE   = 1 << 3,
-
-  // Do not insert the forward map (RegOp -> MemOp) into the table.
-  // This is needed for Native Client, which prohibits branch
-  // instructions from using a memory operand.
-  TB_NO_FORWARD   = 1 << 4,
-
-  TB_FOLDED_LOAD  = 1 << 5,
-  TB_FOLDED_STORE = 1 << 6,
-  TB_FOLDED_BCAST = 1 << 7,
-
-  // Minimum alignment required for load/store.
-  // Used for RegOp->MemOp conversion. Encoded as Log2(Align) + 1 to allow 0
-  // to mean align of 0.
-  // (stored in bits 8 - 11)
-  TB_ALIGN_SHIFT = 8,
-  TB_ALIGN_NONE  =   0 << TB_ALIGN_SHIFT,
-  TB_ALIGN_16    =   5 << TB_ALIGN_SHIFT,
-  TB_ALIGN_32    =   6 << TB_ALIGN_SHIFT,
-  TB_ALIGN_64    =   7 << TB_ALIGN_SHIFT,
-  TB_ALIGN_MASK  = 0xf << TB_ALIGN_SHIFT,
-
-  // Broadcast type.
-  // (stored in bits 12 - 13)
-  TB_BCAST_TYPE_SHIFT = 12,
-  TB_BCAST_D    =   0 << TB_BCAST_TYPE_SHIFT,
-  TB_BCAST_Q    =   1 << TB_BCAST_TYPE_SHIFT,
-  TB_BCAST_SS   =   2 << TB_BCAST_TYPE_SHIFT,
-  TB_BCAST_SD   =   3 << TB_BCAST_TYPE_SHIFT,
-  TB_BCAST_MASK = 0x3 << TB_BCAST_TYPE_SHIFT,
-
-  // Unused bits 14-15
-};
-
 // This struct is used for both the folding and unfold tables. They KeyOp
 // is used to determine the sorting order.
 struct X86MemoryFoldTableEntry {
diff --git a/llvm/test/TableGen/x86-auto-memfold.td b/llvm/test/TableGen/x86-auto-memfold.td
new file mode 100644 (file)
index 0000000..55bb887
--- /dev/null
@@ -0,0 +1,2 @@
+// RUN: llvm-tblgen -gen-x86-fold-tables -asmwriternum=1 %p/../../lib/Target/X86/X86.td -I %p/../../include -I %p/../../lib/Target/X86/ -I %p/../../include/ -I %p/../../lib/Target/ --write-if-changed  -o %t1
+// RUN: cmp  --ignore-initial=0:568 %p/../../lib/Target/X86/X86MemFoldTables.inc %t1
index d4b2d06..052151d 100644 (file)
@@ -15,7 +15,9 @@
 #include "CodeGenTarget.h"
 #include "TableGenBackends.h"
 #include "X86RecognizableInstr.h"
+#include "llvm/ADT/DenseMap.h"
 #include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/X86FoldTablesUtils.h"
 #include "llvm/TableGen/Record.h"
 #include "llvm/TableGen/TableGenBackend.h"
 
@@ -23,23 +25,14 @@ using namespace llvm;
 using namespace X86Disassembler;
 
 namespace {
-
-// 3 possible strategies for the unfolding flag (TB_NO_REVERSE) of the
-// manual added entries.
-enum UnfoldStrategy {
-  UNFOLD,     // Allow unfolding
-  NO_UNFOLD,  // Prevent unfolding
-  NO_STRATEGY // Make decision according to operands' sizes
-};
-
 // Represents an entry in the manual mapped instructions set.
 struct ManualMapEntry {
   const char *RegInstStr;
   const char *MemInstStr;
-  UnfoldStrategy Strategy;
+  uint16_t Strategy;
 
   ManualMapEntry(const char *RegInstStr, const char *MemInstStr,
-                 UnfoldStrategy Strategy = NO_STRATEGY)
+                 uint16_t Strategy = 0)
       : RegInstStr(RegInstStr), MemInstStr(MemInstStr), Strategy(Strategy) {}
 };
 
@@ -52,37 +45,7 @@ const char *ExplicitUnalign[] = {"MOVDQU", "MOVUPS", "MOVUPD",
                                  "PCMPESTRM", "PCMPESTRI",
                                  "PCMPISTRM", "PCMPISTRI" };
 
-// For manually mapping instructions that do not match by their encoding.
-const ManualMapEntry ManualMapSet[] = {
-    { "ADD16ri_DB",         "ADD16mi",         NO_UNFOLD  },
-    { "ADD16ri8_DB",        "ADD16mi8",        NO_UNFOLD  },
-    { "ADD16rr_DB",         "ADD16mr",         NO_UNFOLD  },
-    { "ADD32ri_DB",         "ADD32mi",         NO_UNFOLD  },
-    { "ADD32ri8_DB",        "ADD32mi8",        NO_UNFOLD  },
-    { "ADD32rr_DB",         "ADD32mr",         NO_UNFOLD  },
-    { "ADD64ri32_DB",       "ADD64mi32",       NO_UNFOLD  },
-    { "ADD64ri8_DB",        "ADD64mi8",        NO_UNFOLD  },
-    { "ADD64rr_DB",         "ADD64mr",         NO_UNFOLD  },
-    { "ADD8ri_DB",          "ADD8mi",          NO_UNFOLD  },
-    { "ADD8rr_DB",          "ADD8mr",          NO_UNFOLD  },
-    { "ADD16rr_DB",         "ADD16rm",         NO_UNFOLD  },
-    { "ADD32rr_DB",         "ADD32rm",         NO_UNFOLD  },
-    { "ADD64rr_DB",         "ADD64rm",         NO_UNFOLD  },
-    { "ADD8rr_DB",          "ADD8rm",          NO_UNFOLD  },
-    { "MMX_MOVD64from64rr", "MMX_MOVQ64mr",    UNFOLD },
-    { "MMX_MOVD64grr",      "MMX_MOVD64mr",    UNFOLD },
-    { "MOVLHPSrr",          "MOVHPSrm",        NO_UNFOLD  },
-    { "PUSH16r",            "PUSH16rmm",       UNFOLD },
-    { "PUSH32r",            "PUSH32rmm",       UNFOLD },
-    { "PUSH64r",            "PUSH64rmm",       UNFOLD },
-    { "TAILJMPr",           "TAILJMPm",        UNFOLD },
-    { "TAILJMPr64",         "TAILJMPm64",      UNFOLD },
-    { "TAILJMPr64_REX",     "TAILJMPm64_REX",  UNFOLD },
-    { "VMOVLHPSZrr",        "VMOVHPSZ128rm",   NO_UNFOLD  },
-    { "VMOVLHPSrr",         "VMOVHPSrm",       NO_UNFOLD  },
-};
-
-
+#include "X86FoldTablesEmitterManualMapSet.inc"
 static bool isExplicitAlign(const CodeGenInstruction *Inst) {
   return any_of(ExplicitAlign, [Inst](const char *InstStr) {
     return Inst->TheDef->getName().contains(InstStr);
@@ -106,50 +69,63 @@ class X86FoldTablesEmitter {
 
   public:
     bool CannotUnfold = false;
+    bool CannotFold = false;
     bool IsLoad = false;
     bool IsStore = false;
     bool IsAligned = false;
     unsigned int Alignment = 0;
 
+    X86FoldTableEntry() = default;
     X86FoldTableEntry(const CodeGenInstruction *RegInst,
                       const CodeGenInstruction *MemInst)
         : RegInst(RegInst), MemInst(MemInst) {}
 
     void print(formatted_raw_ostream &OS) const {
+      // Skip printing the record if it can be neither folded nor unfolded.
+      if(CannotUnfold && CannotFold)
+        return;
       OS.indent(2);
-      OS << "{ X86::" << RegInst->TheDef->getName() << ",";
-      OS.PadToColumn(40);
-      OS  << "X86::" << MemInst->TheDef->getName() << ",";
-      OS.PadToColumn(75);
+      OS << "{X86::" << RegInst->TheDef->getName() << ", ";
+      OS  << "X86::" << MemInst->TheDef->getName() << ", ";
 
       std::string Attrs;
       if (IsLoad)
-        Attrs += "TB_FOLDED_LOAD | ";
+        Attrs += "TB_FOLDED_LOAD|";
       if (IsStore)
-        Attrs += "TB_FOLDED_STORE | ";
+        Attrs += "TB_FOLDED_STORE|";
       if (CannotUnfold)
-        Attrs += "TB_NO_REVERSE | ";
+        Attrs += "TB_NO_REVERSE|";
+      if (CannotFold)
+        Attrs += "TB_NO_FORWARD|";
       if (IsAligned)
-        Attrs += "TB_ALIGN_" + std::to_string(Alignment) + " | ";
+        Attrs += "TB_ALIGN_" + std::to_string(Alignment) + "|";
 
-      StringRef SimplifiedAttrs = StringRef(Attrs).rtrim("| ");
+      StringRef SimplifiedAttrs = StringRef(Attrs).rtrim("|");
       if (SimplifiedAttrs.empty())
         SimplifiedAttrs = "0";
 
-      OS << SimplifiedAttrs << " },\n";
+      OS << SimplifiedAttrs << "},\n";
     }
 
-    bool operator<(const X86FoldTableEntry &RHS) const {
-      bool LHSpseudo = RegInst->TheDef->getValueAsBit("isPseudo");
-      bool RHSpseudo = RHS.RegInst->TheDef->getValueAsBit("isPseudo");
+  };
+
+  struct CodeGenInstructionComparator {
+    // Comparator function
+    bool operator()(const CodeGenInstruction *LHS,
+                    const CodeGenInstruction *RHS) const {
+      assert(LHS && RHS && "LHS and RHS shouldn't be nullptr");
+      bool LHSpseudo = LHS->TheDef->getValueAsBit("isPseudo");
+      bool RHSpseudo = RHS->TheDef->getValueAsBit("isPseudo");
       if (LHSpseudo != RHSpseudo)
         return LHSpseudo;
 
-      return RegInst->TheDef->getName() < RHS.RegInst->TheDef->getName();
+      return LHS->TheDef->getName() < RHS->TheDef->getName();
     }
   };
 
-  typedef std::vector<X86FoldTableEntry> FoldTable;
+  typedef std::map<const CodeGenInstruction *, X86FoldTableEntry,
+                   CodeGenInstructionComparator>
+      FoldTable;
   // std::vector for each folding table.
   // Table2Addr - Holds instructions which their memory form performs load+store
   // Table#i - Holds instructions which the their memory form perform a load OR
@@ -171,14 +147,14 @@ private:
   // Decides to which table to add the entry with the given instructions.
   // S sets the strategy of adding the TB_NO_REVERSE flag.
   void updateTables(const CodeGenInstruction *RegInstr,
-                    const CodeGenInstruction *MemInstr,
-                    const UnfoldStrategy S = NO_STRATEGY);
+                    const CodeGenInstruction *MemInstr, const uint16_t S = 0,
+                    bool IsManual = false);
 
   // Generates X86FoldTableEntry with the given instructions and fill it with
   // the appropriate flags - then adds it to Table.
   void addEntryWithFlags(FoldTable &Table, const CodeGenInstruction *RegInstr,
-                         const CodeGenInstruction *MemInstr,
-                         const UnfoldStrategy S, const unsigned int FoldedInd);
+                         const CodeGenInstruction *MemInstr, const uint16_t S,
+                         const unsigned int FoldedInd, bool isManual);
 
   // Print the given table as a static const C++ array of type
   // X86MemoryFoldTableEntry.
@@ -187,8 +163,8 @@ private:
     OS << "static const X86MemoryFoldTableEntry MemoryFold" << TableName
        << "[] = {\n";
 
-    for (const X86FoldTableEntry &E : Table)
-      E.print(OS);
+    for (auto &E : Table)
+      E.second.print(OS);
 
     OS << "};\n\n";
   }
@@ -393,13 +369,26 @@ private:
 void X86FoldTablesEmitter::addEntryWithFlags(FoldTable &Table,
                                              const CodeGenInstruction *RegInstr,
                                              const CodeGenInstruction *MemInstr,
-                                             const UnfoldStrategy S,
-                                             const unsigned int FoldedInd) {
+                                             const uint16_t S,
+                                             const unsigned int FoldedInd,
+                                             bool isManual) {
 
   X86FoldTableEntry Result = X86FoldTableEntry(RegInstr, MemInstr);
   Record *RegRec = RegInstr->TheDef;
   Record *MemRec = MemInstr->TheDef;
 
+  if (isManual) {
+    Result.CannotUnfold = (S & TB_NO_REVERSE) != 0;
+    Result.CannotFold = (S & TB_NO_FORWARD) != 0;
+    Result.IsLoad = (S & TB_FOLDED_LOAD) != 0;
+    Result.IsStore = (S & TB_FOLDED_STORE) != 0;
+    Result.IsAligned = (S & TB_ALIGN_MASK) != 0;
+    auto AlignValue = (S & TB_ALIGN_MASK) >> TB_ALIGN_SHIFT;
+    Result.Alignment = AlignValue > 0 ? (1 << (AlignValue - 1)) : 0;
+    Table[RegInstr] = Result;
+    return;
+  }
+
   // Only table0 entries should explicitly specify a load or store flag.
   if (&Table == &Table0) {
     unsigned MemInOpsNum = MemRec->getValueAsDag("InOperandList")->getNumArgs();
@@ -422,12 +411,27 @@ void X86FoldTablesEmitter::addEntryWithFlags(FoldTable &Table,
   // the register in the register form instruction.
   // If the register's size is greater than the memory's operand size, do not
   // allow unfolding.
-  if (S == UNFOLD)
-    Result.CannotUnfold = false;
-  else if (S == NO_UNFOLD)
+
+  // The unfolded load size will be based on the register size. If that's bigger
+  // than the memory operand size, the unfolded load will load more memory and
+  // potentially cause a memory fault.
+  if (getRegOperandSize(RegOpRec) > getMemOperandSize(MemOpRec))
+    Result.CannotUnfold = true;
+
+  // Check isMoveReg on the unmasked base instruction (k/kz suffix stripped).
+  Record *BaseDef = nullptr;
+  if (RegRec->getName().ends_with("rkz") &&
+      (BaseDef = Records.getDef(
+           RegRec->getName().substr(0, RegRec->getName().size() - 2)))) {
+    Result.CannotUnfold =
+        Target.getInstruction(BaseDef).isMoveReg ? true : Result.CannotUnfold;
+  } else if (RegRec->getName().ends_with("rk") &&
+             (BaseDef = Records.getDef(
+                  RegRec->getName().substr(0, RegRec->getName().size() - 1)))) {
+    Result.CannotUnfold =
+        Target.getInstruction(BaseDef).isMoveReg ? true : Result.CannotUnfold;
+  } else if (RegInstr->isMoveReg && Result.IsStore)
     Result.CannotUnfold = true;
-  else if (getRegOperandSize(RegOpRec) > getMemOperandSize(MemOpRec))
-    Result.CannotUnfold = true; // S == NO_STRATEGY
 
   uint64_t Enc = getValueFromBitsInit(RegRec->getValueAsBitsInit("OpEncBits"));
   if (isExplicitAlign(RegInstr)) {
@@ -445,13 +449,19 @@ void X86FoldTablesEmitter::addEntryWithFlags(FoldTable &Table,
       Result.Alignment = 16;
     }
   }
+  // Expand is only ever created as a masked instruction. It is not safe to
+  // unfold a masked expand because we don't know if it came from an expand load
+  // intrinsic or folding a plain load. If it is from an expand load intrinsic,
+  // unfolding to a plain load would read more elements and could fault.
+  if (RegRec->getName().contains("EXPAND"))
+    Result.CannotUnfold = true;
 
-  Table.push_back(Result);
+  Table[RegInstr] = Result;
 }
 
 void X86FoldTablesEmitter::updateTables(const CodeGenInstruction *RegInstr,
                                         const CodeGenInstruction *MemInstr,
-                                        const UnfoldStrategy S) {
+                                        const uint16_t S, bool IsManual) {
 
   Record *RegRec = RegInstr->TheDef;
   Record *MemRec = MemInstr->TheDef;
@@ -462,7 +472,7 @@ void X86FoldTablesEmitter::updateTables(const CodeGenInstruction *RegInstr,
 
   // Instructions which Read-Modify-Write should be added to Table2Addr.
   if (MemOutSize != RegOutSize && MemInSize == RegInSize) {
-    addEntryWithFlags(Table2Addr, RegInstr, MemInstr, S, 0);
+    addEntryWithFlags(Table2Addr, RegInstr, MemInstr, S, 0, IsManual);
     return;
   }
 
@@ -479,19 +489,19 @@ void X86FoldTablesEmitter::updateTables(const CodeGenInstruction *RegInstr,
           isMemoryOperand(MemOpRec)) {
         switch (i) {
         case 0:
-          addEntryWithFlags(Table0, RegInstr, MemInstr, S, 0);
+          addEntryWithFlags(Table0, RegInstr, MemInstr, S, 0, IsManual);
           return;
         case 1:
-          addEntryWithFlags(Table1, RegInstr, MemInstr, S, 1);
+          addEntryWithFlags(Table1, RegInstr, MemInstr, S, 1, IsManual);
           return;
         case 2:
-          addEntryWithFlags(Table2, RegInstr, MemInstr, S, 2);
+          addEntryWithFlags(Table2, RegInstr, MemInstr, S, 2, IsManual);
           return;
         case 3:
-          addEntryWithFlags(Table3, RegInstr, MemInstr, S, 3);
+          addEntryWithFlags(Table3, RegInstr, MemInstr, S, 3, IsManual);
           return;
         case 4:
-          addEntryWithFlags(Table4, RegInstr, MemInstr, S, 4);
+          addEntryWithFlags(Table4, RegInstr, MemInstr, S, 4, IsManual);
           return;
         }
       }
@@ -508,7 +518,7 @@ void X86FoldTablesEmitter::updateTables(const CodeGenInstruction *RegInstr,
     Record *MemOpRec = MemInstr->Operands[RegOutSize - 1].Rec;
     if (isRegisterOperand(RegOpRec) && isMemoryOperand(MemOpRec) &&
         getRegOperandSize(RegOpRec) == getMemOperandSize(MemOpRec))
-      addEntryWithFlags(Table0, RegInstr, MemInstr, S, 0);
+      addEntryWithFlags(Table0, RegInstr, MemInstr, S, 0, IsManual);
   }
 }
 
@@ -591,17 +601,9 @@ void X86FoldTablesEmitter::run(formatted_raw_ostream &OS) {
     Record *MemInstIter = Records.getDef(Entry.MemInstStr);
 
     updateTables(&(Target.getInstruction(RegInstIter)),
-                 &(Target.getInstruction(MemInstIter)), Entry.Strategy);
+                 &(Target.getInstruction(MemInstIter)), Entry.Strategy, true);
   }
 
-  // Sort the tables before printing.
-  llvm::sort(Table2Addr);
-  llvm::sort(Table0);
-  llvm::sort(Table1);
-  llvm::sort(Table2);
-  llvm::sort(Table3);
-  llvm::sort(Table4);
-
   // Print all tables.
   printTable(Table2Addr, "Table2Addr", OS);
   printTable(Table0, "Table0", OS);
diff --git a/llvm/utils/TableGen/X86FoldTablesEmitterManualMapSet.inc b/llvm/utils/TableGen/X86FoldTablesEmitterManualMapSet.inc
new file mode 100644 (file)
index 0000000..8b7f387
--- /dev/null
@@ -0,0 +1,83 @@
+const ManualMapEntry ManualMapSet[] = {
+    // Part1: These following records are for manually mapping instructions that
+    // do not match by their encoding.
+    { "ADD16ri_DB",         "ADD16mi",         TB_NO_REVERSE  },
+    { "ADD16ri8_DB",        "ADD16mi8",        TB_NO_REVERSE  },
+    { "ADD16rr_DB",         "ADD16mr",         TB_NO_REVERSE  },
+    { "ADD32ri_DB",         "ADD32mi",         TB_NO_REVERSE  },
+    { "ADD32ri8_DB",        "ADD32mi8",        TB_NO_REVERSE  },
+    { "ADD32rr_DB",         "ADD32mr",         TB_NO_REVERSE  },
+    { "ADD64ri32_DB",       "ADD64mi32",       TB_NO_REVERSE  },
+    { "ADD64ri8_DB",        "ADD64mi8",        TB_NO_REVERSE  },
+    { "ADD64rr_DB",         "ADD64mr",         TB_NO_REVERSE  },
+    { "ADD8ri_DB",          "ADD8mi",          TB_NO_REVERSE  },
+    { "ADD8rr_DB",          "ADD8mr",          TB_NO_REVERSE  },
+    { "ADD16rr_DB",         "ADD16rm",         TB_NO_REVERSE  },
+    { "ADD32rr_DB",         "ADD32rm",         TB_NO_REVERSE  },
+    { "ADD64rr_DB",         "ADD64rm",         TB_NO_REVERSE  },
+    { "ADD8rr_DB",          "ADD8rm",          TB_NO_REVERSE  },
+    { "MMX_MOVD64from64rr", "MMX_MOVQ64mr",    TB_FOLDED_STORE },
+    { "MMX_MOVD64grr",      "MMX_MOVD64mr",    TB_FOLDED_STORE },
+    { "MOV64toSDrr",        "MOV64mr",         TB_FOLDED_STORE | TB_NO_REVERSE },
+    { "MOVDI2SSrr",         "MOV32mr",         TB_FOLDED_STORE | TB_NO_REVERSE },
+    { "MOVPQIto64rr",       "MOVPQI2QImr",     TB_FOLDED_STORE | TB_NO_REVERSE },
+    { "MOVSDto64rr",        "MOVSDmr",         TB_FOLDED_STORE | TB_NO_REVERSE },
+    { "MOVSS2DIrr",         "MOVSSmr",         TB_FOLDED_STORE },
+    { "MOVLHPSrr",          "MOVHPSrm",        TB_NO_REVERSE  },
+    { "PUSH16r",            "PUSH16rmm",       TB_FOLDED_LOAD },
+    { "PUSH32r",            "PUSH32rmm",       TB_FOLDED_LOAD },
+    { "PUSH64r",            "PUSH64rmm",       TB_FOLDED_LOAD },
+    { "TAILJMPr",           "TAILJMPm",        TB_FOLDED_LOAD },
+    { "TAILJMPr64",         "TAILJMPm64",      TB_FOLDED_LOAD },
+    { "TAILJMPr64_REX",     "TAILJMPm64_REX",  TB_FOLDED_LOAD },
+    { "TCRETURNri",         "TCRETURNmi",      TB_FOLDED_LOAD | TB_NO_FORWARD },
+    { "TCRETURNri64",       "TCRETURNmi64",    TB_FOLDED_LOAD | TB_NO_FORWARD },
+    { "VMOVLHPSZrr",        "VMOVHPSZ128rm",   TB_NO_REVERSE  },
+    { "VMOVLHPSrr",         "VMOVHPSrm",       TB_NO_REVERSE  },
+    { "VMOV64toSDZrr",      "MOV64mr",         TB_FOLDED_STORE | TB_NO_REVERSE },
+    { "VMOV64toSDrr",       "MOV64mr",         TB_FOLDED_STORE | TB_NO_REVERSE },
+    { "VMOVDI2SSZrr",       "MOV32mr",         TB_FOLDED_STORE | TB_NO_REVERSE },
+    { "VMOVDI2SSrr",        "MOV32mr",         TB_FOLDED_STORE | TB_NO_REVERSE },
+    { "VMOVPQIto64Zrr",     "VMOVPQI2QIZmr",   TB_FOLDED_STORE | TB_NO_REVERSE },
+    { "VMOVPQIto64rr",      "VMOVPQI2QImr",    TB_FOLDED_STORE | TB_NO_REVERSE },
+    { "VMOVSDto64Zrr",      "VMOVSDZmr",       TB_FOLDED_STORE | TB_NO_REVERSE },
+    { "VMOVSDto64rr",       "VMOVSDmr",        TB_FOLDED_STORE | TB_NO_REVERSE },
+    { "VMOVSS2DIZrr",       "VMOVSSZmr",       TB_FOLDED_STORE },
+    { "VMOVSS2DIrr",        "VMOVSSmr",        TB_FOLDED_STORE },
+    { "MMX_MOVD64to64rr",   "MMX_MOVQ64rm",    0 },
+    { "MOV64toPQIrr",       "MOVQI2PQIrm",     TB_NO_REVERSE },
+    { "MOV64toSDrr",        "MOVSDrm_alt",     TB_NO_REVERSE },
+    { "MOVDI2SSrr",         "MOVSSrm_alt",     0 },
+    { "VMOV64toPQIZrr",     "VMOVQI2PQIZrm",    TB_NO_REVERSE },
+    { "VMOV64toPQIrr",      "VMOVQI2PQIrm",     TB_NO_REVERSE },
+    { "VMOV64toSDZrr",      "VMOVSDZrm_alt",   TB_NO_REVERSE },
+    { "VMOV64toSDrr",       "VMOVSDrm_alt",    TB_NO_REVERSE },
+    { "VMOVDI2SSZrr",       "VMOVSSZrm_alt",   0 },
+    { "VMOVDI2SSrr",        "VMOVSSrm_alt",    0 },
+    { "MOVSDrr",            "MOVLPDrm",         TB_NO_REVERSE },
+    { "VMOVSDZrr",          "VMOVLPDZ128rm",    TB_NO_REVERSE },
+    { "VMOVSDrr",           "VMOVLPDrm",        TB_NO_REVERSE },
+
+    // Part2: These following records are for manually mapping instructions that
+    // have same opcode.
+    // INSERTPSrm has no count_s while INSERTPSrr has count_s.
+    // count_s is to indicate which element in dst vector is inserted.
+    // if count_s!=0, we can't fold INSERTPSrr into INSERTPSrm
+    //
+    // the following folding can happen when count_s==0
+    // load xmm0, m32
+    // insertpsrr xmm1, xmm0, imm
+    // =>
+    // insertpsrm xmm1, m32, imm
+    { "INSERTPSrr",         "INSERTPSrm",      TB_NO_REVERSE | TB_NO_FORWARD },
+    { "UD1Lr",              "UD1Lm",           TB_NO_REVERSE | TB_NO_FORWARD },
+    { "UD1Qr",              "UD1Qm",           TB_NO_REVERSE | TB_NO_FORWARD },
+    { "UD1Wr",              "UD1Wm",           TB_NO_REVERSE | TB_NO_FORWARD },
+    // Remove {"MMX_MOVQ64rr", "MMX_MOVQ64mr"} since it would create a duplicate
+    // in the unfolding table due to {"MMX_MOVD64from64rr", "MMX_MOVQ64mr"}.
+    { "MMX_MOVQ64rr",       "MMX_MOVQ64mr",    TB_NO_FORWARD | TB_NO_REVERSE },
+    // Remove {"MMX_MOVQ64rr", "MMX_MOVQ64rm"} since it would create a duplicate
+    // in the unfolding table due to {"MMX_MOVD64to64rr", "MMX_MOVQ64rm"}.
+    { "MMX_MOVQ64rr",       "MMX_MOVQ64rm",    TB_NO_FORWARD | TB_NO_REVERSE },
+};
+