[MCDisassembler] Disambiguate Size parameter in tryAddingSymbolicOperand()
authorMaksim Panchenko <maks@fb.com>
Thu, 19 May 2022 20:23:40 +0000 (13:23 -0700)
committerMaksim Panchenko <maks@fb.com>
Wed, 25 May 2022 20:44:32 +0000 (13:44 -0700)
MCSymbolizer::tryAddingSymbolicOperand() overloaded the Size parameter
to specify either the instruction size or the operand size depending on
the architecture. However, for proper symbolic disassembly on X86, we
need to know both sizes, as an instruction can have two operands, and
the instruction size cannot be reliably calculated based on the operand
offset and its size. Hence, split Size into OpSize and InstSize.

For X86, the new interface allows to fix a couple of issues:
  * Correctly adjust the value of PC-relative operands.
  * Set operand size to zero when the operand is specified implicitly.

Differential Revision: https://reviews.llvm.org/D126101

21 files changed:
llvm/include/llvm-c/DisassemblerTypes.h
llvm/include/llvm/MC/MCDisassembler/MCDisassembler.h
llvm/include/llvm/MC/MCDisassembler/MCExternalSymbolizer.h
llvm/include/llvm/MC/MCDisassembler/MCSymbolizer.h
llvm/lib/MC/MCDisassembler/MCDisassembler.cpp
llvm/lib/MC/MCDisassembler/MCExternalSymbolizer.cpp
llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
llvm/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.cpp
llvm/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.h
llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
llvm/lib/Target/ARC/Disassembler/ARCDisassembler.cpp
llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
llvm/lib/Target/Lanai/Disassembler/LanaiDisassembler.cpp
llvm/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp
llvm/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp
llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp
llvm/tools/llvm-objdump/MachODump.cpp
llvm/unittests/MC/X86/CMakeLists.txt [new file with mode: 0644]
llvm/unittests/MC/X86/X86MCDisassemblerTest.cpp [new file with mode: 0644]

index 53baaef..6999a35 100644 (file)
@@ -38,15 +38,15 @@ typedef void *LLVMDisasmContextRef;
  * one operand with symbolic information.  To determine the symbolic operand
  * information for each operand, the bytes for the specific operand in the
  * instruction are specified by the Offset parameter and its byte widith is the
- * size parameter.  For instructions sets with fixed widths and one symbolic
- * operand per instruction, the Offset parameter will be zero and Size parameter
- * will be the instruction width.  The information is returned in TagBuf and is
- * Triple specific with its specific information defined by the value of
- * TagType for that Triple.  If symbolic information is returned the function
- * returns 1, otherwise it returns 0.
+ * OpSize parameter.  For instructions sets with fixed widths and one symbolic
+ * operand per instruction, the Offset parameter will be zero and InstSize
+ * parameter will be the instruction width.  The information is returned in
+ * TagBuf and is Triple specific with its specific information defined by the
+ * value of TagType for that Triple.  If symbolic information is returned the
+ * function * returns 1, otherwise it returns 0.
  */
-typedef int (*LLVMOpInfoCallback)(void *DisInfo, uint64_t PC,
-                                  uint64_t Offset, uint64_t Size,
+typedef int (*LLVMOpInfoCallback)(void *DisInfo, uint64_t PC, uint64_t Offset,
+                                  uint64_t OpSize, uint64_t InstSize,
                                   int TagType, void *TagBuf);
 
 /**
index 7060620..de069ff 100644 (file)
@@ -181,10 +181,9 @@ protected:
 
 public:
   // Helpers around MCSymbolizer
-  bool tryAddingSymbolicOperand(MCInst &Inst,
-                                int64_t Value,
-                                uint64_t Address, bool IsBranch,
-                                uint64_t Offset, uint64_t InstSize) const;
+  bool tryAddingSymbolicOperand(MCInst &Inst, int64_t Value, uint64_t Address,
+                                bool IsBranch, uint64_t Offset, uint64_t OpSize,
+                                uint64_t InstSize) const;
 
   void tryAddingPcLoadReferenceComment(int64_t Value, uint64_t Address) const;
 
index 18aa781..8af3bb2 100644 (file)
@@ -46,7 +46,8 @@ public:
 
   bool tryAddingSymbolicOperand(MCInst &MI, raw_ostream &CommentStream,
                                 int64_t Value, uint64_t Address, bool IsBranch,
-                                uint64_t Offset, uint64_t InstSize) override;
+                                uint64_t Offset, uint64_t OpSize,
+                                uint64_t InstSize) override;
   void tryAddingPcLoadReferenceComment(raw_ostream &CommentStream,
                                        int64_t Value,
                                        uint64_t Address) override;
index 3e20e13..1efb63f 100644 (file)
@@ -63,12 +63,13 @@ public:
   /// \param Address   - Load address of the instruction.
   /// \param IsBranch  - Is the instruction a branch?
   /// \param Offset    - Byte offset of the operand inside the inst.
+  /// \param OpSize    - Size of the operand in bytes.
   /// \param InstSize  - Size of the instruction in bytes.
   /// \return Whether a symbolic operand was added.
   virtual bool tryAddingSymbolicOperand(MCInst &Inst, raw_ostream &cStream,
                                         int64_t Value, uint64_t Address,
                                         bool IsBranch, uint64_t Offset,
-                                        uint64_t InstSize) = 0;
+                                        uint64_t OpSize, uint64_t InstSize) = 0;
 
   /// Try to add a comment on the PC-relative load.
   /// For instance, in Mach-O, this is used to add annotations to instructions
index a95385f..c6035dc 100644 (file)
@@ -22,11 +22,12 @@ MCDisassembler::onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size,
 
 bool MCDisassembler::tryAddingSymbolicOperand(MCInst &Inst, int64_t Value,
                                               uint64_t Address, bool IsBranch,
-                                              uint64_t Offset,
+                                              uint64_t Offset, uint64_t OpSize,
                                               uint64_t InstSize) const {
   if (Symbolizer)
-    return Symbolizer->tryAddingSymbolicOperand(
-        Inst, *CommentStream, Value, Address, IsBranch, Offset, InstSize);
+    return Symbolizer->tryAddingSymbolicOperand(Inst, *CommentStream, Value,
+                                                Address, IsBranch, Offset,
+                                                OpSize, InstSize);
   return false;
 }
 
index 7befef8..e3f4cdd 100644 (file)
@@ -31,19 +31,15 @@ class Triple;
 // is found an MCExpr is created with that, else an MCExpr with Value is
 // created. This function returns true if it adds an operand to the MCInst and
 // false otherwise.
-bool MCExternalSymbolizer::tryAddingSymbolicOperand(MCInst &MI,
-                                                    raw_ostream &cStream,
-                                                    int64_t Value,
-                                                    uint64_t Address,
-                                                    bool IsBranch,
-                                                    uint64_t Offset,
-                                                    uint64_t InstSize) {
+bool MCExternalSymbolizer::tryAddingSymbolicOperand(
+    MCInst &MI, raw_ostream &cStream, int64_t Value, uint64_t Address,
+    bool IsBranch, uint64_t Offset, uint64_t OpSize, uint64_t InstSize) {
   struct LLVMOpInfo1 SymbolicOp;
   std::memset(&SymbolicOp, '\0', sizeof(struct LLVMOpInfo1));
   SymbolicOp.Value = Value;
 
   if (!GetOpInfo ||
-      !GetOpInfo(DisInfo, Address, Offset, InstSize, 1, &SymbolicOp)) {
+      !GetOpInfo(DisInfo, Address, Offset, OpSize, InstSize, 1, &SymbolicOp)) {
     // Clear SymbolicOp.Value from above and also all other fields.
     std::memset(&SymbolicOp, '\0', sizeof(struct LLVMOpInfo1));
 
@@ -53,10 +49,10 @@ bool MCExternalSymbolizer::tryAddingSymbolicOperand(MCInst &MI,
     // that always makes sense to guess.  But in the case of an immediate it is
     // a bit more questionable if it is an address of a symbol or some other
     // reference.  So if the immediate Value comes from a width of 1 byte,
-    // InstSize, we will not guess it is an address of a symbol.  Because in
+    // OpSize, we will not guess it is an address of a symbol.  Because in
     // object files assembled starting at address 0 this usually leads to
     // incorrect symbolication.
-    if (!SymbolLookUp || (InstSize == 1 && !IsBranch))
+    if (!SymbolLookUp || (OpSize == 1 && !IsBranch))
       return false;
 
     uint64_t ReferenceType;
index ffbed4c..1b65589 100644 (file)
@@ -749,7 +749,7 @@ static DecodeStatus DecodePCRelLabel19(MCInst &Inst, unsigned Imm,
     ImmVal |= ~((1LL << 19) - 1);
 
   if (!Decoder->tryAddingSymbolicOperand(
-          Inst, ImmVal * 4, Addr, Inst.getOpcode() != AArch64::LDRXl, 0, 4))
+          Inst, ImmVal * 4, Addr, Inst.getOpcode() != AArch64::LDRXl, 0, 0, 4))
     Inst.addOperand(MCOperand::createImm(ImmVal));
   return Success;
 }
@@ -1030,7 +1030,7 @@ DecodeUnsignedLdStInstruction(MCInst &Inst, uint32_t insn, uint64_t Addr,
   }
 
   DecodeGPR64spRegisterClass(Inst, Rn, Addr, Decoder);
-  if (!Decoder->tryAddingSymbolicOperand(Inst, offset, Addr, Fail, 0, 4))
+  if (!Decoder->tryAddingSymbolicOperand(Inst, offset, Addr, Fail, 0, 0, 4))
     Inst.addOperand(MCOperand::createImm(offset));
   return Success;
 }
@@ -1640,7 +1640,7 @@ static DecodeStatus DecodeAdrInstruction(MCInst &Inst, uint32_t insn,
     imm |= ~((1LL << 21) - 1);
 
   DecodeGPR64RegisterClass(Inst, Rd, Addr, Decoder);
-  if (!Decoder->tryAddingSymbolicOperand(Inst, imm, Addr, Fail, 0, 4))
+  if (!Decoder->tryAddingSymbolicOperand(Inst, imm, Addr, Fail, 0, 0, 4))
     Inst.addOperand(MCOperand::createImm(imm));
 
   return Success;
@@ -1675,7 +1675,7 @@ static DecodeStatus DecodeAddSubImmShift(MCInst &Inst, uint32_t insn,
     DecodeGPR32spRegisterClass(Inst, Rn, Addr, Decoder);
   }
 
-  if (!Decoder->tryAddingSymbolicOperand(Inst, Imm, Addr, Fail, 0, 4))
+  if (!Decoder->tryAddingSymbolicOperand(Inst, Imm, Addr, Fail, 0, 0, 4))
     Inst.addOperand(MCOperand::createImm(ImmVal));
   Inst.addOperand(MCOperand::createImm(12 * ShifterVal));
   return Success;
@@ -1690,7 +1690,7 @@ static DecodeStatus DecodeUnconditionalBranch(MCInst &Inst, uint32_t insn,
   if (imm & (1 << (26 - 1)))
     imm |= ~((1LL << 26) - 1);
 
-  if (!Decoder->tryAddingSymbolicOperand(Inst, imm * 4, Addr, true, 0, 4))
+  if (!Decoder->tryAddingSymbolicOperand(Inst, imm * 4, Addr, true, 0, 0, 4))
     Inst.addOperand(MCOperand::createImm(imm));
 
   return Success;
@@ -1742,7 +1742,7 @@ static DecodeStatus DecodeTestAndBranch(MCInst &Inst, uint32_t insn,
   else
     DecodeGPR64RegisterClass(Inst, Rt, Addr, Decoder);
   Inst.addOperand(MCOperand::createImm(bit));
-  if (!Decoder->tryAddingSymbolicOperand(Inst, dst * 4, Addr, true, 0, 4))
+  if (!Decoder->tryAddingSymbolicOperand(Inst, dst * 4, Addr, true, 0, 0, 4))
     Inst.addOperand(MCOperand::createImm(dst));
 
   return Success;
index 5b6f06f..11964b2 100644 (file)
@@ -60,7 +60,7 @@ getVariant(uint64_t LLVMDisassembler_VariantKind) {
 /// an operand to the MCInst and Fail otherwise.
 bool AArch64ExternalSymbolizer::tryAddingSymbolicOperand(
     MCInst &MI, raw_ostream &CommentStream, int64_t Value, uint64_t Address,
-    bool IsBranch, uint64_t Offset, uint64_t InstSize) {
+    bool IsBranch, uint64_t Offset, uint64_t OpSize, uint64_t InstSize) {
   if (!SymbolLookUp)
     return false;
   // FIXME: This method shares a lot of code with
@@ -73,8 +73,8 @@ bool AArch64ExternalSymbolizer::tryAddingSymbolicOperand(
   SymbolicOp.Value = Value;
   uint64_t ReferenceType;
   const char *ReferenceName;
-  if (!GetOpInfo ||
-      !GetOpInfo(DisInfo, Address, 0 /* Offset */, InstSize, 1, &SymbolicOp)) {
+  if (!GetOpInfo || !GetOpInfo(DisInfo, Address, /*Offset=*/0, OpSize, InstSize,
+                               1, &SymbolicOp)) {
     if (IsBranch) {
       ReferenceType = LLVMDisassembler_ReferenceType_In_Branch;
       const char *Name = SymbolLookUp(DisInfo, Address + Value, &ReferenceType,
index dc72331..ca677db 100644 (file)
@@ -29,7 +29,8 @@ public:
 
   bool tryAddingSymbolicOperand(MCInst &MI, raw_ostream &CommentStream,
                                 int64_t Value, uint64_t Address, bool IsBranch,
-                                uint64_t Offset, uint64_t InstSize) override;
+                                uint64_t Offset, uint64_t OpSize,
+                                uint64_t InstSize) override;
 };
 
 } // namespace llvm
index bd75272..4d5c6d1 100644 (file)
@@ -82,7 +82,7 @@ static DecodeStatus decodeSoppBrTarget(MCInst &Inst, unsigned Imm,
   APInt SignedOffset(18, Imm * 4, true);
   int64_t Offset = (SignedOffset.sext(64) + 4 + Addr).getSExtValue();
 
-  if (DAsm->tryAddingSymbolicOperand(Inst, Offset, Addr, true, 2, 2))
+  if (DAsm->tryAddingSymbolicOperand(Inst, Offset, Addr, true, 2, 2, 0))
     return MCDisassembler::Success;
   return addOperand(Inst, MCOperand::createImm(Imm));
 }
@@ -1918,10 +1918,10 @@ AMDGPUDisassembler::onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size,
 //===----------------------------------------------------------------------===//
 
 // Try to find symbol name for specified label
-bool AMDGPUSymbolizer::tryAddingSymbolicOperand(MCInst &Inst,
-                                raw_ostream &/*cStream*/, int64_t Value,
-                                uint64_t /*Address*/, bool IsBranch,
-                                uint64_t /*Offset*/, uint64_t /*InstSize*/) {
+bool AMDGPUSymbolizer::tryAddingSymbolicOperand(
+    MCInst &Inst, raw_ostream & /*cStream*/, int64_t Value,
+    uint64_t /*Address*/, bool IsBranch, uint64_t /*Offset*/,
+    uint64_t /*OpSize*/, uint64_t /*InstSize*/) {
 
   if (!IsBranch) {
     return false;
index f46ad9d..5db6484 100644 (file)
@@ -217,8 +217,8 @@ public:
                    : MCSymbolizer(Ctx, std::move(RelInfo)), DisInfo(disInfo) {}
 
   bool tryAddingSymbolicOperand(MCInst &Inst, raw_ostream &cStream,
-                                int64_t Value, uint64_t Address,
-                                bool IsBranch, uint64_t Offset,
+                                int64_t Value, uint64_t Address, bool IsBranch,
+                                uint64_t Offset, uint64_t OpSize,
                                 uint64_t InstSize) override;
 
   void tryAddingPcLoadReferenceComment(raw_ostream &cStream,
index 9e59e9d..6181017 100644 (file)
@@ -181,7 +181,7 @@ static bool DecodeSymbolicOperand(MCInst &Inst, uint64_t Address,
                                   const MCDisassembler *Decoder) {
   static const uint64_t AtLeast = 2;
   return (nullptr != Decoder && Decoder->tryAddingSymbolicOperand(
-                                    Inst, Value, Address, true, 0, AtLeast));
+                                    Inst, Value, Address, true, 0, AtLeast, 0));
 }
 
 static void DecodeSymbolicOperandOff(MCInst &Inst, uint64_t Address,
index 8bfa70c..9acd492 100644 (file)
@@ -835,7 +835,8 @@ static bool tryAddingSymbolicOperand(uint64_t Address, int32_t Value,
                                      const MCDisassembler *Decoder) {
   // FIXME: Does it make sense for value to be negative?
   return Decoder->tryAddingSymbolicOperand(MI, (uint32_t)Value, Address,
-                                           isBranch, /* Offset */ 0, InstSize);
+                                           isBranch, /*Offset=*/0, /*OpSize=*/0,
+                                           InstSize);
 }
 
 /// tryAddingPcLoadReferenceComment - trys to add a comment as to what is being
index 58c51e7..58d5df4 100644 (file)
@@ -768,7 +768,8 @@ static DecodeStatus brtargetDecoder(MCInst &MI, unsigned tmp, uint64_t Address,
     Bits = 15;
   uint64_t FullValue = fullValue(Disassembler, MI, SignExtend64(tmp, Bits));
   uint32_t Extended = FullValue + Address;
-  if (!Disassembler.tryAddingSymbolicOperand(MI, Extended, Address, true, 0, 4))
+  if (!Disassembler.tryAddingSymbolicOperand(MI, Extended, Address, true, 0, 0,
+                                             4))
     HexagonMCInstrInfo::addConstant(MI, Extended, Disassembler.getContext());
   return MCDisassembler::Success;
 }
index d91670b..e9fecef 100644 (file)
@@ -215,7 +215,7 @@ static bool tryAddingSymbolicOperand(int64_t Value, bool IsBranch,
                                      uint64_t Width, MCInst &MI,
                                      const MCDisassembler *Decoder) {
   return Decoder->tryAddingSymbolicOperand(MI, Value, Address, IsBranch, Offset,
-                                           Width);
+                                           Width, /*InstSize=*/0);
 }
 
 static DecodeStatus decodeBranch(MCInst &MI, unsigned Insn, uint64_t Address,
index da4d927..1825b95 100644 (file)
@@ -509,7 +509,7 @@ static bool tryAddingSymbolicOperand(int64_t Value, bool isBranch,
                                      uint64_t Width, MCInst &MI,
                                      const MCDisassembler *Decoder) {
   return Decoder->tryAddingSymbolicOperand(MI, Value, Address, isBranch, Offset,
-                                           Width);
+                                           Width, /*InstSize=*/4);
 }
 
 static DecodeStatus DecodeCall(MCInst &MI, unsigned insn, uint64_t Address,
index 94f0f10..979141a 100644 (file)
@@ -75,7 +75,7 @@ static bool tryAddingSymbolicOperand(int64_t Value, bool isBranch,
                                      uint64_t Width, MCInst &MI,
                                      const MCDisassembler *Decoder) {
   return Decoder->tryAddingSymbolicOperand(MI, Value, Address, isBranch, Offset,
-                                           Width);
+                                           Width, /*InstSize=*/0);
 }
 
 static DecodeStatus decodeRegisterClass(MCInst &Inst, uint64_t RegNo,
index 8ca5c48..e8b9ee6 100644 (file)
@@ -1874,8 +1874,7 @@ static void translateImmediate(MCInst &mcInst, uint64_t immediate,
   uint64_t pcrel = 0;
   if (type == TYPE_REL) {
     isBranch = true;
-    pcrel = insn.startLocation +
-            insn.immediateOffset + insn.immediateSize;
+    pcrel = insn.startLocation + insn.length;
     switch (operand.encoding) {
     default:
       break;
@@ -1950,9 +1949,9 @@ static void translateImmediate(MCInst &mcInst, uint64_t immediate,
     break;
   }
 
-  if (!Dis->tryAddingSymbolicOperand(mcInst, immediate + pcrel,
-                                     insn.startLocation, isBranch,
-                                     insn.immediateOffset, insn.immediateSize))
+  if (!Dis->tryAddingSymbolicOperand(
+          mcInst, immediate + pcrel, insn.startLocation, isBranch,
+          insn.immediateOffset, insn.immediateSize, insn.length))
     mcInst.addOperand(MCOperand::createImm(immediate));
 
   if (type == TYPE_MOFFS) {
@@ -2089,8 +2088,7 @@ static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn,
         return true;
       }
       if (insn.mode == MODE_64BIT){
-        pcrel = insn.startLocation +
-                insn.displacementOffset + insn.displacementSize;
+        pcrel = insn.startLocation + insn.length;
         Dis->tryAddingPcLoadReferenceComment(insn.displacement + pcrel,
                                              insn.startLocation +
                                                  insn.displacementOffset);
@@ -2153,9 +2151,13 @@ static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn,
   mcInst.addOperand(baseReg);
   mcInst.addOperand(scaleAmount);
   mcInst.addOperand(indexReg);
+
+  const uint8_t dispSize =
+      (insn.eaDisplacement == EA_DISP_NONE) ? 0 : insn.displacementSize;
+
   if (!Dis->tryAddingSymbolicOperand(
           mcInst, insn.displacement + pcrel, insn.startLocation, false,
-          insn.displacementOffset, insn.displacementSize))
+          insn.displacementOffset, dispSize, insn.length))
     mcInst.addOperand(displacement);
   mcInst.addOperand(segmentReg);
   return false;
index c8c31d2..60c3415 100644 (file)
@@ -2608,7 +2608,8 @@ struct DisassembleInfo {
 // value of TagType is currently 1 (for the LLVMOpInfo1 struct). If symbolic
 // information is returned then this function returns 1 else it returns 0.
 static int SymbolizerGetOpInfo(void *DisInfo, uint64_t Pc, uint64_t Offset,
-                               uint64_t Size, int TagType, void *TagBuf) {
+                               uint64_t OpSize, uint64_t InstSize, int TagType,
+                               void *TagBuf) {
   struct DisassembleInfo *info = (struct DisassembleInfo *)DisInfo;
   struct LLVMOpInfo1 *op_info = (struct LLVMOpInfo1 *)TagBuf;
   uint64_t value = op_info->Value;
@@ -2625,7 +2626,7 @@ static int SymbolizerGetOpInfo(void *DisInfo, uint64_t Pc, uint64_t Offset,
 
   unsigned int Arch = info->O->getArch();
   if (Arch == Triple::x86) {
-    if (Size != 1 && Size != 2 && Size != 4 && Size != 0)
+    if (OpSize != 1 && OpSize != 2 && OpSize != 4 && OpSize != 0)
       return 0;
     if (info->O->getHeader().filetype != MachO::MH_OBJECT) {
       // TODO:
@@ -2705,7 +2706,7 @@ static int SymbolizerGetOpInfo(void *DisInfo, uint64_t Pc, uint64_t Offset,
     return 0;
   }
   if (Arch == Triple::x86_64) {
-    if (Size != 1 && Size != 2 && Size != 4 && Size != 0)
+    if (OpSize != 1 && OpSize != 2 && OpSize != 4 && OpSize != 0)
       return 0;
     // For non MH_OBJECT types, like MH_KEXT_BUNDLE, Search the external
     // relocation entries of a linked image (if any) for an entry that matches
@@ -2737,7 +2738,7 @@ static int SymbolizerGetOpInfo(void *DisInfo, uint64_t Pc, uint64_t Offset,
         // adds the Pc.  But for x86_64 external relocation entries the Value
         // is the offset from the external symbol.
         if (info->O->getAnyRelocationPCRel(RE))
-          op_info->Value -= Pc + Offset + Size;
+          op_info->Value -= Pc + InstSize;
         const char *name =
             unwrapOrError(Symbol.getName(), info->O->getFileName()).data();
         op_info->AddSymbol.Present = 1;
@@ -2775,7 +2776,7 @@ static int SymbolizerGetOpInfo(void *DisInfo, uint64_t Pc, uint64_t Offset,
       // adds the Pc.  But for x86_64 external relocation entries the Value
       // is the offset from the external symbol.
       if (info->O->getAnyRelocationPCRel(RE))
-        op_info->Value -= Pc + Offset + Size;
+        op_info->Value -= Pc + InstSize;
       const char *name =
           unwrapOrError(Symbol.getName(), info->O->getFileName()).data();
       unsigned Type = info->O->getAnyRelocationType(RE);
@@ -2803,7 +2804,7 @@ static int SymbolizerGetOpInfo(void *DisInfo, uint64_t Pc, uint64_t Offset,
     return 0;
   }
   if (Arch == Triple::arm) {
-    if (Offset != 0 || (Size != 4 && Size != 2))
+    if (Offset != 0 || (InstSize != 4 && InstSize != 2))
       return 0;
     if (info->O->getHeader().filetype != MachO::MH_OBJECT) {
       // TODO:
@@ -2940,7 +2941,7 @@ static int SymbolizerGetOpInfo(void *DisInfo, uint64_t Pc, uint64_t Offset,
     return 1;
   }
   if (Arch == Triple::aarch64) {
-    if (Offset != 0 || Size != 4)
+    if (Offset != 0 || InstSize != 4)
       return 0;
     if (info->O->getHeader().filetype != MachO::MH_OBJECT) {
       // TODO:
diff --git a/llvm/unittests/MC/X86/CMakeLists.txt b/llvm/unittests/MC/X86/CMakeLists.txt
new file mode 100644 (file)
index 0000000..212e6e4
--- /dev/null
@@ -0,0 +1,12 @@
+set(LLVM_LINK_COMPONENTS
+  MC
+  MCDisassembler
+  Target
+  X86Desc
+  X86Disassembler
+  X86Info
+  )
+
+add_llvm_unittest(X86MCTests
+  X86MCDisassemblerTest.cpp
+  )
diff --git a/llvm/unittests/MC/X86/X86MCDisassemblerTest.cpp b/llvm/unittests/MC/X86/X86MCDisassemblerTest.cpp
new file mode 100644 (file)
index 0000000..97e717c
--- /dev/null
@@ -0,0 +1,145 @@
+//===- X86MCDisassemblerTest.cpp - Tests for X86 MCDisassembler -----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCDisassembler/MCDisassembler.h"
+#include "llvm/MC/MCDisassembler/MCSymbolizer.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCTargetOptions.h"
+#include "llvm/MC/TargetRegistry.h"
+#include "llvm/Support/TargetSelect.h"
+#include "gtest/gtest.h"
+
+using namespace llvm;
+
+namespace {
+
+struct Context {
+  const char *TripleName = "x86_64-unknown-elf";
+  std::unique_ptr<MCRegisterInfo> MRI;
+  std::unique_ptr<MCAsmInfo> MAI;
+  std::unique_ptr<MCContext> Ctx;
+  std::unique_ptr<MCSubtargetInfo> STI;
+  std::unique_ptr<MCDisassembler> DisAsm;
+
+  Context() {
+    LLVMInitializeX86TargetInfo();
+    LLVMInitializeX86TargetMC();
+    LLVMInitializeX86Disassembler();
+
+    // If we didn't build x86, do not run the test.
+    std::string Error;
+    const Target *TheTarget = TargetRegistry::lookupTarget(TripleName, Error);
+    if (!TheTarget)
+      return;
+
+    MRI.reset(TheTarget->createMCRegInfo(TripleName));
+    MAI.reset(TheTarget->createMCAsmInfo(*MRI, TripleName, MCTargetOptions()));
+    STI.reset(TheTarget->createMCSubtargetInfo(TripleName, "", ""));
+    Ctx = std::make_unique<MCContext>(Triple(TripleName), MAI.get(), MRI.get(),
+                                      STI.get());
+
+    DisAsm.reset(TheTarget->createMCDisassembler(*STI, *Ctx));
+  }
+
+  operator MCContext &() { return *Ctx; };
+};
+
+Context &getContext() {
+  static Context Ctxt;
+  return Ctxt;
+}
+
+class X86MCSymbolizerTest : public MCSymbolizer {
+public:
+  X86MCSymbolizerTest(MCContext &MC) : MCSymbolizer(MC, nullptr) {}
+  ~X86MCSymbolizerTest() {}
+
+  struct OpInfo {
+    int64_t Value = 0;
+    uint64_t Offset = 0;
+    uint64_t Size;
+  };
+  std::vector<OpInfo> Operands;
+  uint64_t InstructionSize = 0;
+
+  void reset() {
+    Operands.clear();
+    InstructionSize = 0;
+  }
+
+  bool tryAddingSymbolicOperand(MCInst &Inst, raw_ostream &CStream,
+                                int64_t Value, uint64_t Address, bool IsBranch,
+                                uint64_t Offset, uint64_t OpSize,
+                                uint64_t InstSize) override {
+    Operands.push_back({Value, Offset, OpSize});
+    InstructionSize = InstSize;
+    return false;
+  }
+
+  void tryAddingPcLoadReferenceComment(raw_ostream &cStream, int64_t Value,
+                                       uint64_t Address) override {}
+};
+
+} // namespace
+
+TEST(X86Disassembler, X86MCSymbolizerTest) {
+  X86MCSymbolizerTest *TestSymbolizer = new X86MCSymbolizerTest(getContext());
+  getContext().DisAsm->setSymbolizer(
+      std::unique_ptr<MCSymbolizer>(TestSymbolizer));
+
+  MCDisassembler::DecodeStatus Status;
+  MCInst Inst;
+  uint64_t InstSize;
+
+  auto checkBytes = [&](ArrayRef<uint8_t> Bytes) {
+    TestSymbolizer->reset();
+    Status =
+        getContext().DisAsm->getInstruction(Inst, InstSize, Bytes, 0, nulls());
+    ASSERT_TRUE(Status == MCDisassembler::Success);
+    EXPECT_EQ(TestSymbolizer->InstructionSize, InstSize);
+  };
+
+  auto checkOperand = [&](size_t OpNo, int64_t Value, uint64_t Offset,
+                          uint64_t Size) {
+    ASSERT_TRUE(TestSymbolizer->Operands.size() > OpNo);
+    EXPECT_EQ(TestSymbolizer->Operands[OpNo].Value, Value);
+    EXPECT_EQ(TestSymbolizer->Operands[OpNo].Offset, Offset);
+    EXPECT_EQ(TestSymbolizer->Operands[OpNo].Size, Size);
+  };
+
+  // movq    $0x80000, 0x80000
+  checkBytes(
+      {0x48, 0xc7, 0x04, 0x25, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x08, 0x00});
+  checkOperand(0, 0x80000, 4, 4);
+  checkOperand(1, 0x80000, 8, 4);
+
+  // movq   $0x2a, 0x123(%rax,%r14,8)
+  checkBytes(
+      {0x4a, 0xc7, 0x84, 0xf0, 0x23, 0x01, 0x00, 0x00, 0x2a, 0x00, 0x00, 0x00});
+  checkOperand(0, 291, 4, 4);
+  checkOperand(1, 42, 8, 4);
+
+  // movq   $0xffffffffffffefe8, -0x1(%rip)
+  // Test that the value of the rip-relative operand is set correctly.
+  // The instruction address is 0 and the size is 12 bytes.
+  checkBytes(
+      {0x48, 0xc7, 0x05, 0xff, 0xff, 0xff, 0xff, 0xe8, 0xef, 0xff, 0xff});
+  checkOperand(0, /*next instr address*/ 11 - /*disp*/ 1, 3, 4);
+  checkOperand(1, 0xffffffffffffefe8, 7, 4);
+
+  // movq   $0xfffffffffffffef5, (%r12)
+  // Test that the displacement operand has a size of 0, since it is not
+  // explicitly specified in the instruction.
+  checkBytes({0x49, 0xc7, 0x04, 0x24, 0xf5, 0xfe, 0xff, 0xff});
+  checkOperand(0, 0, 4, 0);
+  checkOperand(1, 0xfffffffffffffef5, 4, 4);
+}