[PDB][NativeSession] Clean up some things in NativeSession.
authorAmy Huang <akhuang@google.com>
Tue, 21 Jul 2020 23:54:52 +0000 (16:54 -0700)
committerAmy Huang <akhuang@google.com>
Tue, 21 Jul 2020 23:54:52 +0000 (16:54 -0700)
-Use the actual sect/offset to keep track of symbols in the cache so they don't get created multiple times with different addresses.
-Remove getSymTag from PDBFunctionSymbol/PDBPublicSymbol because it's already implemented in the base class
-Merge the symbolizer test files for DIA and native, since the tests are the same.
-Implement getCompilandId for NativeLineNumber

Reviewed By: amccarth

Differential Revision: https://reviews.llvm.org/D84208

12 files changed:
llvm/include/llvm/DebugInfo/PDB/Native/NativeFunctionSymbol.h
llvm/include/llvm/DebugInfo/PDB/Native/NativeLineNumber.h
llvm/include/llvm/DebugInfo/PDB/Native/NativePublicSymbol.h
llvm/include/llvm/DebugInfo/PDB/Native/SymbolCache.h
llvm/lib/DebugInfo/PDB/Native/NativeFunctionSymbol.cpp
llvm/lib/DebugInfo/PDB/Native/NativeLineNumber.cpp
llvm/lib/DebugInfo/PDB/Native/NativePublicSymbol.cpp
llvm/lib/DebugInfo/PDB/Native/NativeSession.cpp
llvm/lib/DebugInfo/PDB/Native/SymbolCache.cpp
llvm/test/tools/llvm-symbolizer/pdb/pdb-columns.test [moved from llvm/test/tools/llvm-symbolizer/pdb/pdb-native-columns.test with 90% similarity]
llvm/test/tools/llvm-symbolizer/pdb/pdb-native.test [deleted file]
llvm/test/tools/llvm-symbolizer/pdb/pdb.test

index 4adf89f..9d3ba88 100644 (file)
@@ -30,7 +30,6 @@ public:
   uint32_t getAddressOffset() const override;
   uint32_t getAddressSection() const override;
   std::string getName() const override;
-  PDB_SymType getSymTag() const override;
   uint64_t getLength() const override;
   uint32_t getRelativeVirtualAddress() const override;
   uint64_t getVirtualAddress() const override;
index a7ce82c..5dedc70 100644 (file)
@@ -22,7 +22,7 @@ public:
                             const codeview::LineInfo Line,
                             uint32_t ColumnNumber, uint32_t Length,
                             uint32_t Section, uint32_t Offset,
-                            uint32_t SrcFileId);
+                            uint32_t SrcFileId, uint32_t CompilandId);
 
   uint32_t getLineNumber() const override;
   uint32_t getLineNumberEnd() const override;
@@ -45,6 +45,7 @@ private:
   uint32_t Offset;
   uint32_t Length;
   uint32_t SrcFileId;
+  uint32_t CompilandId;
 };
 } // namespace pdb
 } // namespace llvm
index 0a14515..9f410e2 100644 (file)
@@ -30,7 +30,6 @@ public:
   uint32_t getAddressOffset() const override;
   uint32_t getAddressSection() const override;
   std::string getName() const override;
-  PDB_SymType getSymTag() const override;
   uint32_t getRelativeVirtualAddress() const override;
   uint64_t getVirtualAddress() const override;
 
index 90fd19a..e78c534 100644 (file)
@@ -61,7 +61,7 @@ class SymbolCache {
   DenseMap<uint32_t, SymIndexId> GlobalOffsetToSymbolId;
 
   /// Map from segment and code offset to SymIndexId.
-  DenseMap<std::pair<uint32_t, uint32_t>, SymIndexId> AddressToFunctionSymId;
+  DenseMap<std::pair<uint32_t, uint32_t>, SymIndexId> AddressToSymbolId;
   DenseMap<std::pair<uint32_t, uint32_t>, SymIndexId> AddressToPublicSymId;
 
   /// Map from virtual address to module index.
index 2537daa..cd77fab 100644 (file)
@@ -19,7 +19,7 @@ using namespace llvm::pdb;
 NativeFunctionSymbol::NativeFunctionSymbol(NativeSession &Session,
                                            SymIndexId Id,
                                            const codeview::ProcSym &Sym)
-    : NativeRawSymbol(Session, PDB_SymType::Data, Id), Sym(Sym) {}
+    : NativeRawSymbol(Session, PDB_SymType::Function, Id), Sym(Sym) {}
 
 NativeFunctionSymbol::~NativeFunctionSymbol() {}
 
@@ -42,10 +42,6 @@ std::string NativeFunctionSymbol::getName() const {
   return std::string(Sym.Name);
 }
 
-PDB_SymType NativeFunctionSymbol::getSymTag() const {
-  return PDB_SymType::Function;
-}
-
 uint64_t NativeFunctionSymbol::getLength() const { return Sym.CodeSize; }
 
 uint32_t NativeFunctionSymbol::getRelativeVirtualAddress() const {
index 2535e09..155ed0c 100644 (file)
@@ -15,9 +15,10 @@ NativeLineNumber::NativeLineNumber(const NativeSession &Session,
                                    const codeview::LineInfo Line,
                                    uint32_t ColumnNumber, uint32_t Section,
                                    uint32_t Offset, uint32_t Length,
-                                   uint32_t SrcFileId)
+                                   uint32_t SrcFileId, uint32_t CompilandId)
     : Session(Session), Line(Line), ColumnNumber(ColumnNumber),
-      Section(Section), Offset(Offset), Length(Length), SrcFileId(SrcFileId) {}
+      Section(Section), Offset(Offset), Length(Length), SrcFileId(SrcFileId),
+      CompilandId(CompilandId) {}
 
 uint32_t NativeLineNumber::getLineNumber() const { return Line.getStartLine(); }
 
@@ -45,6 +46,6 @@ uint32_t NativeLineNumber::getLength() const { return Length; }
 
 uint32_t NativeLineNumber::getSourceFileId() const { return SrcFileId; }
 
-uint32_t NativeLineNumber::getCompilandId() const { return 0; }
+uint32_t NativeLineNumber::getCompilandId() const { return CompilandId; }
 
 bool NativeLineNumber::isStatement() const { return Line.isStatement(); }
index 7086af7..1265e68 100644 (file)
@@ -18,7 +18,7 @@ using namespace llvm::pdb;
 
 NativePublicSymbol::NativePublicSymbol(NativeSession &Session, SymIndexId Id,
                                        const codeview::PublicSym32 &Sym)
-    : NativeRawSymbol(Session, PDB_SymType::Data, Id), Sym(Sym) {}
+    : NativeRawSymbol(Session, PDB_SymType::PublicSymbol, Id), Sym(Sym) {}
 
 NativePublicSymbol::~NativePublicSymbol() {}
 
@@ -39,10 +39,6 @@ std::string NativePublicSymbol::getName() const {
   return std::string(Sym.Name);
 }
 
-PDB_SymType NativePublicSymbol::getSymTag() const {
-  return PDB_SymType::PublicSymbol;
-}
-
 uint32_t NativePublicSymbol::getRelativeVirtualAddress() const {
   return Session.getRVAFromSectOffset(Sym.Segment, Sym.Offset);
 }
index ac8449d..acd2902 100644 (file)
@@ -272,14 +272,14 @@ NativeSession::findLineNumbersByAddress(uint64_t Address,
 
 std::unique_ptr<IPDBEnumLineNumbers>
 NativeSession::findLineNumbersByRVA(uint32_t RVA, uint32_t Length) const {
-  return findLineNumbersByAddress(getLoadAddress() + RVA, Length);
+  return Cache.findLineNumbersByVA(getLoadAddress() + RVA, Length);
 }
 
 std::unique_ptr<IPDBEnumLineNumbers>
 NativeSession::findLineNumbersBySectOffset(uint32_t Section, uint32_t Offset,
                                            uint32_t Length) const {
   uint64_t VA = getVAFromSectOffset(Section, Offset);
-  return findLineNumbersByAddress(VA, Length);
+  return Cache.findLineNumbersByVA(VA, Length);
 }
 
 std::unique_ptr<IPDBEnumSourceFiles>
index 9f15907..db402d1 100644 (file)
@@ -319,8 +319,16 @@ SymbolCache::findSymbolBySectOffset(uint32_t Sect, uint32_t Offset,
     return findFunctionSymbolBySectOffset(Sect, Offset);
   case PDB_SymType::PublicSymbol:
     return findPublicSymbolBySectOffset(Sect, Offset);
+  case PDB_SymType::Compiland: {
+    Optional<uint16_t> Modi =
+        getModuleIndexForAddr(Session.getVAFromSectOffset(Sect, Offset));
+    if (!Modi)
+      return nullptr;
+    return getOrCreateCompiland(*Modi);
+  }
   case PDB_SymType::None: {
-    // FIXME: Implement for PDB_SymType::Data.
+    // FIXME: Implement for PDB_SymType::Data. The symbolizer calls this but
+    // only uses it to find the symbol length.
     if (auto Sym = findFunctionSymbolBySectOffset(Sect, Offset))
       return Sym;
     return nullptr;
@@ -332,8 +340,8 @@ SymbolCache::findSymbolBySectOffset(uint32_t Sect, uint32_t Offset,
 
 std::unique_ptr<PDBSymbol>
 SymbolCache::findFunctionSymbolBySectOffset(uint32_t Sect, uint32_t Offset) {
-  auto Iter = AddressToFunctionSymId.find({Sect, Offset});
-  if (Iter != AddressToFunctionSymId.end())
+  auto Iter = AddressToSymbolId.find({Sect, Offset});
+  if (Iter != AddressToSymbolId.end())
     return getSymbolById(Iter->second);
 
   if (!Dbi)
@@ -357,8 +365,14 @@ SymbolCache::findFunctionSymbolBySectOffset(uint32_t Sect, uint32_t Offset) {
     auto PS = cantFail(SymbolDeserializer::deserializeAs<ProcSym>(*I));
     if (Sect == PS.Segment && Offset >= PS.CodeOffset &&
         Offset < PS.CodeOffset + PS.CodeSize) {
+      // Check if the symbol is already cached.
+      auto Found = AddressToSymbolId.find({PS.Segment, PS.CodeOffset});
+      if (Found != AddressToSymbolId.end())
+        return getSymbolById(Found->second);
+
+      // Otherwise, create a new symbol.
       SymIndexId Id = createSymbol<NativeFunctionSymbol>(PS);
-      AddressToFunctionSymId.insert({{Sect, Offset}, Id});
+      AddressToSymbolId.insert({{PS.Segment, PS.CodeOffset}, Id});
       return getSymbolById(Id);
     }
 
@@ -418,9 +432,16 @@ SymbolCache::findPublicSymbolBySectOffset(uint32_t Sect, uint32_t Offset) {
     consumeError(Sym.takeError());
     return nullptr;
   }
+
+  // Check if the symbol is already cached.
   auto PS = cantFail(SymbolDeserializer::deserializeAs<PublicSym32>(Sym.get()));
+  auto Found = AddressToPublicSymId.find({PS.Segment, PS.Offset});
+  if (Found != AddressToPublicSymId.end())
+    return getSymbolById(Found->second);
+
+  // Otherwise, create a new symbol.
   SymIndexId Id = createSymbol<NativePublicSymbol>(PS);
-  AddressToPublicSymId.insert({{Sect, Offset}, Id});
+  AddressToPublicSymId.insert({{PS.Segment, PS.Offset}, Id});
   return getSymbolById(Id);
 }
 
@@ -587,7 +608,7 @@ SymbolCache::findLineNumbersByVA(uint64_t VA, uint32_t Length) const {
         ExpectedChecksums->getArray().at(LineIter->FileNameIndex);
     uint32_t SrcFileId = getOrCreateSourceFile(*ChecksumIter);
     NativeLineNumber LineNum(Session, LineIter->Line, LineIter->ColumnNumber,
-                             LineSect, LineOff, LineLength, SrcFileId);
+                             LineSect, LineOff, LineLength, SrcFileId, Modi);
     LineNumbers.push_back(LineNum);
     ++LineIter;
   }
@@ -6,6 +6,8 @@ RUN: echo 0x140006C20 >> %t.input
 RUN: echo 0x140006C30 >> %t.input
 RUN: echo 0x140006C40 >> %t.input
 RUN: echo 0x140006C70 >> %t.input
+RUN:    llvm-symbolizer -obj="%p/Inputs/test-columns.exe" < %t.input \
+RUN:    | FileCheck %s
 RUN:    llvm-symbolizer -obj="%p/Inputs/test-columns.exe" -use-native-pdb-reader < %t.input \
 RUN:    | FileCheck %s
 
diff --git a/llvm/test/tools/llvm-symbolizer/pdb/pdb-native.test b/llvm/test/tools/llvm-symbolizer/pdb/pdb-native.test
deleted file mode 100644 (file)
index 9356bbc..0000000
+++ /dev/null
@@ -1,45 +0,0 @@
-RUN: echo 0x401380 > %t.input
-RUN: echo 0x401390 >> %t.input
-RUN: echo 0x4013A0 >> %t.input
-RUN: echo 0x4013C0 >> %t.input
-RUN: echo 0x4013D0 >> %t.input
-RUN: echo 0x4013E0 >> %t.input
-RUN: echo 0x4013F0 >> %t.input
-RUN: echo 0x401420 >> %t.input
-RUN: llvm-symbolizer -obj="%p/Inputs/test.exe" -use-native-pdb-reader < %t.input \
-RUN:    | FileCheck %s
-RUN: llvm-symbolizer -obj="%p/Inputs/test.exe" -demangle=false -use-native-pdb-reader < %t.input \
-RUN:    | FileCheck %s --check-prefix=CHECK-NO-DEMANGLE
-
-Subtract ImageBase from all the offsets and run the test again with
---relative-address.
-
-RUN: %python -c 'import sys;print("\n".join([hex(int(x, 16) - 0x400000) for x in sys.stdin]))' < %t.input \
-RUN:    | llvm-symbolizer -obj="%p/Inputs/test.exe" -use-native-pdb-reader -demangle=false --relative-address \
-RUN:    | FileCheck %s --check-prefix=CHECK-NO-DEMANGLE
-
-CHECK: foo(void)
-CHECK-NEXT: test.cpp:10
-CHECK: {{^private_symbol$}}
-CHECK-NEXT: test.cpp:13:0
-CHECK: {{^main}}
-CHECK-NEXT: test.cpp:16:0
-CHECK: {{^foo_cdecl$}}
-CHECK: {{^foo_stdcall$}}
-CHECK: {{^foo_fastcall$}}
-CHECK: {{^foo_vectorcall$}}
-CHECK: NS::Foo::bar(void)
-CHECK-NEXT: test.cpp:6:0
-
-CHECK-NO-DEMANGLE: ?foo@@YAXXZ
-CHECK-NO-DEMANGLE-NEXT: test.cpp:10
-CHECK-NO-DEMANGLE: private_symbol
-CHECK-NO-DEMANGLE-NEXT: test.cpp:13
-CHECK-NO-DEMANGLE: _main
-CHECK-NO-DEMANGLE-NEXT: test.cpp:16
-CHECK-NO-DEMANGLE: _foo_cdecl
-CHECK-NO-DEMANGLE: _foo_stdcall@0
-CHECK-NO-DEMANGLE: @foo_fastcall@0
-CHECK-NO-DEMANGLE: foo_vectorcall@@0
-CHECK-NO-DEMANGLE: ?bar@Foo@NS@@QAEXXZ
-CHECK-NO-DEMANGLE-NEXT: test.cpp:6
index 062c040..df0e232 100644 (file)
@@ -11,6 +11,12 @@ RUN:    | FileCheck %s
 RUN: llvm-symbolizer -obj="%p/Inputs/test.exe" -demangle=false < %t.input \
 RUN:    | FileCheck %s --check-prefix=CHECK-NO-DEMANGLE
 
+Test with native pdb reader.
+RUN: llvm-symbolizer -use-native-pdb-reader -obj="%p/Inputs/test.exe" < %t.input \
+RUN:    | FileCheck %s
+RUN: llvm-symbolizer -use-native-pdb-reader -obj="%p/Inputs/test.exe" -demangle=false < %t.input \
+RUN:    | FileCheck %s --check-prefix=CHECK-NO-DEMANGLE
+
 Subtract ImageBase from all the offsets and run the test again with
 --relative-address.
 
@@ -18,6 +24,10 @@ RUN: %python -c 'import sys;print("\n".join([hex(int(x, 16) - 0x400000) for x in
 RUN:    | llvm-symbolizer -obj="%p/Inputs/test.exe" -demangle=false --relative-address \
 RUN:    | FileCheck %s --check-prefix=CHECK-NO-DEMANGLE
 
+RUN: %python -c 'import sys;print("\n".join([hex(int(x, 16) - 0x400000) for x in sys.stdin]))' < %t.input \
+RUN:    | llvm-symbolizer -use-native-pdb-reader -obj="%p/Inputs/test.exe" -demangle=false --relative-address \
+RUN:    | FileCheck %s --check-prefix=CHECK-NO-DEMANGLE
+
 CHECK: foo(void)
 CHECK-NEXT: test.cpp:10
 CHECK: {{^private_symbol$}}