[CodeView] Add source languages ObjC and ObjC++
author Stefan Gränitz <stefan.graenitz@gmail.com>
Fri, 17 Mar 2023 16:08:20 +0000 (17:08 +0100)
committer Stefan Gränitz <stefan.graenitz@gmail.com>
Fri, 17 Mar 2023 16:09:31 +0000 (17:09 +0100)
This patch adds llvm::codeview::SourceLanguage entries, DWARF translations, and PDB source file extensions in LLVM, and allows LLDB's PDB parsers to recognize them correctly.

The CV_CFL_LANG enum in the Visual Studio 2022 documentation https://learn.microsoft.com/en-us/visualstudio/debugger/debug-interface-access/cv-cfl-lang defines:
```
    CV_CFL_OBJC     = 0x11,
    CV_CFL_OBJCXX   = 0x12,
```

Since the initial commit in D24317, ObjC had been emitted as the C language and ObjC++ as Masm (the fallback used for languages that have no CodeView representation).

Reviewed By: DavidSpickett

Differential Revision: https://reviews.llvm.org/D146221

lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.cpp
lldb/source/Plugins/SymbolFile/PDB/SymbolFilePDB.cpp
llvm/include/llvm/DebugInfo/CodeView/CodeView.h
llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
llvm/lib/DebugInfo/CodeView/EnumTables.cpp
llvm/lib/DebugInfo/PDB/PDBExtras.cpp
llvm/lib/DebugInfo/PDB/PDBSymbolCompiland.cpp
llvm/test/DebugInfo/COFF/language.ll
llvm/test/DebugInfo/COFF/objc.ll [new file with mode: 0644]
llvm/test/DebugInfo/COFF/objcpp.ll [new file with mode: 0644]
llvm/tools/llvm-pdbutil/MinimalSymbolDumper.cpp

index 075d4b042d2aa4cc795177eb507a17b3584908ba..df558c842f9c199b60d4fbf918c4b8b61ff81d06 100644 (file)
@@ -76,6 +76,10 @@ static lldb::LanguageType TranslateLanguage(PDB_Lang lang) {
     return lldb::LanguageType::eLanguageTypeSwift;
   case PDB_Lang::Rust:
     return lldb::LanguageType::eLanguageTypeRust;
+  case PDB_Lang::ObjC:
+    return lldb::LanguageType::eLanguageTypeObjC;
+  case PDB_Lang::ObjCpp:
+    return lldb::LanguageType::eLanguageTypeObjC_plus_plus;
   default:
     return lldb::LanguageType::eLanguageTypeUnknown;
   }
index cb75dd59d3668477707126b7dd97c33e6d4745cf..613e3620712702a4e6d03ea27aaa61d73f9b1a32 100644 (file)
@@ -80,6 +80,10 @@ lldb::LanguageType TranslateLanguage(PDB_Lang lang) {
     return lldb::LanguageType::eLanguageTypeSwift;
   case PDB_Lang::Rust:
     return lldb::LanguageType::eLanguageTypeRust;
+  case PDB_Lang::ObjC:
+    return lldb::LanguageType::eLanguageTypeObjC;
+  case PDB_Lang::ObjCpp:
+    return lldb::LanguageType::eLanguageTypeObjC_plus_plus;
   default:
     return lldb::LanguageType::eLanguageTypeUnknown;
   }
index 010a82dd0e232c0bb8d52150fc8b3c6d29aa3f3b..a9ad99a1d0a84c0d7780293ae3ab6b65c282281b 100644 (file)
@@ -138,8 +138,8 @@ enum class CPUType : uint16_t {
   D3D11_Shader = 0x100,
 };
 
-/// These values correspond to the CV_CFL_LANG enumeration, and are documented
-/// here: https://msdn.microsoft.com/en-us/library/bw3aekw6.aspx
+/// These values correspond to the CV_CFL_LANG enumeration in the Microsoft
+/// Debug Interface Access SDK
 enum SourceLanguage : uint8_t {
   C = 0x00,
   Cpp = 0x01,
@@ -158,6 +158,8 @@ enum SourceLanguage : uint8_t {
   JScript = 0x0e,
   MSIL = 0x0f,
   HLSL = 0x10,
+  ObjC = 0x11,
+  ObjCpp = 0x12,
 
   Rust = 0x15,
 
index fd4ea6e08703eaa4f61011efaa2d60fa216f31b1..86cdd11ebd17e0c8e3f3319aae9457444f607e47 100644 (file)
@@ -569,7 +569,6 @@ static SourceLanguage MapDWLangToCVLang(unsigned DWLang) {
   case dwarf::DW_LANG_C89:
   case dwarf::DW_LANG_C99:
   case dwarf::DW_LANG_C11:
-  case dwarf::DW_LANG_ObjC:
     return SourceLanguage::C;
   case dwarf::DW_LANG_C_plus_plus:
   case dwarf::DW_LANG_C_plus_plus_03:
@@ -595,6 +594,10 @@ static SourceLanguage MapDWLangToCVLang(unsigned DWLang) {
     return SourceLanguage::Swift;
   case dwarf::DW_LANG_Rust:
     return SourceLanguage::Rust;
+  case dwarf::DW_LANG_ObjC:
+    return SourceLanguage::ObjC;
+  case dwarf::DW_LANG_ObjC_plus_plus:
+    return SourceLanguage::ObjCpp;
   default:
     // There's no CodeView representation for this language, and CV doesn't
     // have an "unknown" option for the language field, so we'll use MASM,
index 78a258600696e410d876d7b82cd95cb886e2a95a..b2f0099bd01c5bc5cd06f6b0fc9cd46c945a54fe 100644 (file)
@@ -105,6 +105,7 @@ static const EnumEntry<codeview::SourceLanguage> SourceLanguages[] = {
     CV_ENUM_ENT(SourceLanguage, JScript), CV_ENUM_ENT(SourceLanguage, MSIL),
     CV_ENUM_ENT(SourceLanguage, HLSL),    CV_ENUM_ENT(SourceLanguage, D),
     CV_ENUM_ENT(SourceLanguage, Swift),   CV_ENUM_ENT(SourceLanguage, Rust),
+    CV_ENUM_ENT(SourceLanguage, ObjC),    CV_ENUM_ENT(SourceLanguage, ObjCpp),
 };
 
 static const EnumEntry<uint32_t> CompileSym2FlagNames[] = {
index 571510e6bad9c1e38e6d6d90e8f25ed473fbd312..2b318bf1c6488f54aa5b26a628bb3e2e43e2785d 100644 (file)
@@ -232,6 +232,8 @@ raw_ostream &llvm::pdb::operator<<(raw_ostream &OS, const PDB_Lang &Lang) {
     CASE_OUTPUT_ENUM_CLASS_NAME(PDB_Lang, D, OS)
     CASE_OUTPUT_ENUM_CLASS_NAME(PDB_Lang, Swift, OS)
     CASE_OUTPUT_ENUM_CLASS_NAME(PDB_Lang, Rust, OS)
+    CASE_OUTPUT_ENUM_CLASS_NAME(PDB_Lang, ObjC, OS)
+    CASE_OUTPUT_ENUM_CLASS_STR(PDB_Lang, ObjCpp, "ObjC++", OS)
   }
   return OS;
 }
index bd60489b6bed6b57d7e1034f2990fc01f2d03376..437b96677c0b5315c126bde6214e0a1b69098d83 100644 (file)
@@ -102,6 +102,8 @@ std::string PDBSymbolCompiland::getSourceFileFullPath() const {
               .Case(".asm", Lang == PDB_Lang::Masm)
               .Case(".swift", Lang == PDB_Lang::Swift)
               .Case(".rs", Lang == PDB_Lang::Rust)
+              .Case(".m", Lang == PDB_Lang::ObjC)
+              .Case(".mm", Lang == PDB_Lang::ObjCpp)
               .Default(false))
         return File->getFileName();
     }
index 3db67e3ebbbe1d988eed3486abbe34eb5381dd3b..8870acca6abdf136f5e4b0e116ce5b07ee5abf62 100644 (file)
 ;
 ; RUN: sed -e 's/<LANG1>/DW_LANG_Rust/;s/<LANG2>/Rust/' %s > %t
 ; RUN: llc -filetype=obj -o - %t | llvm-readobj --codeview - | FileCheck %t
+;
+; RUN: sed -e 's/<LANG1>/DW_LANG_ObjC/;s/<LANG2>/ObjC/' %s > %t
+; RUN: llc -filetype=obj -o - %t | llvm-readobj --codeview - | FileCheck %t
+;
+; RUN: sed -e 's/<LANG1>/DW_LANG_ObjC_plus_plus/;s/<LANG2>/ObjCpp/' %s > %t
+; RUN: llc -filetype=obj -o - %t | llvm-readobj --codeview - | FileCheck %t
 
 ; CHECK:      CodeViewDebugInfo [
 ; CHECK:        Subsection [
diff --git a/llvm/test/DebugInfo/COFF/objc.ll b/llvm/test/DebugInfo/COFF/objc.ll
new file mode 100644 (file)
index 0000000..7e4ddf8
--- /dev/null
@@ -0,0 +1,35 @@
+; RUN: llc < %s | FileCheck %s --check-prefix=ASM
+; RUN: llc -filetype=obj < %s | llvm-readobj --codeview - | FileCheck %s --check-prefix=OBJ
+
+; ASM:      .short  4412                    # Record kind: S_COMPILE3
+; ASM-NEXT: .long   17                      # Flags and language
+
+; OBJ:       Kind: S_COMPILE3 (0x113C)
+; OBJ-NEXT:  Language: ObjC (0x11)
+
+; ModuleID = 'objc.m'
+source_filename = "objc.m"
+target datalayout = "e-m:w-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-windows-msvc"
+
+; Function Attrs: uwtable
+define void @f() unnamed_addr #0 !dbg !5 {
+entry:
+  ret void, !dbg !9
+}
+
+attributes #0 = { uwtable "target-cpu"="x86-64" }
+
+!llvm.module.flags = !{!0, !1, !2}
+!llvm.dbg.cu = !{!3}
+
+!0 = !{i32 8, !"PIC Level", i32 2}
+!1 = !{i32 2, !"CodeView", i32 1}
+!2 = !{i32 2, !"Debug Info Version", i32 3}
+!3 = distinct !DICompileUnit(language: DW_LANG_ObjC, file: !4, producer: "clang version 17.0.0 (https://github.com/llvm/llvm-project a8e9beca6bee1f248ef4be7892802c4d091b7fcb)", isOptimized: false, runtimeVersion: 1, emissionKind: FullDebug, splitDebugInlining: false, nameTableKind: None)
+!4 = !DIFile(filename: "objc.m", directory: "src", checksumkind: CSK_MD5, checksum: "e6ab1d5b7f82464c963a8522037dfa72")
+!5 = distinct !DISubprogram(name: "f", scope: !4, file: !4, line: 1, type: !6, scopeLine: 1, spFlags: DISPFlagDefinition, unit: !3, retainedNodes: !8)
+!6 = !DISubroutineType(types: !7)
+!7 = !{null}
+!8 = !{}
+!9 = !DILocation(line: 1, scope: !5)
diff --git a/llvm/test/DebugInfo/COFF/objcpp.ll b/llvm/test/DebugInfo/COFF/objcpp.ll
new file mode 100644 (file)
index 0000000..49786af
--- /dev/null
@@ -0,0 +1,35 @@
+; RUN: llc < %s | FileCheck %s --check-prefix=ASM
+; RUN: llc -filetype=obj < %s | llvm-readobj --codeview - | FileCheck %s --check-prefix=OBJ
+
+; ASM:      .short  4412                    # Record kind: S_COMPILE3
+; ASM-NEXT: .long   18                      # Flags and language
+
+; OBJ:       Kind: S_COMPILE3 (0x113C)
+; OBJ-NEXT:  Language: ObjCpp (0x12)
+
+; ModuleID = 'objcpp.mm'
+source_filename = "objcpp.mm"
+target datalayout = "e-m:w-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-windows-msvc"
+
+; Function Attrs: uwtable
+define void @"?f@@YAXXZ"() #0 !dbg !5 {
+entry:
+  ret void, !dbg !9
+}
+
+attributes #0 = { uwtable "target-cpu"="x86-64" }
+
+!llvm.module.flags = !{!0, !1, !2}
+!llvm.dbg.cu = !{!3}
+
+!0 = !{i32 8, !"PIC Level", i32 2}
+!1 = !{i32 2, !"CodeView", i32 1}
+!2 = !{i32 2, !"Debug Info Version", i32 3}
+!3 = distinct !DICompileUnit(language: DW_LANG_ObjC_plus_plus, file: !4, producer: "clang version 17.0.0 (https://github.com/llvm/llvm-project a8e9beca6bee1f248ef4be7892802c4d091b7fcb)", isOptimized: false, runtimeVersion: 1, emissionKind: FullDebug, splitDebugInlining: false, nameTableKind: None)
+!4 = !DIFile(filename: "objcpp.mm", directory: "src", checksumkind: CSK_MD5, checksum: "e6ab1d5b7f82464c963a8522037dfa72")
+!5 = distinct !DISubprogram(name: "f", linkageName: "?f@@YAXXZ", scope: !4, file: !4, line: 1, type: !6, scopeLine: 1, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !3, retainedNodes: !8)
+!6 = !DISubroutineType(types: !7)
+!7 = !{null}
+!8 = !{}
+!9 = !DILocation(line: 1, scope: !5)
index 8e17284871a9ad4e8867ec94748dae9c3b05bb0a..46481d37862991a8988a020460565563ef0239bd 100644 (file)
@@ -210,6 +210,8 @@ static std::string formatSourceLanguage(SourceLanguage Lang) {
     RETURN_CASE(SourceLanguage, D, "d");
     RETURN_CASE(SourceLanguage, Swift, "swift");
     RETURN_CASE(SourceLanguage, Rust, "rust");
+    RETURN_CASE(SourceLanguage, ObjC, "objc");
+    RETURN_CASE(SourceLanguage, ObjCpp, "objc++");
   }
   return formatUnknownEnum(Lang);
 }