[CodeView] Add support for local S_CONSTANT records
authorTobias Hieta <tobias@hieta.se>
Tue, 22 Nov 2022 09:10:36 +0000 (10:10 +0100)
committerTobias Hieta <tobias@hieta.se>
Tue, 6 Dec 2022 09:34:01 +0000 (10:34 +0100)
CodeView doesn't have the ability to represent variables
in other ways than as in registers or memory values, but
LLVM very often transforms simple values into constants,
consider this program:

int f () { int i = 123; return i; }

LLVM will transform `i` into a constant value and just
leave behind a llvm.dbg.value, this can't be represented
as a S_LOCAL record in CodeView. But we can represent it
as a S_CONSTANT record.

This patch checks if the location of a debug value is null,
then we will insert a S_CONSTANT record instead of a S_LOCAL
value with the flag "OptimizedAway".

In lld we then output the S_CONSTANT in the right scope, before
they where always inserted in the global stream, now we check
the scope before inserting it.

This has shown to improve debugging for our developers
internally.

Fixes to llvm/llvm-project#55958

Reviewed By: aganea

Differential Revision: https://reviews.llvm.org/D138995

lld/COFF/PDB.cpp
lld/test/COFF/Inputs/pdb-local-constants.s [new file with mode: 0644]
lld/test/COFF/pdb-local-constants.test [new file with mode: 0644]
llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h
llvm/test/DebugInfo/COFF/local-constant.ll
llvm/test/DebugInfo/COFF/pieces.ll

index 5d14e1e..ecb4656 100644 (file)
@@ -442,7 +442,6 @@ static bool symbolGoesInModuleStream(const CVSymbol &sym,
                                      unsigned symbolScopeDepth) {
   switch (sym.kind()) {
   case SymbolKind::S_GDATA32:
-  case SymbolKind::S_CONSTANT:
   case SymbolKind::S_GTHREAD32:
   // We really should not be seeing S_PROCREF and S_LPROCREF in the first place
   // since they are synthesized by the linker in response to S_GPROC32 and
@@ -451,8 +450,9 @@ static bool symbolGoesInModuleStream(const CVSymbol &sym,
   case SymbolKind::S_PROCREF:
   case SymbolKind::S_LPROCREF:
     return false;
-  // S_UDT records go in the module stream if it is not a global S_UDT.
+  // S_UDT and S_CONSTANT records go in the module stream if it is not a global record.
   case SymbolKind::S_UDT:
+  case SymbolKind::S_CONSTANT:
     return symbolScopeDepth > 0;
   // S_GDATA32 does not go in the module stream, but S_LDATA32 does.
   case SymbolKind::S_LDATA32:
@@ -465,7 +465,6 @@ static bool symbolGoesInModuleStream(const CVSymbol &sym,
 static bool symbolGoesInGlobalsStream(const CVSymbol &sym,
                                       unsigned symbolScopeDepth) {
   switch (sym.kind()) {
-  case SymbolKind::S_CONSTANT:
   case SymbolKind::S_GDATA32:
   case SymbolKind::S_GTHREAD32:
   case SymbolKind::S_GPROC32:
@@ -482,6 +481,7 @@ static bool symbolGoesInGlobalsStream(const CVSymbol &sym,
   case SymbolKind::S_UDT:
   case SymbolKind::S_LDATA32:
   case SymbolKind::S_LTHREAD32:
+  case SymbolKind::S_CONSTANT:
     return symbolScopeDepth == 0;
   default:
     return false;
diff --git a/lld/test/COFF/Inputs/pdb-local-constants.s b/lld/test/COFF/Inputs/pdb-local-constants.s
new file mode 100644 (file)
index 0000000..42d332e
--- /dev/null
@@ -0,0 +1,192 @@
+       .text
+       .def    @feat.00;
+       .scl    3;
+       .type   0;
+       .endef
+       .globl  @feat.00
+.set @feat.00, 0
+       .file   "local_constant.cpp"
+       .def    main;
+       .scl    2;
+       .type   32;
+       .endef
+       .globl  main                            # -- Begin function main
+       .p2align        4, 0x90
+main:                                   # @main
+.Lfunc_begin0:
+       .cv_func_id 0
+# %bb.0:                                # %entry
+       #DEBUG_VALUE: main:i <- 123
+       .cv_file        1 "/home/tobias/code/llvm-project/build/local_constant.cpp" "C33315002D9B48E67EB3E617E430BC02" 1
+       .cv_loc 0 1 7 0                         # local_constant.cpp:7:0
+       movl    $444, %eax                      # imm = 0x1BC
+       retq
+.Ltmp0:
+.Lfunc_end0:
+                                        # -- End function
+       .section        .debug$S,"dr"
+       .p2align        2, 0x0
+       .long   4                               # Debug section magic
+       .long   241
+       .long   .Ltmp2-.Ltmp1                   # Subsection size
+.Ltmp1:
+       .short  .Ltmp4-.Ltmp3                   # Record length
+.Ltmp3:
+       .short  4353                            # Record kind: S_OBJNAME
+       .long   0                               # Signature
+       .byte   0                               # Object name
+       .p2align        2, 0x0
+.Ltmp4:
+       .short  .Ltmp6-.Ltmp5                   # Record length
+.Ltmp5:
+       .short  4412                            # Record kind: S_COMPILE3
+       .long   1                               # Flags and language
+       .short  208                             # CPUType
+       .short  16                              # Frontend version
+       .short  0
+       .short  0
+       .short  0
+       .short  16000                           # Backend version
+       .short  0
+       .short  0
+       .short  0
+       .asciz  "clang version 16.0.0 (git@github.com:llvm/llvm-project.git eef89bd2b3f4a13efcad176bb4c4dda1b1e202ce)" # Null-terminated compiler version string
+       .p2align        2, 0x0
+.Ltmp6:
+.Ltmp2:
+       .p2align        2, 0x0
+       .long   241                             # Symbol subsection for main
+       .long   .Ltmp8-.Ltmp7                   # Subsection size
+.Ltmp7:
+       .short  .Ltmp10-.Ltmp9                  # Record length
+.Ltmp9:
+       .short  4423                            # Record kind: S_GPROC32_ID
+       .long   0                               # PtrParent
+       .long   0                               # PtrEnd
+       .long   0                               # PtrNext
+       .long   .Lfunc_end0-main                # Code size
+       .long   0                               # Offset after prologue
+       .long   0                               # Offset before epilogue
+       .long   4098                            # Function type index
+       .secrel32       main                    # Function section relative address
+       .secidx main                            # Function section index
+       .byte   0                               # Flags
+       .asciz  "main"                          # Function name
+       .p2align        2, 0x0
+.Ltmp10:
+       .short  .Ltmp12-.Ltmp11                 # Record length
+.Ltmp11:
+       .short  4114                            # Record kind: S_FRAMEPROC
+       .long   0                               # FrameSize
+       .long   0                               # Padding
+       .long   0                               # Offset of padding
+       .long   0                               # Bytes of callee saved registers
+       .long   0                               # Exception handler offset
+       .short  0                               # Exception handler section
+       .long   1056768                         # Flags (defines frame register)
+       .p2align        2, 0x0
+.Ltmp12:
+       .short  .Ltmp14-.Ltmp13                 # Record length
+.Ltmp13:
+       .short  4359                            # Record kind: S_CONSTANT
+       .long   116                             # Type
+       .byte   0x7b, 0x00                      # Value
+       .asciz  "i"                             # Name
+       .p2align        2, 0x0
+.Ltmp14:
+       .short  2                               # Record length
+       .short  4431                            # Record kind: S_PROC_ID_END
+.Ltmp8:
+       .p2align        2, 0x0
+       .cv_linetable   0, main, .Lfunc_end0
+       .long   241                             # Symbol subsection for globals
+       .long   .Ltmp16-.Ltmp15                 # Subsection size
+.Ltmp15:
+       .short  .Ltmp18-.Ltmp17                 # Record length
+.Ltmp17:
+       .short  4359                            # Record kind: S_CONSTANT
+       .long   4099                            # Type
+       .byte   0x41, 0x01                      # Value
+       .asciz  "g_const"                       # Name
+       .p2align        2, 0x0
+.Ltmp18:
+.Ltmp16:
+       .p2align        2, 0x0
+       .cv_filechecksums                       # File index to string table offset subsection
+       .cv_stringtable                         # String table
+       .long   241
+       .long   .Ltmp20-.Ltmp19                 # Subsection size
+.Ltmp19:
+       .short  .Ltmp22-.Ltmp21                 # Record length
+.Ltmp21:
+       .short  4428                            # Record kind: S_BUILDINFO
+       .long   4103                            # LF_BUILDINFO index
+       .p2align        2, 0x0
+.Ltmp22:
+.Ltmp20:
+       .p2align        2, 0x0
+       .section        .debug$T,"dr"
+       .p2align        2, 0x0
+       .long   4                               # Debug section magic
+       # ArgList (0x1000)
+       .short  0x6                             # Record length
+       .short  0x1201                          # Record kind: LF_ARGLIST
+       .long   0x0                             # NumArgs
+       # Procedure (0x1001)
+       .short  0xe                             # Record length
+       .short  0x1008                          # Record kind: LF_PROCEDURE
+       .long   0x74                            # ReturnType: int
+       .byte   0x0                             # CallingConvention: NearC
+       .byte   0x0                             # FunctionOptions
+       .short  0x0                             # NumParameters
+       .long   0x1000                          # ArgListType: ()
+       # FuncId (0x1002)
+       .short  0x12                            # Record length
+       .short  0x1601                          # Record kind: LF_FUNC_ID
+       .long   0x0                             # ParentScope
+       .long   0x1001                          # FunctionType: int ()
+       .asciz  "main"                          # Name
+       .byte   243
+       .byte   242
+       .byte   241
+       # Modifier (0x1003)
+       .short  0xa                             # Record length
+       .short  0x1001                          # Record kind: LF_MODIFIER
+       .long   0x74                            # ModifiedType: int
+       .short  0x1                             # Modifiers ( Const (0x1) )
+       .byte   242
+       .byte   241
+       # StringId (0x1004)
+       .short  0x2e                            # Record length
+       .short  0x1605                          # Record kind: LF_STRING_ID
+       .long   0x0                             # Id
+       .asciz  "/home/tobias/code/llvm-project/build" # StringData
+       .byte   243
+       .byte   242
+       .byte   241
+       # StringId (0x1005)
+       .short  0x1a                            # Record length
+       .short  0x1605                          # Record kind: LF_STRING_ID
+       .long   0x0                             # Id
+       .asciz  "local_constant.cpp"            # StringData
+       .byte   241
+       # StringId (0x1006)
+       .short  0xa                             # Record length
+       .short  0x1605                          # Record kind: LF_STRING_ID
+       .long   0x0                             # Id
+       .byte   0                               # StringData
+       .byte   243
+       .byte   242
+       .byte   241
+       # BuildInfo (0x1007)
+       .short  0x1a                            # Record length
+       .short  0x1603                          # Record kind: LF_BUILDINFO
+       .short  0x5                             # NumArgs
+       .long   0x1004                          # Argument: /home/tobias/code/llvm-project/build
+       .long   0x0                             # Argument
+       .long   0x1005                          # Argument: local_constant.cpp
+       .long   0x1006                          # Argument
+       .long   0x0                             # Argument
+       .byte   242
+       .byte   241
+       .addrsig
diff --git a/lld/test/COFF/pdb-local-constants.test b/lld/test/COFF/pdb-local-constants.test
new file mode 100644 (file)
index 0000000..3a95382
--- /dev/null
@@ -0,0 +1,21 @@
+# REQUIRES: x86
+# RUN: llvm-mc -filetype=obj %S/Inputs/pdb-local-constants.s -o %t.obj -triple x86_64-windows-msvc
+# RUN: lld-link -entry:main -nodefaultlib %t.obj -out:%t.exe -pdb:%t.pdb -debug
+# RUN: llvm-pdbutil dump -globals -symbols %t.pdb | FileCheck %s
+#
+# Check that LLD puts the S_CONSTANT records in the right scope
+#
+# Compiled from this C code, using
+# clang t.cpp -g -gcodeview -S
+#
+# const int g_const = 321;
+# int main() { int i = 123; return i + g_const; }
+#
+
+CHECK:                       Global Symbols
+CHECK:           40 | S_CONSTANT [size = 20] `g_const`
+CHECK-NEXT:           type = 0x1002 (const int), value = 321
+
+CHECK:                          Symbols
+CHECK:           220 | S_CONSTANT [size = 12] `i`
+CHECK-NEXT            type = 0x0074 (int), value = 123
\ No newline at end of file
index a377743..2cf31ca 100644 (file)
@@ -1343,7 +1343,17 @@ void CodeViewDebug::calculateRanges(
     Optional<DbgVariableLocation> Location =
         DbgVariableLocation::extractFromMachineInstruction(*DVInst);
     if (!Location)
+    {
+      // When we don't have a location this is usually because LLVM has
+      // transformed it into a constant and we only have an llvm.dbg.value. We
+      // can't represent these well in CodeView since S_LOCAL only works on
+      // registers and memory locations. Instead, we will pretend this to be a
+      // constant value to at least have it show up in the debugger.
+      auto Op = DVInst->getDebugOperand(0);
+      if (Op.isImm())
+        Var.ConstantValue = APSInt(APInt(64, Op.getImm()), false);
       continue;
+    }
 
     // CodeView can only express variables in register and variables in memory
     // at a constant offset from a register. However, for variables passed
@@ -2788,9 +2798,19 @@ void CodeViewDebug::emitLocalVariableList(const FunctionInfo &FI,
     emitLocalVariable(FI, *L);
 
   // Next emit all non-parameters in the order that we found them.
-  for (const LocalVariable &L : Locals)
-    if (!L.DIVar->isParameter())
-      emitLocalVariable(FI, L);
+  for (const LocalVariable &L : Locals) {
+    if (!L.DIVar->isParameter()) {
+      if (L.ConstantValue) {
+        // If ConstantValue is set we will emit it as a S_CONSTANT instead of a
+        // S_LOCAL in order to be able to represent it at all.
+        const DIType *Ty = L.DIVar->getType();
+        APSInt Val(L.ConstantValue.value());
+        emitConstantSymbolRecord(Ty, Val, std::string(L.DIVar->getName()));
+      } else {
+        emitLocalVariable(FI, L);
+      }
+    }
+  }
 }
 
 void CodeViewDebug::emitLocalVariable(const FunctionInfo &FI,
index 6f0968e..495822a 100644 (file)
@@ -104,6 +104,7 @@ private:
               SmallVector<std::pair<const MCSymbol *, const MCSymbol *>, 1>>
         DefRanges;
     bool UseReferenceType = false;
+    std::optional<APSInt> ConstantValue;
   };
 
   struct CVGlobalVariable {
index e5809ec..cf306e7 100644 (file)
@@ -9,18 +9,14 @@
 ;   useint(x);
 ; }
 
-; FIXME: Find a way to describe variables optimized to constants.
-
 ; OBJ:        {{.*}}Proc{{.*}}Sym {
 ; OBJ:           DisplayName: constant_var
 ; OBJ:         }
-; OBJ:         LocalSym {
-; OBJ-NEXT:      Kind:
+; OBJ:         ConstantSym {
+; OBJ-NEXT:      Kind: S_CONSTANT
 ; OBJ-NEXT:      Type: int (0x74)
-; OBJ-NEXT:      Flags [ (0x100)
-; OBJ-NEXT:        IsOptimizedOut (0x100)
-; OBJ-NEXT:      ]
-; OBJ-NEXT:      VarName: x
+; OBJ-NEXT:      Value: 42
+; OBJ-NEXT:      Name: x
 ; OBJ-NEXT:    }
 ; OBJ-NOT:     DefRange
 ; OBJ:         ProcEnd
index 54e266c..c41e3f2 100644 (file)
 
 ; ASM-LABEL:  .short  4423                    # Record kind: S_GPROC32_ID
 ; ASM:        .asciz  "loop_csr"              # Function name
-; ASM:        .short  4414                    # Record kind: S_LOCAL
+; ASM:        .short   4359                    # Record kind: S_CONSTANT
+; ASM:         .long   4099                      # Type
+; ASM:        .byte    0x00, 0x00                # Value
 ; ASM:        .asciz  "o"
-; ASM:        .cv_def_range    [[oy_ox_start]] [[loopskip_start]], subfield_reg, 24, 0
-; ASM:        .cv_def_range    [[oy_ox_start]] [[loopskip_start]], subfield_reg, 23, 4
 
 
 ; OBJ-LABEL: GlobalProcIdSym {
 ; OBJ:         Kind: S_GPROC32_ID (0x1147)
 ; OBJ:         DisplayName: loop_csr
 ; OBJ:       }
-; OBJ:       LocalSym {
-; OBJ:         VarName: o
-; OBJ:       }
-; OBJ:       DefRangeSubfieldRegisterSym {
-; OBJ:         Register: EDI (0x18)
-; OBJ:         MayHaveNoName: 0
-; OBJ:         OffsetInParent: 0
-; OBJ:         LocalVariableAddrRange {
-; OBJ:         }
-; OBJ:       }
-; OBJ:       DefRangeSubfieldRegisterSym {
-; OBJ:         Register: ESI (0x17)
-; OBJ:         MayHaveNoName: 0
-; OBJ:         OffsetInParent: 4
-; OBJ:         LocalVariableAddrRange {
-; OBJ:         }
+; OBJ:       ConstantSym {
+; OBJ:         Kind: S_CONSTANT (0x1107)
+; OBJ:         Type: IntPair (0x1003)
+; OBJ:         Value: 0
+; OBJ:         Name: o
 ; OBJ:       }
 ; OBJ:       ProcEnd {
 ; OBJ:       }
 
 ; ASM-LABEL:  .short  4423                    # Record kind: S_GPROC32_ID
 ; ASM:        .asciz  "bitpiece_spill"        # Function name
-; ASM:        .short  4414                    # Record kind: S_LOCAL
-; ASM:        .asciz  "o"
-; ASM:        .cv_def_range    [[spill_o_x_start]] .Lfunc_end4, reg_rel, 335, 65, 36
+; ASM:         .short  4359                    # Record kind: S_CONSTANT
+; ASM:         .long   4099                      # Type
+; ASM:         .byte   0x00, 0x00                # Value
+; ASM:        .asciz  "o"                     # Name
 
 ; OBJ-LABEL: GlobalProcIdSym {
 ; OBJ:         Kind: S_GPROC32_ID (0x1147)
 ; OBJ:         DisplayName: bitpiece_spill
 ; OBJ:       }
-; OBJ:       LocalSym {
-; OBJ:         VarName: o
-; OBJ:       }
-; OBJ:       DefRangeRegisterRelSym {
-; OBJ:         BaseRegister: RSP (0x14F)
-; OBJ:         HasSpilledUDTMember: Yes
-; OBJ:         OffsetInParent: 4
-; OBJ:         BasePointerOffset: 36
-; OBJ:         LocalVariableAddrRange {
-; OBJ:         }
+; OBJ:       ConstantSym {
+; OBJ:         Kind: S_CONSTANT (0x1107)
+; OBJ:         Type: IntPair (0x1003)
+; OBJ:         Value: 0
+; OBJ:         Name: o
 ; OBJ:       }
 ; OBJ:       ProcEnd {
 ; OBJ:       }