From 1cdfc98a9981768c475fabf069ba4d3e460deb2a Mon Sep 17 00:00:00 2001 From: Jakub Jelinek Date: Sat, 27 Mar 2021 00:20:42 +0100 Subject: [PATCH] dwarf2cfi: Defer queued register saves some more [PR99334] On the testcase in the PR with -fno-tree-sink -O3 -fPIC -fomit-frame-pointer -fno-strict-aliasing -mstackrealign we have prologue: 0000000000000000 <_func_with_dwarf_issue_>: 0: 4c 8d 54 24 08 lea 0x8(%rsp),%r10 5: 48 83 e4 f0 and $0xfffffffffffffff0,%rsp 9: 41 ff 72 f8 pushq -0x8(%r10) d: 55 push %rbp e: 48 89 e5 mov %rsp,%rbp 11: 41 57 push %r15 13: 41 56 push %r14 15: 41 55 push %r13 17: 41 54 push %r12 19: 41 52 push %r10 1b: 53 push %rbx 1c: 48 83 ec 20 sub $0x20,%rsp and emit 00000000 0000000000000014 00000000 CIE Version: 1 Augmentation: "zR" Code alignment factor: 1 Data alignment factor: -8 Return address column: 16 Augmentation data: 1b DW_CFA_def_cfa: r7 (rsp) ofs 8 DW_CFA_offset: r16 (rip) at cfa-8 DW_CFA_nop DW_CFA_nop 00000018 0000000000000044 0000001c FDE cie=00000000 pc=0000000000000000..00000000000001d5 DW_CFA_advance_loc: 5 to 0000000000000005 DW_CFA_def_cfa: r10 (r10) ofs 0 DW_CFA_advance_loc: 9 to 000000000000000e DW_CFA_expression: r6 (rbp) (DW_OP_breg6 (rbp): 0) DW_CFA_advance_loc: 13 to 000000000000001b DW_CFA_def_cfa_expression (DW_OP_breg6 (rbp): -40; DW_OP_deref) DW_CFA_expression: r15 (r15) (DW_OP_breg6 (rbp): -8) DW_CFA_expression: r14 (r14) (DW_OP_breg6 (rbp): -16) DW_CFA_expression: r13 (r13) (DW_OP_breg6 (rbp): -24) DW_CFA_expression: r12 (r12) (DW_OP_breg6 (rbp): -32) ... unwind info for that. The problem is when async signal (or stepping through in the debugger) stops after the pushq %rbp instruction and before movq %rsp, %rbp, the unwind info says that caller's %rbp is saved there at *%rbp, but that is not true, caller's %rbp is either still available in the %rbp register, or in *%rsp, only after executing the next instruction - movq %rsp, %rbp - the location for %rbp is correct. So, either we'd need to temporarily say: DW_CFA_advance_loc: 9 to 000000000000000e DW_CFA_expression: r6 (rbp) (DW_OP_breg7 (rsp): 0) DW_CFA_advance_loc: 3 to 0000000000000011 DW_CFA_expression: r6 (rbp) (DW_OP_breg6 (rbp): 0) DW_CFA_advance_loc: 10 to 000000000000001b or to me it seems more compact to just say: DW_CFA_advance_loc: 12 to 0000000000000011 DW_CFA_expression: r6 (rbp) (DW_OP_breg6 (rbp): 0) DW_CFA_advance_loc: 10 to 000000000000001b I've tried instead to deal with it through REG_FRAME_RELATED_EXPR from the backend, but that failed miserably as explained in the PR, dwarf2cfi.c has some rules (Rule 16 to Rule 19) that are specific to the dynamic stack realignment using drap register that only the i386 backend does right now, and by using REG_FRAME_RELATED_EXPR or REG_CFA* notes we can't emulate those rules. The following patch instead does the deferring of the hard frame pointer save rule in dwarf2cfi.c Rule 18 handling and emits it on the (set hfp sp) assignment that must appear shortly after it and adds assertion that it is the case. The difference before/after the patch on the assembly is: --- pr99334.s~ 2021-03-26 15:42:40.881749380 +0100 +++ pr99334.s 2021-03-26 17:38:05.729161910 +0100 @@ -11,8 +11,8 @@ _func_with_dwarf_issue_: andq $-16, %rsp pushq -8(%r10) pushq %rbp - .cfi_escape 0x10,0x6,0x2,0x76,0 movq %rsp, %rbp + .cfi_escape 0x10,0x6,0x2,0x76,0 pushq %r15 pushq %r14 pushq %r13 i.e. does just what we IMHO need, after pushq %rbp %rbp still contains parent's frame value and so the save rule doesn't need to be overridden there, ditto at the start of the next insn before the side-effect took effect, and we override it only after it when %rbp already has the right value. If some other target adds dynamic stack realignment in the future and the offset 0 case wouldn't be true there, the code can be adjusted so that it works on all the drap architectures, I'm pretty sure the code would need other adjustments too. For the rule 18 and for the (set hfp sp) after it we already have asserts for the drap cases that check whether the code looks the way i?86/x86_64 emit it currently. 2021-03-26 Jakub Jelinek PR debug/99334 * dwarf2out.h (struct dw_fde_node): Add rule18 member. * dwarf2cfi.c (dwarf2out_frame_debug_expr): When handling (set hfp sp) assignment with drap_reg active, queue reg save for hfp with offset 0 and flush queued reg saves. When handling a push with rule18, defer queueing reg save for hfp and just assert the offset is 0. (scan_trace): Assert that fde->rule18 is false. --- gcc/dwarf2cfi.c | 30 ++++++++++++++++++++++++++---- gcc/dwarf2out.h | 6 ++++++ 2 files changed, 32 insertions(+), 4 deletions(-) diff --git a/gcc/dwarf2cfi.c b/gcc/dwarf2cfi.c index 2fa9f32..362ff3f 100644 --- a/gcc/dwarf2cfi.c +++ b/gcc/dwarf2cfi.c @@ -1695,9 +1695,19 @@ dwarf2out_frame_debug_expr (rtx expr) if (fde && fde->stack_realign && REGNO (src) == STACK_POINTER_REGNUM) - gcc_assert (REGNO (dest) == HARD_FRAME_POINTER_REGNUM - && fde->drap_reg != INVALID_REGNUM - && cur_cfa->reg != dwf_regno (src)); + { + gcc_assert (REGNO (dest) == HARD_FRAME_POINTER_REGNUM + && fde->drap_reg != INVALID_REGNUM + && cur_cfa->reg != dwf_regno (src) + && fde->rule18); + fde->rule18 = 0; + /* The save of hard frame pointer has been deferred + until this point when Rule 18 applied. Emit it now. */ + queue_reg_save (dest, NULL_RTX, 0); + /* And as the instruction modifies the hard frame pointer, + flush the queue as well. */ + dwarf2out_flush_queued_reg_saves (); + } else queue_reg_save (src, dest, 0); } @@ -1907,6 +1917,7 @@ dwarf2out_frame_debug_expr (rtx expr) { gcc_assert (cur_cfa->reg != dw_frame_pointer_regnum); cur_trace->cfa_store.offset = 0; + fde->rule18 = 1; } if (cur_cfa->reg == dw_stack_pointer_regnum) @@ -2041,7 +2052,17 @@ dwarf2out_frame_debug_expr (rtx expr) span = NULL; if (!span) - queue_reg_save (src, NULL_RTX, offset); + { + if (fde->rule18) + /* Just verify the hard frame pointer save when doing dynamic + realignment uses expected offset. The actual queue_reg_save + needs to be deferred until the instruction that sets + hard frame pointer to stack pointer, see PR99334 for + details. */ + gcc_assert (known_eq (offset, 0)); + else + queue_reg_save (src, NULL_RTX, offset); + } else { /* We have a PARALLEL describing where the contents of SRC live. @@ -2732,6 +2753,7 @@ scan_trace (dw_trace_info *trace, bool entry) create_trace_edges (control); } + gcc_assert (!cfun->fde || !cfun->fde->rule18); add_cfi_insn = NULL; cur_row = NULL; cur_trace = NULL; diff --git a/gcc/dwarf2out.h b/gcc/dwarf2out.h index d659a96..76a9c0a 100644 --- a/gcc/dwarf2out.h +++ b/gcc/dwarf2out.h @@ -108,6 +108,12 @@ struct GTY(()) dw_fde_node { /* True iff dw_fde_second_begin label is in text_section or cold_text_section. */ unsigned second_in_std_section : 1; + /* True if Rule 18 described in dwarf2cfi.c is in action, i.e. for dynamic + stack realignment in between pushing of hard frame pointer to stack + and setting hard frame pointer to stack pointer. The register save for + hard frame pointer register should be emitted only on the latter + instruction. */ + unsigned rule18 : 1; }; -- 2.7.4