From bdeb029cbced910b00731c41b35b2c98b8791a5b Mon Sep 17 00:00:00 2001 From: Jan Hubicka Date: Wed, 12 Apr 2000 11:22:52 +0000 Subject: [PATCH] i386.c (x86_sub_esp_4, [...]): New global variables. * i386.c (x86_sub_esp_4, x86_sub_esp_8, x86_add_esp_4, x86_add_esp_8): New global variables. (ix86_emit_epilogue_adjustment): Do not attempt to use pop for the adjustment. * i386.h (x86_sub_esp_4, x86_sub_esp_8, x86_add_esp_4, x86_add_esp_8): Declare, (TARGET_SUB_ESP_4, TARGET_SUB_ESP_8, TARGET_ADD_ESP_4, TARGET_ADD_ESP_8): New macros. * i386.md: Add peep2s to convert esp adjustments to push and pop instructions. (pushsi_prologue, popsi_epilogue): New patterns. From-SVN: r33100 --- gcc/ChangeLog | 14 +++++ gcc/config/i386/i386.c | 57 +++++------------- gcc/config/i386/i386.h | 5 ++ gcc/config/i386/i386.md | 152 ++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 184 insertions(+), 44 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index c84f4f2..6705abd 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,17 @@ +Fri Apr 7 12:23:04 MET DST 2000 Jan Hubicka + + * i386.c (x86_sub_esp_4, x86_sub_esp_8, x86_add_esp_4, x86_add_esp_8): + New global variables. + (ix86_emit_epilogue_adjustment): Do not attempt to use pop for the + adjustment. + * i386.h (x86_sub_esp_4, x86_sub_esp_8, x86_add_esp_4, x86_add_esp_8): + Declare, + (TARGET_SUB_ESP_4, TARGET_SUB_ESP_8, TARGET_ADD_ESP_4, + TARGET_ADD_ESP_8): New macros. + * i386.md: Add peep2s to convert esp adjustments to push and pop + instructions. + (pushsi_prologue, popsi_epilogue): New patterns. + 2000-04-12 Jakub Jelinek * real.c (toe64): Remove stale #endif from the last change. 
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 74de03c..85a7c66 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -218,6 +218,10 @@ const int x86_qimode_math = ~(0); const int x86_promote_qi_regs = 0; const int x86_himode_math = ~(m_PPRO); const int x86_promote_hi_regs = m_PPRO; +const int x86_sub_esp_4 = m_ATHLON | m_PPRO; +const int x86_sub_esp_8 = m_ATHLON | m_PPRO | m_386 | m_486; +const int x86_add_esp_4 = m_ATHLON | m_K6; +const int x86_add_esp_8 = m_ATHLON | m_PPRO | m_K6 | m_386 | m_486; #define AT_BP(mode) (gen_rtx_MEM ((mode), hard_frame_pointer_rtx)) @@ -1968,51 +1972,16 @@ static void ix86_emit_epilogue_esp_adjustment (tsize) int tsize; { - /* Intel's docs say that for 4 or 8 bytes of stack frame one should - use `pop' and not `add'. */ - int use_pop = tsize == 4; - rtx edx = 0, ecx; - - /* Use two pops only for the Pentium processors. */ - if (tsize == 8 && !TARGET_386 && !TARGET_486) - { - rtx retval = current_function_return_rtx; - - edx = gen_rtx_REG (SImode, 1); - - /* This case is a bit more complex. Since we cannot pop into - %ecx twice we need a second register. But this is only - available if the return value is not of DImode in which - case the %edx register is not available. */ - use_pop = (retval == NULL - || !reg_overlap_mentioned_p (edx, retval)); - } - - if (use_pop) - { - ecx = gen_rtx_REG (SImode, 2); - - /* We have to prevent the two pops here from being scheduled. - GCC otherwise would try in some situation to put other - instructions in between them which has a bad effect. */ - emit_insn (gen_blockage ()); - emit_insn (gen_popsi1 (ecx)); - if (tsize == 8) - emit_insn (gen_popsi1 (edx)); - } + /* If a frame pointer is present, we must be sure to tie the sp + to the fp so that we don't mis-schedule. 
*/ + if (frame_pointer_needed) + emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx, + stack_pointer_rtx, + GEN_INT (tsize), + hard_frame_pointer_rtx)); else - { - /* If a frame pointer is present, we must be sure to tie the sp - to the fp so that we don't mis-schedule. */ - if (frame_pointer_needed) - emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx, - stack_pointer_rtx, - GEN_INT (tsize), - hard_frame_pointer_rtx)); - else - emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, - GEN_INT (tsize))); - } + emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, + GEN_INT (tsize))); } /* Emit code to restore saved registers using MOV insns. First register diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index b235af9..50f0825 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -174,6 +174,7 @@ extern const int x86_read_modify, x86_split_long_moves; extern const int x86_promote_QImode, x86_single_stringop; extern const int x86_himode_math, x86_qimode_math, x86_promote_qi_regs; extern const int x86_promote_hi_regs; +extern const int x86_add_esp_4, x86_add_esp_8, x86_sub_esp_4, x86_sub_esp_8; #define TARGET_USE_LEAVE (x86_use_leave & CPUMASK) #define TARGET_PUSH_MEMORY (x86_push_memory & CPUMASK) @@ -201,6 +202,10 @@ extern const int x86_promote_hi_regs; #define TARGET_HIMODE_MATH (x86_himode_math & CPUMASK) #define TARGET_PROMOTE_QI_REGS (x86_promote_qi_regs & CPUMASK) #define TARGET_PROMOTE_HI_REGS (x86_promote_hi_regs & CPUMASK) +#define TARGET_ADD_ESP_4 (x86_add_esp_4 & CPUMASK) +#define TARGET_ADD_ESP_8 (x86_add_esp_8 & CPUMASK) +#define TARGET_SUB_ESP_4 (x86_sub_esp_4 & CPUMASK) +#define TARGET_SUB_ESP_8 (x86_sub_esp_8 & CPUMASK) #define TARGET_STACK_PROBE (target_flags & MASK_STACK_PROBE) diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 98c7616..090d0eb 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -1307,6 +1307,24 @@ "push{l}\\t%1" [(set_attr "type" "push")]) 
+(define_insn "*pushsi2_prologue" + [(set (match_operand:SI 0 "push_operand" "=<") + (match_operand:SI 1 "general_no_elim_operand" "ri*m")) + (set (reg:SI 6) (reg:SI 6))] + "" + "push{l}\\t%1" + [(set_attr "type" "push")]) + +(define_insn "*popsi1_epilogue" + [(set (match_operand:SI 0 "nonimmediate_operand" "=r*m") + (mem:SI (reg:SI 7))) + (set (reg:SI 7) + (plus:SI (reg:SI 7) (const_int 4))) + (set (reg:SI 6) (reg:SI 6))] + "" + "pop{l}\\t%0" + [(set_attr "type" "pop")]) + (define_insn "popsi1" [(set (match_operand:SI 0 "nonimmediate_operand" "=r*m") (mem:SI (reg:SI 7))) @@ -9752,6 +9770,140 @@ [(parallel [(set (match_dup 0) (ashift:SI (match_dup 0) (match_dup 2))) (clobber (reg:CC 17))])] "operands[2] = GEN_INT (exact_log2 (INTVAL (operands[1])));") + +;; The ESP adjustments can be done by the push and pop instructions. Resulting +;; code is shorter, since push is only 1 byte, while add imm, %esp is 3 bytes. On +;; many CPUs it is also faster, since special hardware to avoid esp +;; dependencies is present. + +;; While some of these conversions may be done using splitters, we use peepholes +;; in order to allow combine_stack_adjustments pass to see nonobfuscated RTL. + +;; Convert prologue esp subtractions to push. +;; We need a register to push. In order to keep verify_flow_info happy we have +;; two choices +;; - use scratch and clobber it in order to avoid dependencies +;; - use an already live register +;; We can't use the second way right now, since there is no reliable way to +;; verify that a given register is live. The first choice will also most likely result in +;; fewer dependencies. At the point of esp adjustments it is very likely that +;; call clobbered registers are dead. We may want to use the base pointer as an +;; alternative when no register is available later. 
+ +(define_peephole2 + [(match_scratch:SI 0 "r") + (parallel [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -4))) + (set (reg:SI 6) (reg:SI 6)) + (clobber (reg:CC 17))])] + "optimize_size || !TARGET_SUB_ESP_4" + [(clobber (match_dup 0)) + (parallel [(set (mem:SI (pre_dec:SI (reg:SI 7))) (match_dup 0)) + (set (reg:SI 6) (reg:SI 6))])]) + +(define_peephole2 + [(match_scratch:SI 0 "r") + (parallel [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -8))) + (set (reg:SI 6) (reg:SI 6)) + (clobber (reg:CC 17))])] + "optimize_size || !TARGET_SUB_ESP_8" + [(clobber (match_dup 0)) + (set (mem:SI (pre_dec:SI (reg:SI 7))) (match_dup 0)) + (parallel [(set (mem:SI (pre_dec:SI (reg:SI 7))) (match_dup 0)) + (set (reg:SI 6) (reg:SI 6))])]) + +;; Convert esp subtractions to push. +(define_peephole2 + [(match_scratch:SI 0 "r") + (parallel [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -4))) + (clobber (reg:CC 17))])] + "optimize_size || !TARGET_SUB_ESP_4" + [(clobber (match_dup 0)) + (set (mem:SI (pre_dec:SI (reg:SI 7))) (match_dup 0))]) + +(define_peephole2 + [(match_scratch:SI 0 "r") + (parallel [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -8))) + (clobber (reg:CC 17))])] + "optimize_size || !TARGET_SUB_ESP_8" + [(clobber (match_dup 0)) + (set (mem:SI (pre_dec:SI (reg:SI 7))) (match_dup 0)) + (set (mem:SI (pre_dec:SI (reg:SI 7))) (match_dup 0))]) + +;; Convert epilogue deallocator to pop. +(define_peephole2 + [(match_scratch:SI 0 "r") + (parallel [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int 4))) + (set (reg:SI 6) (reg:SI 6)) + (clobber (reg:CC 17))])] + "optimize_size || !TARGET_ADD_ESP_4" + [(parallel [(set (match_dup 0) (mem:SI (reg:SI 7))) + (set (reg:SI 7) (plus:SI (reg:SI 7) (const_int 4))) + (set (reg:SI 6) (reg:SI 6))])] + "") + +;; Two pops case is tricky, since pop causes dependency on destination register. +;; We use two registers if available. 
+(define_peephole2 + [(match_scratch:SI 0 "r") + (match_scratch:SI 1 "r") + (parallel [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int 8))) + (set (reg:SI 6) (reg:SI 6)) + (clobber (reg:CC 17))])] + "optimize_size || !TARGET_ADD_ESP_8" + [(parallel [(set (match_dup 0) (mem:SI (reg:SI 7))) + (set (reg:SI 7) (plus:SI (reg:SI 7) (const_int 4))) + (set (reg:SI 6) (reg:SI 6))]) + (parallel [(set (match_dup 1) (mem:SI (reg:SI 7))) + (set (reg:SI 7) (plus:SI (reg:SI 7) (const_int 4)))])] + "") + +(define_peephole2 + [(match_scratch:SI 0 "r") + (parallel [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int 8))) + (set (reg:SI 6) (reg:SI 6)) + (clobber (reg:CC 17))])] + "optimize_size" + [(parallel [(set (match_dup 0) (mem:SI (reg:SI 7))) + (set (reg:SI 7) (plus:SI (reg:SI 7) (const_int 4))) + (set (reg:SI 6) (reg:SI 6))]) + (parallel [(set (match_dup 0) (mem:SI (reg:SI 7))) + (set (reg:SI 7) (plus:SI (reg:SI 7) (const_int 4)))])] + "") + +;; Convert esp additions to pop. +(define_peephole2 + [(match_scratch:SI 0 "r") + (parallel [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int 4))) + (clobber (reg:CC 17))])] + "" + [(parallel [(set (match_dup 0) (mem:SI (reg:SI 7))) + (set (reg:SI 7) (plus:SI (reg:SI 7) (const_int 4)))])] + "") + +;; Two pops case is tricky, since pop causes dependency on destination register. +;; We use two registers if available. 
+(define_peephole2 + [(match_scratch:SI 0 "r") + (match_scratch:SI 1 "r") + (parallel [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int 8))) + (clobber (reg:CC 17))])] + "" + [(parallel [(set (match_dup 0) (mem:SI (reg:SI 7))) + (set (reg:SI 7) (plus:SI (reg:SI 7) (const_int 4)))]) + (parallel [(set (match_dup 1) (mem:SI (reg:SI 7))) + (set (reg:SI 7) (plus:SI (reg:SI 7) (const_int 4)))])] + "") + +(define_peephole2 + [(match_scratch:SI 0 "r") + (parallel [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int 8))) + (clobber (reg:CC 17))])] + "optimize_size" + [(parallel [(set (match_dup 0) (mem:SI (reg:SI 7))) + (set (reg:SI 7) (plus:SI (reg:SI 7) (const_int 4)))]) + (parallel [(set (match_dup 0) (mem:SI (reg:SI 7))) + (set (reg:SI 7) (plus:SI (reg:SI 7) (const_int 4)))])] + "") ;; Call-value patterns last so that the wildcard operand does not ;; disrupt insn-recog's switch tables. -- 2.7.4