From bdeb029cbced910b00731c41b35b2c98b8791a5b Mon Sep 17 00:00:00 2001 From: Jan Hubicka Date: Wed, 12 Apr 2000 11:22:52 +0000 Subject: [PATCH] i386.c (x86_sub_esp_4, [...]): New global variables. * i386.c (x86_sub_esp_4, x86_sub_esp_8, x86_add_esp_4, x86_add_esp_8): New global variables. (ix86_emit_epilogue_adjustment): Do not attempt to use pop for the adjustment. * i386.h (x86_sub_esp_4, x86_sub_esp_8, x86_add_esp_4, x86_add_esp_8): Declare, (TARGET_SUB_ESP_4, TARGET_SUB_ESP_8, TARGET_ADD_ESP_4, TARGET_ADD_ESP_8): New macros. * i386.md: Add peep2s to convert esp adjustments to push and pop instructions. (pushsi_prologue, popsi_epilogue): New patterns. From-SVN: r33100 --- gcc/ChangeLog | 14 +++++ gcc/config/i386/i386.c | 57 +++++------------- gcc/config/i386/i386.h | 5 ++ gcc/config/i386/i386.md | 152 ++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 184 insertions(+), 44 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index c84f4f2..6705abd 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,17 @@ +Fri Apr 7 12:23:04 MET DST 2000 Jan Hubicka + + * i386.c (x86_sub_esp_4, x86_sub_esp_8, x86_add_esp_4, x86_add_esp_8): + New global variables. + (ix86_emit_epilogue_adjustment): Do not attempt to use pop for the + adjustment. + * i386.h (x86_sub_esp_4, x86_sub_esp_8, x86_add_esp_4, x86_add_esp_8): + Declare, + (TARGET_SUB_ESP_4, TARGET_SUB_ESP_8, TARGET_ADD_ESP_4, + TARGET_ADD_ESP_8): New macros. + * i386.md: Add peep2s to convert esp adjustments to push and pop + instructions. + (pushsi_prologue, popsi_epilogue): New patterns. + 2000-04-12 Jakub Jelinek * real.c (toe64): Remove stale #endif from the last change. 
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 74de03c..85a7c66 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -218,6 +218,10 @@ const int x86_qimode_math = ~(0); const int x86_promote_qi_regs = 0; const int x86_himode_math = ~(m_PPRO); const int x86_promote_hi_regs = m_PPRO; +const int x86_sub_esp_4 = m_ATHLON | m_PPRO; +const int x86_sub_esp_8 = m_ATHLON | m_PPRO | m_386 | m_486; +const int x86_add_esp_4 = m_ATHLON | m_K6; +const int x86_add_esp_8 = m_ATHLON | m_PPRO | m_K6 | m_386 | m_486; #define AT_BP(mode) (gen_rtx_MEM ((mode), hard_frame_pointer_rtx)) @@ -1968,51 +1972,16 @@ static void ix86_emit_epilogue_esp_adjustment (tsize) int tsize; { - /* Intel's docs say that for 4 or 8 bytes of stack frame one should - use `pop' and not `add'. */ - int use_pop = tsize == 4; - rtx edx = 0, ecx; - - /* Use two pops only for the Pentium processors. */ - if (tsize == 8 && !TARGET_386 && !TARGET_486) - { - rtx retval = current_function_return_rtx; - - edx = gen_rtx_REG (SImode, 1); - - /* This case is a bit more complex. Since we cannot pop into - %ecx twice we need a second register. But this is only - available if the return value is not of DImode in which - case the %edx register is not available. */ - use_pop = (retval == NULL - || !reg_overlap_mentioned_p (edx, retval)); - } - - if (use_pop) - { - ecx = gen_rtx_REG (SImode, 2); - - /* We have to prevent the two pops here from being scheduled. - GCC otherwise would try in some situation to put other - instructions in between them which has a bad effect. */ - emit_insn (gen_blockage ()); - emit_insn (gen_popsi1 (ecx)); - if (tsize == 8) - emit_insn (gen_popsi1 (edx)); - } + /* If a frame pointer is present, we must be sure to tie the sp + to the fp so that we don't mis-schedule. 
*/ + if (frame_pointer_needed) + emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx, + stack_pointer_rtx, + GEN_INT (tsize), + hard_frame_pointer_rtx)); else - { - /* If a frame pointer is present, we must be sure to tie the sp - to the fp so that we don't mis-schedule. */ - if (frame_pointer_needed) - emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx, - stack_pointer_rtx, - GEN_INT (tsize), - hard_frame_pointer_rtx)); - else - emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, - GEN_INT (tsize))); - } + emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, + GEN_INT (tsize))); } /* Emit code to restore saved registers using MOV insns. First register diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index b235af9..50f0825 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -174,6 +174,7 @@ extern const int x86_read_modify, x86_split_long_moves; extern const int x86_promote_QImode, x86_single_stringop; extern const int x86_himode_math, x86_qimode_math, x86_promote_qi_regs; extern const int x86_promote_hi_regs; +extern const int x86_add_esp_4, x86_add_esp_8, x86_sub_esp_4, x86_sub_esp_8; #define TARGET_USE_LEAVE (x86_use_leave & CPUMASK) #define TARGET_PUSH_MEMORY (x86_push_memory & CPUMASK) @@ -201,6 +202,10 @@ extern const int x86_promote_hi_regs; #define TARGET_HIMODE_MATH (x86_himode_math & CPUMASK) #define TARGET_PROMOTE_QI_REGS (x86_promote_qi_regs & CPUMASK) #define TARGET_PROMOTE_HI_REGS (x86_promote_hi_regs & CPUMASK) +#define TARGET_ADD_ESP_4 (x86_add_esp_4 & CPUMASK) +#define TARGET_ADD_ESP_8 (x86_add_esp_8 & CPUMASK) +#define TARGET_SUB_ESP_4 (x86_sub_esp_4 & CPUMASK) +#define TARGET_SUB_ESP_8 (x86_sub_esp_8 & CPUMASK) #define TARGET_STACK_PROBE (target_flags & MASK_STACK_PROBE) diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 98c7616..090d0eb 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -1307,6 +1307,24 @@ "push{l}\\t%1" [(set_attr "type" "push")]) 
+(define_insn "*pushsi2_prologue" + [(set (match_operand:SI 0 "push_operand" "=<") + (match_operand:SI 1 "general_no_elim_operand" "ri*m")) + (set (reg:SI 6) (reg:SI 6))] + "" + "push{l}\\t%1" + [(set_attr "type" "push")]) + +(define_insn "*popsi1_epilogue" + [(set (match_operand:SI 0 "nonimmediate_operand" "=r*m") + (mem:SI (reg:SI 7))) + (set (reg:SI 7) + (plus:SI (reg:SI 7) (const_int 4))) + (set (reg:SI 6) (reg:SI 6))] + "" + "pop{l}\\t%0" + [(set_attr "type" "pop")]) + (define_insn "popsi1" [(set (match_operand:SI 0 "nonimmediate_operand" "=r*m") (mem:SI (reg:SI 7))) @@ -9752,6 +9770,140 @@ [(parallel [(set (match_dup 0) (ashift:SI (match_dup 0) (match_dup 2))) (clobber (reg:CC 17))])] "operands[2] = GEN_INT (exact_log2 (INTVAL (operands[1])));") + +;; The ESP adjustments can be done by the push and pop instructions. Resulting +;; code is shorter, since push is only 1 byte, while add imm, %esp is 3 bytes. On +;; many CPUs it is also faster, since special hardware to avoid esp +;; dependencies is present. + +;; While some of these conversions may be done using splitters, we use peepholes +;; in order to allow combine_stack_adjustments pass to see nonobfuscated RTL. + +;; Convert prologue esp subtractions to push. +;; We need a register to push. In order to keep verify_flow_info happy we have +;; two choices +;; - use scratch and clobber it in order to avoid dependencies +;; - use an already live register +;; We can't use the second way right now, since there is no reliable way to +;; verify that a given register is live. The first choice will also most likely result in +;; fewer dependencies. At the point of esp adjustments it is very likely that +;; call clobbered registers are dead. We may want to use the base pointer as an +;; alternative when no register is available later. 
+ +(define_peephole2 + [(match_scratch:SI 0 "r") + (parallel [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -4))) + (set (reg:SI 6) (reg:SI 6)) + (clobber (reg:CC 17))])] + "optimize_size || !TARGET_SUB_ESP_4" + [(clobber (match_dup 0)) + (parallel [(set (mem:SI (pre_dec:SI (reg:SI 7))) (match_dup 0)) + (set (reg:SI 6) (reg:SI 6))])]) + +(define_peephole2 + [(match_scratch:SI 0 "r") + (parallel [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -8))) + (set (reg:SI 6) (reg:SI 6)) + (clobber (reg:CC 17))])] + "optimize_size || !TARGET_SUB_ESP_8" + [(clobber (match_dup 0)) + (set (mem:SI (pre_dec:SI (reg:SI 7))) (match_dup 0)) + (parallel [(set (mem:SI (pre_dec:SI (reg:SI 7))) (match_dup 0)) + (set (reg:SI 6) (reg:SI 6))])]) + +;; Convert esp subtractions to push. +(define_peephole2 + [(match_scratch:SI 0 "r") + (parallel [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -4))) + (clobber (reg:CC 17))])] + "optimize_size || !TARGET_SUB_ESP_4" + [(clobber (match_dup 0)) + (set (mem:SI (pre_dec:SI (reg:SI 7))) (match_dup 0))]) + +(define_peephole2 + [(match_scratch:SI 0 "r") + (parallel [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -8))) + (clobber (reg:CC 17))])] + "optimize_size || !TARGET_SUB_ESP_8" + [(clobber (match_dup 0)) + (set (mem:SI (pre_dec:SI (reg:SI 7))) (match_dup 0)) + (set (mem:SI (pre_dec:SI (reg:SI 7))) (match_dup 0))]) + +;; Convert epilogue deallocator to pop. +(define_peephole2 + [(match_scratch:SI 0 "r") + (parallel [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int 4))) + (set (reg:SI 6) (reg:SI 6)) + (clobber (reg:CC 17))])] + "optimize_size || !TARGET_ADD_ESP_4" + [(parallel [(set (match_dup 0) (mem:SI (reg:SI 7))) + (set (reg:SI 7) (plus:SI (reg:SI 7) (const_int 4))) + (set (reg:SI 6) (reg:SI 6))])] + "") + +;; Two pops case is tricky, since pop causes dependency on destination register. +;; We use two registers if available. 
+(define_peephole2 + [(match_scratch:SI 0 "r") + (match_scratch:SI 1 "r") + (parallel [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int 8))) + (set (reg:SI 6) (reg:SI 6)) + (clobber (reg:CC 17))])] + "optimize_size || !TARGET_ADD_ESP_8" + [(parallel [(set (match_dup 0) (mem:SI (reg:SI 7))) + (set (reg:SI 7) (plus:SI (reg:SI 7) (const_int 4))) + (set (reg:SI 6) (reg:SI 6))]) + (parallel [(set (match_dup 1) (mem:SI (reg:SI 7))) + (set (reg:SI 7) (plus:SI (reg:SI 7) (const_int 4)))])] + "") + +(define_peephole2 + [(match_scratch:SI 0 "r") + (parallel [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int 8))) + (set (reg:SI 6) (reg:SI 6)) + (clobber (reg:CC 17))])] + "optimize_size" + [(parallel [(set (match_dup 0) (mem:SI (reg:SI 7))) + (set (reg:SI 7) (plus:SI (reg:SI 7) (const_int 4))) + (set (reg:SI 6) (reg:SI 6))]) + (parallel [(set (match_dup 0) (mem:SI (reg:SI 7))) + (set (reg:SI 7) (plus:SI (reg:SI 7) (const_int 4)))])] + "") + +;; Convert esp additions to pop. +(define_peephole2 + [(match_scratch:SI 0 "r") + (parallel [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int 4))) + (clobber (reg:CC 17))])] + "" + [(parallel [(set (match_dup 0) (mem:SI (reg:SI 7))) + (set (reg:SI 7) (plus:SI (reg:SI 7) (const_int 4)))])] + "") + +;; Two pops case is tricky, since pop causes dependency on destination register. +;; We use two registers if available. 
+(define_peephole2 + [(match_scratch:SI 0 "r") + (match_scratch:SI 1 "r") + (parallel [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int 8))) + (clobber (reg:CC 17))])] + "" + [(parallel [(set (match_dup 0) (mem:SI (reg:SI 7))) + (set (reg:SI 7) (plus:SI (reg:SI 7) (const_int 4)))]) + (parallel [(set (match_dup 1) (mem:SI (reg:SI 7))) + (set (reg:SI 7) (plus:SI (reg:SI 7) (const_int 4)))])] + "") + +(define_peephole2 + [(match_scratch:SI 0 "r") + (parallel [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int 8))) + (clobber (reg:CC 17))])] + "optimize_size" + [(parallel [(set (match_dup 0) (mem:SI (reg:SI 7))) + (set (reg:SI 7) (plus:SI (reg:SI 7) (const_int 4)))]) + (parallel [(set (match_dup 0) (mem:SI (reg:SI 7))) + (set (reg:SI 7) (plus:SI (reg:SI 7) (const_int 4)))])] + "") ;; Call-value patterns last so that the wildcard operand does not ;; disrupt insn-recog's switch tables. -- 2.7.4