From 367242d3da71d3774f5cf58898e561dcf7bdc2d8 Mon Sep 17 00:00:00 2001
From: rth <rth@138bc75d-0d04-0410-961f-82ee72b054a4>
Date: Sat, 18 Apr 1998 01:24:59 +0000
Subject: [PATCH] Sat Apr 18 01:23:11 1998  John Carr  <jfc@mit.edu>  *
 sparc.c, sparc.h, sparc.md, sol2.h: Many changes related to V9 code
 generation.  Use 64 bit instructions in 32 bit mode when possible.
 Use V9 return instruction.  UltraSPARC optimizations.  * sparc.h:
 Change gen_rtx (CODE to gen_rtx_CODE (.

git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@19278 138bc75d-0d04-0410-961f-82ee72b054a4
---
 gcc/ChangeLog             |   8 +
 gcc/config/sparc/sol2.h   |   6 +
 gcc/config/sparc/sparc.c  | 546 +++++++++++++++++++--------
 gcc/config/sparc/sparc.h  | 188 +++++-----
 gcc/config/sparc/sparc.md | 926 ++++++++++++++++++++++++++++------------------
 5 files changed, 1088 insertions(+), 586 deletions(-)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 9b52ad1..3100c60 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,11 @@
+Sat Apr 18 01:23:11 1998  John Carr  <jfc@mit.edu>
+
+	* sparc.c, sparc.h, sparc.md, sol2.h: Many changes related to V9
+	code generation.  Use 64 bit instructions in 32 bit mode when
+	possible.  Use V9 return instruction.  UltraSPARC optimizations.
+
+	* sparc.h: Change gen_rtx (CODE to gen_rtx_CODE (.
+
 Fri Apr 17 22:38:17 1998  Jeffrey A Law  (law@cygnus.com)
 
 	* global.c (global_alloc): Don't pass HARD_CONST (0) to find_reg,
diff --git a/gcc/config/sparc/sol2.h b/gcc/config/sparc/sol2.h
index f0c3b13..2c8c5f3 100644
--- a/gcc/config/sparc/sol2.h
+++ b/gcc/config/sparc/sol2.h
@@ -198,3 +198,9 @@ Boston, MA 02111-1307, USA.  */
 #define TARGET_LIVE_G0	0
 #undef TARGET_BROKEN_SAVERESTORE
 #define TARGET_BROKEN_SAVERESTORE 0
+
+/* Solaris allows 64 bit out and global registers in 32 bit mode.
+   sparc_override_options will disable V8+ if not generating V9 code.  */
+#undef TARGET_DEFAULT
+#define TARGET_DEFAULT (MASK_APP_REGS + MASK_EPILOGUE + MASK_FPU + MASK_V8PLUS)
+
diff --git a/gcc/config/sparc/sparc.c b/gcc/config/sparc/sparc.c
index 36ccb15..caebb08 100644
--- a/gcc/config/sparc/sparc.c
+++ b/gcc/config/sparc/sparc.c
@@ -22,17 +22,7 @@ the Free Software Foundation, 59 Temple Place - Suite 330,
 Boston, MA 02111-1307, USA.  */
 
 #include "config.h"
-#include <stdio.h>
-#ifdef HAVE_STDLIB_H
-#include <stdlib.h>
-#endif
-#ifdef HAVE_STRING_H
-#include <string.h>
-#else
-#ifdef HAVE_STRINGS_H
-#include <strings.h>
-#endif
-#endif
+#include "system.h"
 #include "tree.h"
 #include "rtl.h"
 #include "regs.h"
@@ -208,11 +198,9 @@ sparc_override_options ()
     { "sparclet",   PROCESSOR_SPARCLET, MASK_ISA, MASK_SPARCLET },
     /* TEMIC sparclet */
     { "tsc701",     PROCESSOR_TSC701, MASK_ISA, MASK_SPARCLET },
-    /* "v8plus" is what Sun calls Solaris2.5 running on UltraSPARC's.  */
-    { "v8plus",     PROCESSOR_V8PLUS, MASK_ISA, MASK_V8PLUS },
     { "v9",         PROCESSOR_V9, MASK_ISA, MASK_V9 },
     /* TI ultrasparc */
-    { "ultrasparc", PROCESSOR_ULTRASPARC, MASK_ISA, MASK_V8PLUS },
+    { "ultrasparc", PROCESSOR_ULTRASPARC, MASK_ISA, MASK_V9 },
     { 0 }
   };
   struct cpu_table *cpu;
@@ -288,6 +276,10 @@ sparc_override_options ()
   if (TARGET_V9 && TARGET_ARCH32)
     target_flags |= MASK_DEPRECATED_V8_INSNS;
 
+  /* V8PLUS requires V9 */
+  if (! TARGET_V9)
+    target_flags &= ~MASK_V8PLUS;
+
   /* Validate -malign-loops= value, or provide default.  */
   if (sparc_align_loops_string)
     {
@@ -333,40 +325,6 @@ sparc_override_options ()
   sparc_init_modes ();
 }
 
-/* Float conversions (v9 only).
-
-   The floating point registers cannot hold DImode values because SUBREG's
-   on them get the wrong register.   "(subreg:SI (reg:DI M int-reg) 0)" is the
-   same as "(subreg:SI (reg:DI N float-reg) 1)", but gcc doesn't know how to
-   turn the "0" to a "1".  Therefore, we must explicitly do the conversions
-   to/from int/fp regs.  `sparc64_fpconv_stack_slot' is the address of an
-   8 byte stack slot used during the transfer.
-   ??? I could have used [%fp-16] but I didn't want to add yet another
-   dependence on this.  */
-/* ??? Can we use assign_stack_temp here?  */
-
-static rtx fpconv_stack_temp;
-
-/* Called once for each function.  */
-
-void
-sparc_init_expanders ()
-{
-  fpconv_stack_temp = NULL_RTX;
-}
-
-/* Assign a stack temp for fp/int DImode conversions.  */
-
-rtx
-sparc64_fpconv_stack_temp ()
-{
-  if (fpconv_stack_temp == NULL_RTX)
-    fpconv_stack_temp =
-      assign_stack_local (DImode, GET_MODE_SIZE (DImode), 0);
-
-  return fpconv_stack_temp;
-}
-
 /* Miscellaneous utilities.  */
 
 /* Nonzero if CODE, a comparison, is suitable for use in v9 conditional move
@@ -380,6 +338,14 @@ v9_regcmp_p (code)
 	  || code == LE || code == GT);
 }
 
+/* Nonzero if CODE is EQ or NE.  32 bit registers are zero extended so
+   only zero/non-zero comparisons work.  */
+int
+v8plus_regcmp_p (code)
+     enum rtx_code code;
+{
+  return (code == EQ || code == NE);
+}
 
 /* Operand constraints.  */
 
@@ -798,6 +764,16 @@ v9_regcmp_op (op, mode)
   return v9_regcmp_p (code);
 }
 
+/* Return 1 if OP is a comparison accepted by v8plus_regcmp_p, i.e. its
+   code is EQ or NE.  MODE is not examined.  */
+int
+v8plus_regcmp_op (op, mode)
+     register rtx op;
+     enum machine_mode mode;
+{
+  return v8plus_regcmp_p (GET_CODE (op));
+}
+
 /* Return 1 if this is a SIGN_EXTEND or ZERO_EXTEND operation.  */
 
 int
@@ -848,8 +824,13 @@ arith_operand (op, mode)
      rtx op;
      enum machine_mode mode;
 {
-  return (register_operand (op, mode)
-	  || (GET_CODE (op) == CONST_INT && SMALL_INT (op)));
+  int val;
+  if (register_operand (op, mode))
+    return 1;
+  if (GET_CODE (op) != CONST_INT)
+    return 0;
+  val = INTVAL (op) & 0xffffffff;
+  return SPARC_SIMM13_P (val);
 }
 
 /* Return true if OP is a register, or is a CONST_INT that can fit in a
@@ -1059,8 +1040,15 @@ gen_compare_reg (code, x, y)
   else
     cc_reg = gen_rtx (REG, mode, SPARC_ICC_REG);
 
-  emit_insn (gen_rtx (SET, VOIDmode, cc_reg,
-		      gen_rtx (COMPARE, mode, x, y)));
+  if (TARGET_V8PLUS && mode == CCXmode)
+    {
+      emit_insn (gen_cmpdi_v8plus (x, y));
+    }
+  else
+    {
+      emit_insn (gen_rtx (SET, VOIDmode, cc_reg,
+			  gen_rtx (COMPARE, mode, x, y)));
+    }
 
   return cc_reg;
 }
@@ -1287,14 +1275,53 @@ eligible_for_epilogue_delay (trial, slot)
 	       || register_operand (XEXP (src, 1), DImode)))
     return 1;
 
-  /* This matches "*return_subsi".  */
-  else if (GET_CODE (src) == MINUS
-      && register_operand (XEXP (src, 0), SImode)
-      && small_int (XEXP (src, 1), VOIDmode)
-      && INTVAL (XEXP (src, 1)) != -4096)
+  return 0;
+}
+
+static int
+check_return_regs (x)
+     rtx x;
+{
+  switch (GET_CODE (x))
+    {
+    case REG:
+      return IN_OR_GLOBAL_P (x);
+
+    case CONST_INT:
+    case CONST_DOUBLE:
+    case CONST:
+    case SYMBOL_REF:
+    case LABEL_REF:
     return 1;
 
+    case SET:
+    case IOR:
+    case AND:
+    case XOR:
+    case PLUS:
+    case MINUS:
+      if (check_return_regs (XEXP (x, 1)) == 0)
   return 0;
+    case NOT:
+    case NEG:
+    case MEM:
+      return check_return_regs (XEXP (x, 0));
+      
+    default:
+      return 0;
+    }
+
+}
+
+/* Return 1 if TRIAL references only in and global registers.  */
+int
+eligible_for_return_delay (trial)
+     rtx trial;
+{
+  if (GET_CODE (PATTERN (trial)) != SET)
+    return 0;
+
+  return check_return_regs (PATTERN (trial));
 }
 
 int
@@ -1346,6 +1373,10 @@ reg_unused_after (reg, insn)
 /* The table we use to reference PIC data.  */
 static rtx global_offset_table;
 
+/* The function we use to get at it.  */
+static rtx get_pc_symbol;
+static char get_pc_symbol_name[256];
+
 /* Ensure that we are not using patterns that are not OK with PIC.  */
 
 int
@@ -1499,61 +1530,11 @@ initialize_pic ()
 static rtx
 pic_setup_code ()
 {
-  rtx pic_pc_rtx;
-  rtx l1, l2;
   rtx seq;
 
   start_sequence ();
-
-  /* If -O0, show the PIC register remains live before this.  */
-  if (obey_regdecls)
-    emit_insn (gen_rtx (USE, VOIDmode, pic_offset_table_rtx));
-    
-  l1 = gen_label_rtx ();
-
-  pic_pc_rtx = gen_rtx (CONST, Pmode,
-			gen_rtx (MINUS, Pmode,
-				 global_offset_table,
-				 gen_rtx (CONST, Pmode,
-					  gen_rtx (MINUS, Pmode,
-						   gen_rtx (LABEL_REF,
-							    VOIDmode, l1),
-						   pc_rtx))));
-
-  /* sparc64: the RDPC instruction doesn't pair, and puts 4 bubbles in the
-     pipe to boot.  So don't use it here, especially when we're
-     doing a save anyway because of %l7.  */
-
-  l2 = gen_label_rtx ();
-  emit_label (l1);
-
-  /* Iff we are doing delay branch optimization, slot the sethi up
-     here so that it will fill the delay slot of the call.  */
-  if (flag_delayed_branch)
-    emit_insn (gen_rtx (SET, VOIDmode, pic_offset_table_rtx,
-			gen_rtx (HIGH, Pmode, pic_pc_rtx)));
-
-  /* Note that we pun calls and jumps here!  */
-  emit_jump_insn (gen_get_pc_via_call (l2, l1));
-
-  emit_label (l2);
-
-  if (!flag_delayed_branch)
-    emit_insn (gen_rtx (SET, VOIDmode, pic_offset_table_rtx,
-			gen_rtx (HIGH, Pmode, pic_pc_rtx)));
-
-  emit_insn (gen_rtx (SET, VOIDmode,
-		      pic_offset_table_rtx,
-		      gen_rtx (LO_SUM, Pmode,
-			       pic_offset_table_rtx, pic_pc_rtx)));
-  emit_insn (gen_rtx (SET, VOIDmode,
-		      pic_offset_table_rtx,
-		      gen_rtx (PLUS, Pmode,
-			       pic_offset_table_rtx,
-			       gen_rtx (REG, Pmode, 15))));
-
-  /* emit_insn (gen_rtx (ASM_INPUT, VOIDmode, "!#PROLOGUE# 1")); */
-
+  emit_insn (gen_get_pc (pic_offset_table_rtx, global_offset_table,
+			 get_pc_symbol));
   seq = gen_sequence ();
   end_sequence ();
 
@@ -1575,9 +1556,21 @@ finalize_pic ()
   if (! flag_pic)
     abort ();
 
+  /* If we haven't emitted the special get_pc helper function, do so now.  */
+  if (get_pc_symbol_name[0] == 0)
+    {
+      ASM_GENERATE_INTERNAL_LABEL (get_pc_symbol_name, "LGETPC", 0);
+
+      text_section ();
+      ASM_OUTPUT_ALIGN (asm_out_file, 3);
+      ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "LGETPC", 0);
+      fputs ("\tretl\n\tadd %o7,%l7,%l7\n", asm_out_file);
+    }
+
   /* Initialize every time through, since we can't easily
      know this to be permanent.  */
   global_offset_table = gen_rtx (SYMBOL_REF, Pmode, "_GLOBAL_OFFSET_TABLE_");
+  get_pc_symbol = gen_rtx (SYMBOL_REF, Pmode, get_pc_symbol_name);
   flag_pic = 0;
 
   emit_insn_after (pic_setup_code (), get_insns ());
@@ -1618,6 +1611,15 @@ emit_move_sequence (operands, mode)
   /* Handle most common case first: storing into a register.  */
   if (register_operand (operand0, mode))
     {
+      /* Integer constant to FP register. */
+      if (GET_CODE (operand0) == REG
+	  && REGNO (operand0) >= 32
+	  && REGNO (operand0) < FIRST_PSEUDO_REGISTER
+	  && CONSTANT_P (operand1))
+	{
+	  operand1 = validize_mem (force_const_mem (GET_MODE (operand0), operand1));
+	}
+
       if (register_operand (operand1, mode)
 	  || (GET_CODE (operand1) == CONST_INT && SMALL_INT (operand1))
 	  || (GET_CODE (operand1) == CONST_DOUBLE
@@ -1683,6 +1685,7 @@ emit_move_sequence (operands, mode)
 	}
       else if (GET_CODE (operand1) == CONST_INT
 	       ? (! SMALL_INT (operand1)
+		  && INTVAL (operand1) != -4096
 		  && ! SPARC_SETHI_P (INTVAL (operand1)))
 	       : GET_CODE (operand1) == CONST_DOUBLE
 	       ? ! arith_double_operand (operand1, DImode)
@@ -1704,16 +1707,20 @@ emit_move_sequence (operands, mode)
 	  rtx temp = ((reload_in_progress || mode == DImode)
 		      ? operand0 : gen_reg_rtx (mode));
 
+	  if (mode == SImode)
+	    {
+	      if (GET_CODE (operand1) == CONST_INT)
+		operand1 = GEN_INT (INTVAL (operand1) & 0xffffffff);
+	      else if (GET_CODE (operand1) == CONST_DOUBLE)
+		operand1 = GEN_INT (CONST_DOUBLE_LOW (operand1) & 0xffffffff);
+	    }
+
 	  if (TARGET_ARCH64 && mode == DImode)
 	    emit_insn (gen_sethi_di_sp64 (temp, operand1));
 	  else
 	    emit_insn (gen_rtx (SET, VOIDmode, temp,
 				gen_rtx (HIGH, mode, operand1)));
 
-	  if (GET_CODE (operand1) == CONST_INT)
-	    operand1 = GEN_INT (INTVAL (operand1) & 0xffffffff);
-	  else if (GET_CODE (operand1) == CONST_DOUBLE)
-	    operand1 = GEN_INT (CONST_DOUBLE_LOW (operand1) & 0xffffffff);
 	  operands[1] = gen_rtx (LO_SUM, mode, temp, operand1);
 	}
     }
@@ -1763,10 +1770,16 @@ singlemove_string (operands)
       else
 	return "sethi %%hi(%a1),%0";
     }
-  else if (GET_CODE (operands[1]) == CONST_INT
-	   && ! CONST_OK_FOR_LETTER_P (INTVAL (operands[1]), 'I'))
+  else if (GET_CODE (operands[1]) == CONST_INT)
     {
-      HOST_WIDE_INT i = INTVAL (operands[1]);
+      /* Only consider the low 32 bits of the constant. */
+      int i = INTVAL (operands[1]) & 0xffffffff;
+
+      if (SPARC_SIMM13_P (i))
+	return "mov %1,%0";
+
+      if (i == 4096)
+	return "sub %%g0,-4096,%0";
 
       /* If all low order 10 bits are clear, then we only need a single
 	 sethi insn to load the constant.  */
@@ -2291,9 +2304,9 @@ output_move_quad (operands)
 	  operands[2] = adj_offsettable_operand (mem, 8);
 	  /* ??? In arch64 case, shouldn't we use ldd/std for fp regs.  */
 	  if (mem == op1)
-	    return TARGET_ARCH64 ? "ldx %1,%0;ldx %2,%R0" : "ldd %1,%0;ldd %2,%S0";
+	    return TARGET_ARCH64 ? "ldx %1,%0\n\tldx %2,%R0" : "ldd %1,%0\n\tldd %2,%S0";
 	  else
-	    return TARGET_ARCH64 ? "stx %1,%0;stx %R1,%2" : "std %1,%0;std %S1,%2";
+	    return TARGET_ARCH64 ? "stx %1,%0\n\tstx %R1,%2" : "std %1,%0\n\tstd %S1,%2";
 	}
     }
 
@@ -2968,13 +2981,10 @@ enum sparc_mode_class {
 /* Modes for double-float and smaller quantities.  */
 #define DF_MODES (S_MODES | D_MODES)
 
-/* ??? Sparc64 fp regs cannot hold DImode values.  */
-#define DF_MODES64 (SF_MODES | (1 << (int) DF_MODE) /* | (1 << (int) D_MODE)*/)
+#define DF_MODES64 DF_MODES
 
 /* Modes for double-float only quantities.  */
-/* ??? Sparc64 fp regs cannot hold DImode values.
-   See fix_truncsfdi2.  */
-#define DF_ONLY_MODES ((1 << (int) DF_MODE) /*| (1 << (int) D_MODE)*/)
+#define DF_ONLY_MODES ((1 << (int) DF_MODE) | (1 << (int) D_MODE))
 
 /* Modes for double-float and larger quantities.  */
 #define DF_UP_MODES (DF_ONLY_MODES | TF_ONLY_MODES)
@@ -2985,8 +2995,6 @@ enum sparc_mode_class {
 /* Modes for quad-float and smaller quantities.  */
 #define TF_MODES (DF_MODES | TF_ONLY_MODES)
 
-/* ??? Sparc64 fp regs cannot hold DImode values.
-   See fix_truncsfdi2.  */
 #define TF_MODES64 (DF_MODES64 | TF_ONLY_MODES)
 
 /* Modes for condition codes.  */
@@ -3115,7 +3123,9 @@ sparc_init_modes ()
   /* Initialize the array used by REGNO_REG_CLASS.  */
   for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
     {
-      if (i < 32)
+      if (i < 16 && TARGET_V8PLUS)
+	sparc_regno_reg_class[i] = I64_REGS;
+      else if (i < 32)
 	sparc_regno_reg_class[i] = GENERAL_REGS;
       else if (i < 64)
 	sparc_regno_reg_class[i] = FP_REGS;
@@ -3584,6 +3594,8 @@ output_function_epilogue (file, size, leaf_function)
 						   PATTERN (insn)));
 	      final_scan_insn (insn, file, 1, 0, 1);
 	    }
+	  else if (TARGET_V9 && ! SKIP_CALLERS_UNIMP_P)
+	    fputs ("\treturn %i7+8\n\tnop\n", file);
 	  else
 	    fprintf (file, "\t%s\n\trestore\n", ret);
 	}
@@ -4566,22 +4578,77 @@ output_v9branch (op, reg, label, reversed, annul, noop)
   return string;
 }
 
-/* Output assembler code to return from a function.  */
+/* Renumber registers in delay slot.  Replace registers instead of
+   renumbering because they may be shared.
 
-/* ??? v9: Update to use the new `return' instruction.  Also, add patterns to
-   md file for the `return' instruction.  */
+   This does not handle instructions other than move.  */
+
+static void
+epilogue_renumber (where)
+     rtx *where;
+{
+  rtx x = *where;
+  enum rtx_code code = GET_CODE (x);
+
+  switch (code)
+    {
+    case MEM:
+      *where = x = copy_rtx (x);
+      epilogue_renumber (&XEXP (x, 0));
+      return;
+    case REG:
+      {
+	int regno = REGNO (x);
+	/* Registers 8 through 23 must not appear here.  The range starts
+	   at 8 (%o0), not 9, since 24-31 are renumbered onto 8-15 below.  */
+	if (regno >= 8 && regno < 24)
+	  abort ();
+	if (regno >= 24 && regno < 32)
+	  *where = gen_rtx_REG (GET_MODE (x), regno - 16);
+	return;
+      }
+    case CONST_INT:
+    case CONST_DOUBLE:
+    case CONST:
+    case SYMBOL_REF:
+    case LABEL_REF:
+      return;
+    case IOR:
+    case AND:
+    case XOR:
+    case PLUS:
+    case MINUS:
+      epilogue_renumber (&XEXP (x, 1));
+      /* Fall through to renumber operand 0.  */
+    case NEG:
+    case NOT:
+      epilogue_renumber (&XEXP (x, 0));
+      return;
+    default:
+      debug_rtx (*where);
+      abort ();
+    }
+}
+
+/* Output assembler code to return from a function.  */
 
 char *
 output_return (operands)
      rtx *operands;
 {
+  rtx delay = final_sequence ? XVECEXP (final_sequence, 0, 1) : 0;
+
   if (leaf_label)
     {
       operands[0] = leaf_label;
-      return "b,a %l0";
+      return "b%* %l0%(";
     }
   else if (leaf_function)
     {
+      /* No delay slot in a leaf function.  */
+      if (delay)
+	abort ();
+
       /* If we didn't allocate a frame pointer for the current function,
 	 the stack pointer might have been adjusted.  Output code to
 	 restore it now.  */
@@ -4621,8 +4688,22 @@ output_return (operands)
 	    return "sethi %%hi(%a0),%%g1\n\tretl\n\tadd %%sp,%%g1,%%sp";
 	}
     }
+  else if (TARGET_V9)
+    {
+      if (delay)
+	{
+	  epilogue_renumber (&SET_DEST (PATTERN (delay)));
+	  epilogue_renumber (&SET_SRC (PATTERN (delay)));
+	}
+      if (SKIP_CALLERS_UNIMP_P)
+	return "return %%i7+12%#";
+      else
+	return "return %%i7+8%#";
+    }
   else
     {
+      if (delay)
+	abort ();
       if (SKIP_CALLERS_UNIMP_P)
 	return "jmp %%i7+12\n\trestore";
       else
@@ -4795,14 +4876,14 @@ print_operand (file, x, code)
       /* On UltraSPARC, a branch in a delay slot causes a pipeline flush.
 	 Always emit a nop in case the next instruction is a branch.  */
       if (dbr_sequence_length () == 0
-	  && (optimize && (int)sparc_cpu < PROCESSOR_V8PLUS))
+	  && (optimize && (int)sparc_cpu < PROCESSOR_V9))
 	fputs (",a", file);
       return;
     case '(':
       /* Output a 'nop' if there's nothing for the delay slot and we are
 	 not optimizing.  This is always used with '*' above.  */
       if (dbr_sequence_length () == 0
-	  && ! (optimize && (int)sparc_cpu < PROCESSOR_V8PLUS))
+	  && ! (optimize && (int)sparc_cpu < PROCESSOR_V9))
 	fputs ("\n\tnop", file);
       return;
     case '_':
@@ -6066,7 +6147,8 @@ ultrasparc_adjust_cost (insn, link, dep_insn, cost)
   dep_type = get_attr_type (dep_insn);                  
 
 #define SLOW_FP(dep_type) \
-(dep_type == TYPE_FPSQRT || dep_type == TYPE_FPDIVS || dep_type == TYPE_FPDIVD)   
+(dep_type == TYPE_FPSQRT || dep_type == TYPE_FPDIVS || dep_type == TYPE_FPDIVD)
+
   switch (REG_NOTE_KIND (link))
     {                                              
     case 0:                                        
@@ -6080,16 +6162,16 @@ ultrasparc_adjust_cost (insn, link, dep_insn, cost)
 	case TYPE_FPSTORE:
 	  if (! SLOW_FP (dep_type))        
 	    return 0;                                     
-	  break;
+	  return cost;
 
 	case TYPE_STORE:                                  
 	  if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
 	    return cost;     
 
+	  if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
 	  /* The dependency between the two instructions is on the data
 	     that is being stored.  Assume that the address of the store
 	     is not also dependent.  */
-	  if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
 	    return 0;                                
 	  return cost;                                   
 
@@ -6109,15 +6191,15 @@ ultrasparc_adjust_cost (insn, link, dep_insn, cost)
 		 compensate for a dependency which might not really    
 		 exist, and 0.  */                                      
 	      if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET
-		  || GET_CODE (SET_DEST (pat)) != MEM         
-		  || GET_CODE (SET_SRC (dep_pat)) != MEM
-		  || ! rtx_equal_p (XEXP (SET_DEST (pat), 0),
-				    XEXP (SET_SRC (dep_pat), 0)))
+		  || GET_CODE (SET_SRC (pat)) != MEM
+		  || GET_CODE (SET_DEST (dep_pat)) != MEM
+		  || ! rtx_equal_p (XEXP (SET_SRC (pat), 0),
+				    XEXP (SET_DEST (dep_pat), 0)))
 		return cost + 2;
 
 	      return cost + 8;         
 	    }                                                                   
-	  break;                                                                
+	  return cost;
 
 	case TYPE_BRANCH:                                  
 	  /* Compare to branch latency is 0.  There is no benefit from
@@ -6128,16 +6210,15 @@ ultrasparc_adjust_cost (insn, link, dep_insn, cost)
 	     compare to conditional move.  */                        
 	  if (dep_type == TYPE_FPCMP)                             
 	    return cost - 1;                                           
-	  break;                                                        
+	  return cost;
 
 	case TYPE_FPCMOVE:                                    
 	  /* FMOVR class instructions can not issue in the same cycle
 	     or the cycle after an instruction which writes any
 	     integer register.  Model this as cost 2 for dependent
 	     instructions.  */  
-	  if (GET_CODE (PATTERN (insn)) == SET
-	      && (GET_MODE (SET_DEST (PATTERN (insn))) == SFmode
-	          || GET_MODE (SET_DEST (PATTERN (insn))) == DFmode)            
+	  if ((dep_type == TYPE_IALU || dep_type == TYPE_UNARY
+	       || dep_type == TYPE_BINARY)
 	      && cost < 2)                                                      
 	    return 2;
 	  /* Otherwise check as for integer conditional moves. */
@@ -6149,7 +6230,7 @@ ultrasparc_adjust_cost (insn, link, dep_insn, cost)
 	     to model.  */                        
 	  if (dep_type == TYPE_LOAD || dep_type == TYPE_SLOAD)                  
 	    return cost + 3;                                           
-	  break;                                                        
+	  return cost;
 
 	default:
 	  break;
@@ -6190,9 +6271,8 @@ sparc_issue_rate ()
     {
     default:                                 
       return 1;                                                    
-    case PROCESSOR_V8PLUS:                                         
     case PROCESSOR_V9:                                                
-      /* Assume these generic V9 types are capable of at least dual-issue.  */
+      /* Assume V9 processors are capable of at least dual-issue.  */
       return 2;
     case PROCESSOR_SUPERSPARC:                                        
       return 3;                                                      
@@ -6200,3 +6280,175 @@ sparc_issue_rate ()
       return 4;                                                    
     }
 }
+
+/* Subroutine of sparc_check_64.  INSN is a single SET whose destination
+   is X; classify the high 32 bits of X from the form of the source.  */
+static int
+set_extends (x, insn)
+     rtx x, insn;
+{
+  register rtx pat = PATTERN (insn);
+
+  switch (GET_CODE (SET_SRC (pat)))
+    {
+      /* Load and some shift instructions zero extend. */
+    case MEM:
+    case ZERO_EXTEND:
+      /* sethi clears the high bits */
+    case HIGH:
+      /* LO_SUM is used with sethi.  sethi cleared the high
+	 bits and the values used with lo_sum are positive */
+    case LO_SUM:
+      /* UNSPEC is v8plus_clear_high */
+    case UNSPEC:
+      /* Store flag stores 0 or 1 */
+    case LT: case LTU:
+    case GT: case GTU:
+    case LE: case LEU:
+    case GE: case GEU:
+    case EQ: case NE:
+      return 1;
+    case AND:
+      {
+	rtx op0 = XEXP (SET_SRC (pat), 0);
+	rtx op1 = XEXP (SET_SRC (pat), 1);
+	if (GET_CODE (op1) == CONST_INT)
+	  return INTVAL (op1) >= 0;
+	if (GET_CODE (op0) == REG && sparc_check_64 (op0, insn) == 1)
+	  return 1;
+	/* No fall through: an AND of two unknown values proves nothing.  */
+	return GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1;
+      }
+    case ASHIFT:
+    case LSHIFTRT:
+      return GET_MODE (SET_SRC (pat)) == SImode;
+      /* Positive integers leave the high bits zero. */
+    case CONST_DOUBLE:
+      return ! (CONST_DOUBLE_LOW (SET_SRC (pat)) & 0x80000000);
+    case CONST_INT:
+      return ! (INTVAL (SET_SRC (pat)) & 0x80000000);
+    case ASHIFTRT:
+    case SIGN_EXTEND:
+      return - (GET_MODE (SET_SRC (pat)) == SImode);
+    default:
+      return 0;
+    }
+}
+
+/* Return 0 if the high 32 bits of X (the low word of X, if DImode) are
+   unknown.  Return 1 if the high bits are zero, -1 if the register is
+   sign extended.  */
+int
+sparc_check_64 (x, insn)
+     rtx x, insn;
+{
+  /* If a register is set only once it is safe to ignore insns this
+     code does not know how to handle.  The loop will either recognize
+     the single set and return the correct value or fail to recognize
+     it and return 0.  */
+  int set_once = (GET_CODE (x) == REG
+		  && flag_expensive_optimizations
+		  && REG_N_SETS (REGNO (x)) == 1);
+
+  if (insn == 0)
+    {
+      if (! set_once)
+	return 0;
+      insn = get_last_insn_anywhere ();
+    }
+
+  /* Walk backwards looking for the insn that sets X.  */
+  while ((insn = PREV_INSN (insn)) != 0)
+    {
+      switch (GET_CODE (insn))
+	{
+	case JUMP_INSN:
+	case NOTE:
+	  break;
+	case CODE_LABEL:
+	case CALL_INSN:
+	default:
+	  /* Anything else ends the scan unless X has a single set.  */
+	  if (! set_once)
+	    return 0;
+	  break;
+	case INSN:
+	  {
+	    rtx pat = PATTERN (insn);
+	    if (GET_CODE (pat) != SET)
+	      return 0;
+	    if (rtx_equal_p (x, SET_DEST (pat)))
+	      return set_extends (x, insn);
+	    if (reg_overlap_mentioned_p (SET_DEST (pat), x))
+	      return 0;
+	  }
+	}
+    }
+  return 0;
+}
+
+/* Emit the insns that combine %H1 and %L1 into one 64 bit register, then
+   return the template for shift OPCODE, built in a static buffer.  */
+char *
+sparc_v8plus_shift (operands, insn, opcode)
+     rtx *operands, insn;
+     char *opcode;
+{
+  static char asm_code[60];
+
+  if (GET_CODE (operands[3]) == SCRATCH)
+    operands[3] = operands[0];
+  output_asm_insn ("sllx %H1,32,%3", operands);
+  /* Zero extend the low word unless its high bits are known clear.  */
+  if (! (sparc_check_64 (operands[1], insn) > 0))
+    output_asm_insn ("srl %L1,0,%L1", operands);
+  output_asm_insn ("or %L1,%3,%3", operands);
+  strcpy (asm_code, opcode);
+  return strcat (asm_code, (which_alternative == 2
+			    ? " %3,%2,%3\n\tsrlx %3,32,%H0\n\tmov %3,%L0"
+			    : " %0,%2,%L0\n\tsrlx %L0,32,%H0"));
+}
+
+
+/* Return nonzero if DEST is an in or global register and SRC is either a
+   CONST_INT or an in or global register.  Always 0 unless compiling a
+   non-leaf function for V9.  */
+int
+sparc_return_peephole_ok (dest, src)
+     rtx dest, src;
+{
+  int src_ok;
+
+  if (! TARGET_V9 || leaf_function)
+    return 0;
+  src_ok = (GET_CODE (src) == CONST_INT
+	    || (GET_CODE (src) == REG && IN_OR_GLOBAL_P (src)));
+  return src_ok ? IN_OR_GLOBAL_P (dest) : 0;
+}
+
+/* Predicate: nonzero if OP mentions only constants and in or global
+   registers, looking through MEM and PLUS.  MODE is not examined.  */
+int
+delay_operand (op, mode)
+     rtx op;
+     enum machine_mode mode;
+{
+  switch (GET_CODE (op))
+    {
+    case CONST:
+    case CONST_INT:
+    case SYMBOL_REF:
+    case LABEL_REF:
+      return 1;
+    case REG:
+      return IN_OR_GLOBAL_P (op);
+    case MEM:
+      return delay_operand (XEXP (op, 0), Pmode);
+    case PLUS:
+      if (! delay_operand (XEXP (op, 0), Pmode))
+	return 0;
+      return delay_operand (XEXP (op, 1), Pmode) != 0;
+    default:
+      return 0;
+    }
+}
diff --git a/gcc/config/sparc/sparc.h b/gcc/config/sparc/sparc.h
index 1659e68..c573f40 100644
--- a/gcc/config/sparc/sparc.h
+++ b/gcc/config/sparc/sparc.h
@@ -158,7 +158,6 @@ Unrecognized value in TARGET_CPU_DEFAULT.
 %{mcpu=f930:-D__sparclite__} %{mcpu=f934:-D__sparclite__} \
 %{mcpu=v8:-D__sparc_v8__} \
 %{mcpu=supersparc:-D__supersparc__ -D__sparc_v8__} \
-%{mcpu=v8plus:-D__sparc_v9__} \
 %{mcpu=v9:-D__sparc_v9__} \
 %{mcpu=ultrasparc:-D__sparc_v9__} \
 %{!mcpu*:%{!mcypress:%{!msparclite:%{!mf930:%{!mf934:%{!mv8:%{!msupersparc:%(cpp_cpu_default)}}}}}}} \
@@ -209,9 +208,9 @@ Unrecognized value in TARGET_CPU_DEFAULT.
 %{mf930:-Asparclite} %{mf934:-Asparclite} \
 %{mcpu=sparclite:-Asparclite} \
 %{mcpu=f930:-Asparclite} %{mcpu=f934:-Asparclite} \
-%{mcpu=v8plus:-Av8plus} \
+%{mv8plus:-Av8plus} \
 %{mcpu=v9:-Av9} \
-%{mcpu=ultrasparc:-Av9a} \
+%{mcpu=ultrasparc:%{!mv8plus:-Av9a}} \
 %{!mcpu*:%{!mcypress:%{!msparclite:%{!mf930:%{!mf934:%{!mv8:%{!msupersparc:%(asm_cpu_default)}}}}}}} \
 "
 
@@ -453,13 +452,17 @@ extern int target_flags;
 #define MASK_VIS 0x1000000          
 #define TARGET_VIS (target_flags & MASK_VIS)
 
-/* Compile for Solaris V8+.  64 bit instructions are available but the
-   high 32 bits of all registers except the globals and current outs may
-   be cleared at any time.  */                 
+/* Compile for Solaris V8+.  32 bit Solaris preserves the high bits of
+   the current out and global registers.  Linux saves the high bits on
+   context switches but not signals.  */
 #define MASK_V8PLUS 0x2000000                 
 #define TARGET_V8PLUS (target_flags & MASK_V8PLUS)                            
 
-/* See sparc.md */
+/* TARGET_HARD_MUL: Use hardware multiply instructions but not %y.
+   TARGET_HARD_MUL32: Use hardware multiply instructions with rd %y
+   to get high 32 bits.  False in V8+ or V9 because multiply stores
+   a 64 bit result in a register.  */
+
 #define TARGET_HARD_MUL32				\
   ((TARGET_V8 || TARGET_SPARCLITE			\
     || TARGET_SPARCLET || TARGET_DEPRECATED_V8_INSNS)	\
@@ -495,6 +498,8 @@ extern int target_flags;
     {"no-app-regs", -MASK_APP_REGS},	\
     {"hard-quad-float", MASK_HARD_QUAD}, \
     {"soft-quad-float", -MASK_HARD_QUAD}, \
+    {"v8plus", MASK_V8PLUS},		\
+    {"no-v8plus", -MASK_V8PLUS},	\
     {"vis", MASK_VIS},			\
     /* ??? These are deprecated, coerced to -mcpu=.  Delete in 2.9.  */ \
     {"cypress", 0},			\
@@ -502,7 +507,6 @@ extern int target_flags;
     {"f930", 0},			\
     {"f934", 0},			\
     {"v8", 0},				\
-    {"v8plus", 0},			\
     {"supersparc", 0},			\
     /* End of deprecated options.  */	\
     /* -mptrNN exists for *experimental* purposes.  */ \
@@ -535,7 +539,6 @@ enum processor_type {
   PROCESSOR_F934,
   PROCESSOR_SPARCLET,
   PROCESSOR_TSC701,
-  PROCESSOR_V8PLUS,
   PROCESSOR_V9,
   PROCESSOR_ULTRASPARC
 };
@@ -977,6 +980,12 @@ while (0)
        : (GET_MODE_SIZE (MODE) + 3) / 4)				\
    : ((GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD))
 
+/* A subreg in 64 bit mode will have the wrong offset for a floating point
+   register.  The least significant part is at offset 1, compared to 0 for
+   integer registers, so the result is REGNO + 1 (not hard register 1).  */
+#define ALTER_HARD_SUBREG(TMODE, WORD, FMODE, REGNO)			\
+     (TARGET_ARCH64 && (REGNO) >= 32 && (REGNO) < 96 && (TMODE) == SImode ? ((REGNO) + 1) : ((REGNO) + (WORD)))
+
 /* Value is 1 if hard register REGNO can hold a value of machine-mode MODE.
    See sparc.c for how we initialize this.  */
 extern int *hard_regno_mode_classes;
@@ -1093,14 +1102,14 @@ extern int sparc_mode_class[];
 #define STRUCT_VALUE \
   (TARGET_ARCH64					\
    ? 0							\
-   : gen_rtx (MEM, Pmode,				\
-	      gen_rtx (PLUS, Pmode, stack_pointer_rtx,	\
+   : gen_rtx_MEM (Pmode,				\
+		  gen_rtx_PLUS (Pmode, stack_pointer_rtx, \
 		       GEN_INT (STRUCT_VALUE_OFFSET))))
 #define STRUCT_VALUE_INCOMING \
   (TARGET_ARCH64					\
    ? 0							\
-   : gen_rtx (MEM, Pmode,				\
-	      gen_rtx (PLUS, Pmode, frame_pointer_rtx,	\
+   : gen_rtx_MEM (Pmode,				\
+		  gen_rtx_PLUS (Pmode, frame_pointer_rtx, \
 		       GEN_INT (STRUCT_VALUE_OFFSET))))
 
 /* Define the classes of registers for register constraints in the
@@ -1157,8 +1166,8 @@ extern int sparc_mode_class[];
    ??? Should %fcc[0123] be handled similarly?
 */
 
-enum reg_class { NO_REGS, FPCC_REGS, GENERAL_REGS, FP_REGS, EXTRA_FP_REGS,
-		 GENERAL_OR_FP_REGS, GENERAL_OR_EXTRA_FP_REGS,
+enum reg_class { NO_REGS, FPCC_REGS, I64_REGS, GENERAL_REGS, FP_REGS,
+		 EXTRA_FP_REGS, GENERAL_OR_FP_REGS, GENERAL_OR_EXTRA_FP_REGS,
 		 ALL_REGS, LIM_REG_CLASSES };
 
 #define N_REG_CLASSES (int) LIM_REG_CLASSES
@@ -1166,15 +1175,16 @@ enum reg_class { NO_REGS, FPCC_REGS, GENERAL_REGS, FP_REGS, EXTRA_FP_REGS,
 /* Give names of register classes as strings for dump file.   */
 
 #define REG_CLASS_NAMES \
-  { "NO_REGS", "FPCC_REGS", "GENERAL_REGS", "FP_REGS", "EXTRA_FP_REGS", \
-    "GENERAL_OR_FP_REGS", "GENERAL_OR_EXTRA_FP_REGS", "ALL_REGS" }
+  { "NO_REGS", "FPCC_REGS", "I64_REGS", "GENERAL_REGS", "FP_REGS",	\
+     "EXTRA_FP_REGS", "GENERAL_OR_FP_REGS", "GENERAL_OR_EXTRA_FP_REGS",	\
+     "ALL_REGS" }
 
 /* Define which registers fit in which classes.
    This is an initializer for a vector of HARD_REG_SET
    of length N_REG_CLASSES.  */
 
 #define REG_CLASS_CONTENTS \
-  {{0, 0, 0, 0}, {0, 0, 0, 0xf}, \
+  {{0, 0, 0, 0}, {0, 0, 0, 0xf}, {0xffff, 0, 0, 0}, \
    {-1, 0, 0, 0}, {0, -1, 0, 0}, {0, -1, -1, 0}, \
    {-1, -1, 0, 0}, {-1, -1, -1, 0}, {-1, -1, -1, 0x1f}}
 
@@ -1266,15 +1276,18 @@ extern char leaf_reg_remap[];
 /* Get reg_class from a letter such as appears in the machine description.
    In the not-v9 case, coerce v9's 'e' class to 'f', so we can use 'e' in the
    .md file for v8 and v9.
-   Use 'd' and 'b' for single precision VIS operations if TARGET_VIS.  */
+   'd' and 'b' are used for single and double precision VIS operations,
+   if TARGET_VIS.
+   'h' is used for V8+ 64 bit global and out registers. */
 
 #define REG_CLASS_FROM_LETTER(C)		\
 (TARGET_V9					\
  ? ((C) == 'f' ? FP_REGS			\
     : (C) == 'e' ? EXTRA_FP_REGS 		\
     : (C) == 'c' ? FPCC_REGS			\
-    : ((C) == 'd' && TARGET_VIS) ? FP_REGS	\
-    : ((C) == 'b' && TARGET_VIS) ? FP_REGS	\
+    : ((C) == 'd' && TARGET_VIS) ? FP_REGS\
+    : ((C) == 'b' && TARGET_VIS) ? EXTRA_FP_REGS\
+    : ((C) == 'h' && TARGET_V8PLUS) ? I64_REGS\
     : NO_REGS)					\
  : ((C) == 'f' ? FP_REGS			\
     : (C) == 'e' ? FP_REGS			\
@@ -1299,6 +1312,8 @@ extern char leaf_reg_remap[];
 /* 10 and 11 bit immediates are only used for a few specific insns.
    SMALL_INT is used throughout the port so we continue to use it.  */
 #define SMALL_INT(X) (SPARC_SIMM13_P (INTVAL (X)))
+/* 13 bit signed immediate, considering only the (sign-extended) low 32 bits */
+#define SMALL_INT32(X) (SPARC_SIMM13_P ((int) INTVAL (X)))
 #define SPARC_SETHI_P(X) \
 (((unsigned HOST_WIDE_INT) (X) & ~(unsigned HOST_WIDE_INT) 0xfffffc00) == 0)
 
@@ -1366,7 +1381,7 @@ extern char leaf_reg_remap[];
 #define SECONDARY_MEMORY_NEEDED_RTX(MODE) \
   (get_frame_size () == 0						\
    ? assign_stack_local (MODE, GET_MODE_SIZE (MODE), 0)			\
-   : gen_rtx (MEM, MODE, gen_rtx (PLUS, Pmode, frame_pointer_rtx,	\
+   : gen_rtx_MEM (MODE, gen_rtx_PLUS (Pmode, frame_pointer_rtx,	\
 				  GEN_INT (STARTING_FRAME_OFFSET))))
 
 /* Get_secondary_mem widens it's argument to BITS_PER_WORD which loses on v9
@@ -1501,18 +1516,18 @@ extern char leaf_reg_remap[];
 /* On SPARC the value is found in the first "output" register.  */
 
 #define FUNCTION_VALUE(VALTYPE, FUNC)  \
-  gen_rtx (REG, TYPE_MODE (VALTYPE), BASE_RETURN_VALUE_REG (TYPE_MODE (VALTYPE)))
+  gen_rtx_REG (TYPE_MODE (VALTYPE), BASE_RETURN_VALUE_REG (TYPE_MODE (VALTYPE)))
 
 /* But the called function leaves it in the first "input" register.  */
 
 #define FUNCTION_OUTGOING_VALUE(VALTYPE, FUNC)  \
-  gen_rtx (REG, TYPE_MODE (VALTYPE), BASE_OUTGOING_VALUE_REG (TYPE_MODE (VALTYPE)))
+  gen_rtx_REG (TYPE_MODE (VALTYPE), BASE_OUTGOING_VALUE_REG (TYPE_MODE (VALTYPE)))
 
 /* Define how to find the value returned by a library function
    assuming the value has mode MODE.  */
 
 #define LIBCALL_VALUE(MODE)	\
-  gen_rtx (REG, MODE, BASE_RETURN_VALUE_REG (MODE))
+  gen_rtx_REG (MODE, BASE_RETURN_VALUE_REG (MODE))
 
 /* 1 if N is a possible register number for a function value
    as seen by the caller.
@@ -1615,7 +1630,7 @@ function_arg_pass_by_reference (& (CUM), (MODE), (TYPE), (NAMED))
    to pad out an argument with extra space.  The value should be of type
    `enum direction': either `upward' to pad above the argument,
    `downward' to pad below, or `none' to inhibit padding.  */
-extern enum direction function_arg_padding ();
+
 #define FUNCTION_ARG_PADDING(MODE, TYPE) \
 function_arg_padding ((MODE), (TYPE))
 
@@ -1630,17 +1645,6 @@ function_arg_padding ((MODE), (TYPE))
       || ((TYPE) && TYPE_ALIGN (TYPE) == 128)))	\
  ? 128 : PARM_BOUNDARY)
 
-/* Initialize data used by insn expanders.  This is called from
-   init_emit, once for each function, before code is generated.
-   For v9, clear the temp slot used by float/int DImode conversions.
-   ??? There is the 16 bytes at [%fp-16], however we'd like to delete this
-   space at some point.
-   ??? Use assign_stack_temp?  */
-
-extern void sparc_init_expanders ();
-extern struct rtx_def *sparc64_fpconv_stack_temp ();
-#define INIT_EXPANDERS sparc_init_expanders ()
-
 /* Define the information needed to generate branch and scc insns.  This is
    stored from the compare operation.  Note that we can't use "rtx" here
    since it hasn't been defined!  */
@@ -1691,8 +1695,8 @@ do {									\
 
 extern int leaf_function;
 #define FUNCTION_PROLOGUE(FILE, SIZE) \
-  (TARGET_FLAT ? sparc_flat_output_function_prologue (FILE, SIZE) \
-   : output_function_prologue (FILE, SIZE, leaf_function))
+  (TARGET_FLAT ? sparc_flat_output_function_prologue (FILE, (int) (SIZE)) \
+   : output_function_prologue (FILE, (int) (SIZE), leaf_function))
 
 /* Output assembler code to FILE to increment profiler label # LABELNO
    for profiling a function entry.
@@ -2070,8 +2074,8 @@ extern int current_function_outgoing_args_size;
 extern union tree_node *current_function_decl;
 
 #define FUNCTION_EPILOGUE(FILE, SIZE) \
-  (TARGET_FLAT ? sparc_flat_output_function_epilogue (FILE, SIZE) \
-   : output_function_epilogue (FILE, SIZE, leaf_function))
+  (TARGET_FLAT ? sparc_flat_output_function_epilogue (FILE, (int) (SIZE)) \
+   : output_function_epilogue (FILE, (int) (SIZE), leaf_function))
 
 #define DELAY_SLOTS_FOR_EPILOGUE \
   (TARGET_FLAT ? sparc_flat_epilogue_delay_slots () : 1)
@@ -2120,11 +2124,11 @@ do {									\
     }									\
   else									\
     {									\
-      ASM_OUTPUT_INT (FILE, GEN_INT (0x00000000));	\
-      ASM_OUTPUT_INT (FILE, GEN_INT (0x00000000));	\
-      ASM_OUTPUT_INT (FILE, GEN_INT (0x00000000));	\
+      ASM_OUTPUT_INT (FILE, const0_rtx);				\
+      ASM_OUTPUT_INT (FILE, const0_rtx);				\
+      ASM_OUTPUT_INT (FILE, const0_rtx);				\
       ASM_OUTPUT_INT (FILE, GEN_INT (0x81C04000));	\
-      ASM_OUTPUT_INT (FILE, GEN_INT (0x00000000));	\
+      ASM_OUTPUT_INT (FILE, const0_rtx);				\
     }									\
 } while (0)
 
@@ -2175,7 +2179,7 @@ extern struct rtx_def *sparc_builtin_saveregs ();
    that holds the dynamic chain--the previous frame's address.
    ??? -mflat support? */
 #define DYNAMIC_CHAIN_ADDRESS(frame) \
-  gen_rtx (PLUS, Pmode, frame, GEN_INT (14 * UNITS_PER_WORD))
+  gen_rtx_PLUS (Pmode, frame, GEN_INT (14 * UNITS_PER_WORD))
 
 /* The return address isn't on the stack, it is in a register, so we can't
    access it from the current frame pointer.  We can access it from the
@@ -2194,8 +2198,8 @@ extern struct rtx_def *sparc_builtin_saveregs ();
    returns, and +12 for structure returns.  */
 #define RETURN_ADDR_RTX(count, frame)		\
   ((count == -1)				\
-   ? gen_rtx (REG, Pmode, 31)			\
-   : gen_rtx (MEM, Pmode,			\
+   ? gen_rtx_REG (Pmode, 31)			\
+   : gen_rtx_MEM (Pmode,			\
 	      memory_address (Pmode, plus_constant (frame, 15 * UNITS_PER_WORD))))
 
 /* Before the prologue, the return address is %o7 + 8.  OK, sometimes it's
@@ -2203,7 +2207,7 @@ extern struct rtx_def *sparc_builtin_saveregs ();
    Actually, just using %o7 is close enough for unwinding, but %o7+8
    is something you can return to.  */
 #define INCOMING_RETURN_ADDR_RTX \
-  gen_rtx (PLUS, word_mode, gen_rtx (REG, word_mode, 15), GEN_INT (8))
+  gen_rtx_PLUS (word_mode, gen_rtx_REG (word_mode, 15), GEN_INT (8))
 
 /* The offset from the incoming value of %sp to the top of the stack frame
    for the current function.  On sparc64, we have to account for the stack
@@ -2250,6 +2254,9 @@ extern struct rtx_def *sparc_builtin_saveregs ();
 /* 1 if X is an fp register.  */
 
 #define FP_REG_P(X) (REG_P (X) && REGNO_OK_FOR_FP_P (REGNO (X)))
+
+/* Is X, a REG, an in or global register?  i.e. is regno 0..7 or 24..31 */
+#define IN_OR_GLOBAL_P(X) (REGNO (X) < 8 || (REGNO (X) >= 24 && REGNO (X) <= 31))
 
 /* Maximum number of registers that can appear in a valid memory address.  */
 
@@ -2439,30 +2446,30 @@ extern struct rtx_def *legitimize_pic_address ();
 #define LEGITIMIZE_ADDRESS(X,OLDX,MODE,WIN)	\
 { rtx sparc_x = (X);						\
   if (GET_CODE (X) == PLUS && GET_CODE (XEXP (X, 0)) == MULT)	\
-    (X) = gen_rtx (PLUS, Pmode, XEXP (X, 1),			\
+    (X) = gen_rtx_PLUS (Pmode, XEXP (X, 1),			\
 		   force_operand (XEXP (X, 0), NULL_RTX));	\
   if (GET_CODE (X) == PLUS && GET_CODE (XEXP (X, 1)) == MULT)	\
-    (X) = gen_rtx (PLUS, Pmode, XEXP (X, 0),			\
+    (X) = gen_rtx_PLUS (Pmode, XEXP (X, 0),			\
 		   force_operand (XEXP (X, 1), NULL_RTX));	\
   if (GET_CODE (X) == PLUS && GET_CODE (XEXP (X, 0)) == PLUS)	\
-    (X) = gen_rtx (PLUS, Pmode, force_operand (XEXP (X, 0), NULL_RTX),\
+    (X) = gen_rtx_PLUS (Pmode, force_operand (XEXP (X, 0), NULL_RTX),\
 		   XEXP (X, 1));				\
   if (GET_CODE (X) == PLUS && GET_CODE (XEXP (X, 1)) == PLUS)	\
-    (X) = gen_rtx (PLUS, Pmode, XEXP (X, 0),			\
+    (X) = gen_rtx_PLUS (Pmode, XEXP (X, 0),			\
 		   force_operand (XEXP (X, 1), NULL_RTX));	\
   if (sparc_x != (X) && memory_address_p (MODE, X))		\
     goto WIN;							\
   if (flag_pic) (X) = legitimize_pic_address (X, MODE, 0);	\
   else if (GET_CODE (X) == PLUS && CONSTANT_ADDRESS_P (XEXP (X, 1)))	\
-    (X) = gen_rtx (PLUS, Pmode, XEXP (X, 0),			\
+    (X) = gen_rtx_PLUS (Pmode, XEXP (X, 0),			\
 		   copy_to_mode_reg (Pmode, XEXP (X, 1)));	\
   else if (GET_CODE (X) == PLUS && CONSTANT_ADDRESS_P (XEXP (X, 0)))	\
-    (X) = gen_rtx (PLUS, Pmode, XEXP (X, 1),			\
+    (X) = gen_rtx_PLUS (Pmode, XEXP (X, 1),			\
 		   copy_to_mode_reg (Pmode, XEXP (X, 0)));	\
   else if (GET_CODE (X) == SYMBOL_REF || GET_CODE (X) == CONST	\
 	   || GET_CODE (X) == LABEL_REF)			\
-    (X) = gen_rtx (LO_SUM, Pmode,				\
-		   copy_to_mode_reg (Pmode, gen_rtx (HIGH, Pmode, X)), X); \
+    (X) = gen_rtx_LO_SUM (Pmode,				\
+			  copy_to_mode_reg (Pmode, gen_rtx_HIGH (Pmode, X)), X); \
   if (memory_address_p (MODE, X))				\
     goto WIN; }
 
@@ -2512,7 +2519,7 @@ extern struct rtx_def *legitimize_pic_address ();
 
 /* This is how to refer to the variable errno.  */
 #define GEN_ERRNO_RTX \
-  gen_rtx (MEM, SImode, gen_rtx (SYMBOL_REF, Pmode, "errno"))
+  gen_rtx_MEM (SImode, gen_rtx_SYMBOL_REF (Pmode, "errno"))
 #endif /* 0 */
 
 /* Define if operations between registers always perform the operation
@@ -2585,7 +2592,7 @@ extern struct rtx_def *legitimize_pic_address ();
    : ((GET_CODE (X) == PLUS || GET_CODE (X) == MINUS			\
        || GET_CODE (X) == NEG || GET_CODE (X) == ASHIFT)		\
       ? (TARGET_ARCH64 && GET_MODE (X) == DImode ? CCX_NOOVmode : CC_NOOVmode) \
-      : (TARGET_ARCH64 && GET_MODE (X) == DImode ? CCXmode : CCmode)))
+      : ((TARGET_ARCH64 || TARGET_V8PLUS) && GET_MODE (X) == DImode ? CCXmode : CCmode)))
 
 /* Return non-zero if SELECT_CC_MODE will never return MODE for a
    floating point inequality comparison.  */
@@ -2645,32 +2652,32 @@ extern struct rtx_def *legitimize_pic_address ();
 #define INIT_TARGET_OPTABS						\
   do {									\
     add_optab->handlers[(int) TFmode].libfunc				\
-      = gen_rtx (SYMBOL_REF, Pmode, ADDTF3_LIBCALL);			\
+      = gen_rtx_SYMBOL_REF (Pmode, ADDTF3_LIBCALL);			\
     sub_optab->handlers[(int) TFmode].libfunc				\
-      = gen_rtx (SYMBOL_REF, Pmode, SUBTF3_LIBCALL);			\
+      = gen_rtx_SYMBOL_REF (Pmode, SUBTF3_LIBCALL);			\
     neg_optab->handlers[(int) TFmode].libfunc				\
-      = gen_rtx (SYMBOL_REF, Pmode, NEGTF2_LIBCALL);			\
+      = gen_rtx_SYMBOL_REF (Pmode, NEGTF2_LIBCALL);			\
     smul_optab->handlers[(int) TFmode].libfunc				\
-      = gen_rtx (SYMBOL_REF, Pmode, MULTF3_LIBCALL);			\
+      = gen_rtx_SYMBOL_REF (Pmode, MULTF3_LIBCALL);			\
     flodiv_optab->handlers[(int) TFmode].libfunc			\
-      = gen_rtx (SYMBOL_REF, Pmode, DIVTF3_LIBCALL);			\
-    eqtf2_libfunc = gen_rtx (SYMBOL_REF, Pmode, EQTF2_LIBCALL);		\
-    netf2_libfunc = gen_rtx (SYMBOL_REF, Pmode, NETF2_LIBCALL);		\
-    gttf2_libfunc = gen_rtx (SYMBOL_REF, Pmode, GTTF2_LIBCALL);		\
-    getf2_libfunc = gen_rtx (SYMBOL_REF, Pmode, GETF2_LIBCALL);		\
-    lttf2_libfunc = gen_rtx (SYMBOL_REF, Pmode, LTTF2_LIBCALL);		\
-    letf2_libfunc = gen_rtx (SYMBOL_REF, Pmode, LETF2_LIBCALL);		\
-    trunctfsf2_libfunc = gen_rtx (SYMBOL_REF, Pmode, TRUNCTFSF2_LIBCALL);   \
-    trunctfdf2_libfunc = gen_rtx (SYMBOL_REF, Pmode, TRUNCTFDF2_LIBCALL);   \
-    extendsftf2_libfunc = gen_rtx (SYMBOL_REF, Pmode, EXTENDSFTF2_LIBCALL); \
-    extenddftf2_libfunc = gen_rtx (SYMBOL_REF, Pmode, EXTENDDFTF2_LIBCALL); \
-    floatsitf_libfunc = gen_rtx (SYMBOL_REF, Pmode, FLOATSITF2_LIBCALL);    \
-    fixtfsi_libfunc = gen_rtx (SYMBOL_REF, Pmode, FIX_TRUNCTFSI2_LIBCALL);  \
+      = gen_rtx_SYMBOL_REF (Pmode, DIVTF3_LIBCALL);			\
+    eqtf2_libfunc = gen_rtx_SYMBOL_REF (Pmode, EQTF2_LIBCALL);		\
+    netf2_libfunc = gen_rtx_SYMBOL_REF (Pmode, NETF2_LIBCALL);		\
+    gttf2_libfunc = gen_rtx_SYMBOL_REF (Pmode, GTTF2_LIBCALL);		\
+    getf2_libfunc = gen_rtx_SYMBOL_REF (Pmode, GETF2_LIBCALL);		\
+    lttf2_libfunc = gen_rtx_SYMBOL_REF (Pmode, LTTF2_LIBCALL);		\
+    letf2_libfunc = gen_rtx_SYMBOL_REF (Pmode, LETF2_LIBCALL);		\
+    trunctfsf2_libfunc = gen_rtx_SYMBOL_REF (Pmode, TRUNCTFSF2_LIBCALL);   \
+    trunctfdf2_libfunc = gen_rtx_SYMBOL_REF (Pmode, TRUNCTFDF2_LIBCALL);   \
+    extendsftf2_libfunc = gen_rtx_SYMBOL_REF (Pmode, EXTENDSFTF2_LIBCALL); \
+    extenddftf2_libfunc = gen_rtx_SYMBOL_REF (Pmode, EXTENDDFTF2_LIBCALL); \
+    floatsitf_libfunc = gen_rtx_SYMBOL_REF (Pmode, FLOATSITF2_LIBCALL);    \
+    fixtfsi_libfunc = gen_rtx_SYMBOL_REF (Pmode, FIX_TRUNCTFSI2_LIBCALL);  \
     fixunstfsi_libfunc							\
-      = gen_rtx (SYMBOL_REF, Pmode, FIXUNS_TRUNCTFSI2_LIBCALL);		\
+      = gen_rtx_SYMBOL_REF (Pmode, FIXUNS_TRUNCTFSI2_LIBCALL);		\
     if (TARGET_FPU)							\
       sqrt_optab->handlers[(int) TFmode].libfunc			\
-	= gen_rtx (SYMBOL_REF, Pmode, "_Q_sqrt");			\
+	= gen_rtx_SYMBOL_REF (Pmode, "_Q_sqrt");			\
     INIT_SUBTARGET_OPTABS;						\
   } while (0)
 
@@ -2709,12 +2716,12 @@ extern struct rtx_def *legitimize_pic_address ();
 
 /* Compute extra cost of moving data between one register class
    and another.  */
+#define GENERAL_OR_I64(C) ((C) == GENERAL_REGS || (C) == I64_REGS)
 #define REGISTER_MOVE_COST(CLASS1, CLASS2)			\
-  (((FP_REG_CLASS_P (CLASS1) && (CLASS2) == GENERAL_REGS)	\
-    || ((CLASS1) == GENERAL_REGS && FP_REG_CLASS_P (CLASS2))	\
+  (((FP_REG_CLASS_P (CLASS1) && GENERAL_OR_I64 (CLASS2)) \
+    || (GENERAL_OR_I64 (CLASS1) && FP_REG_CLASS_P (CLASS2)) \
     || (CLASS1) == FPCC_REGS || (CLASS2) == FPCC_REGS)		\
-   ? (sparc_cpu == PROCESSOR_ULTRASPARC ? 12 : 6)		\
-   : 2)
+   ? (sparc_cpu == PROCESSOR_ULTRASPARC ? 12 : 6) : 2)
 
 /* Provide the costs of a rtl expression.  This is in the body of a
    switch on CODE.  The purpose for the cost of MULT is to encourage
@@ -2741,20 +2748,17 @@ extern struct rtx_def *legitimize_pic_address ();
 
 /* Adjust the cost of dependencies.  */
 #define ADJUST_COST(INSN,LINK,DEP,COST)				\
-do {								\
   if (sparc_cpu == PROCESSOR_SUPERSPARC)			\
     (COST) = supersparc_adjust_cost (INSN, LINK, DEP, COST);	\
   else if (sparc_cpu == PROCESSOR_ULTRASPARC)			\
     (COST) = ultrasparc_adjust_cost (INSN, LINK, DEP, COST);	\
-} while (0)
+  else
 
 /* Conditional branches with empty delay slots have a length of two.  */
 #define ADJUST_INSN_LENGTH(INSN, LENGTH)				\
-do {									\
   if (GET_CODE (INSN) == CALL_INSN					\
       || (GET_CODE (INSN) == JUMP_INSN && ! simplejump_p (insn)))	\
-    LENGTH += 1;							\
-} while (0)
+    LENGTH += 1; else
 
 /* Control the assembler format that we output.  */
 
@@ -3252,6 +3256,16 @@ extern int v9_regcmp_p ();
 extern unsigned long sparc_flat_compute_frame_size ();
 extern unsigned long sparc_type_code ();
 
+extern char *sparc_v8plus_shift ();
+
+#ifdef __STDC__
+/* Function used for V8+ code generation.  Returns 1 if the high
+   32 bits of REG are 0 before INSN.  */
+extern int sparc_check_64 (struct rtx_def *, struct rtx_def *);
+extern int sparc_return_peephole_ok (struct rtx_def *, struct rtx_def *);
+extern int compute_frame_size (int, int);
+#endif
+
 /* Defined in flags.h, but insn-emit.c does not include flags.h.  */
 
 extern int flag_pic;
diff --git a/gcc/config/sparc/sparc.md b/gcc/config/sparc/sparc.md
index ac79f68..8ef692d 100644
--- a/gcc/config/sparc/sparc.md
+++ b/gcc/config/sparc/sparc.md
@@ -34,7 +34,7 @@
 
 ;; Attribute for cpu type.
 ;; These must match the values for enum processor_type in sparc.h.
-(define_attr "cpu" "v7,cypress,v8,supersparc,sparclite,f930,f934,sparclet,tsc701,v8plus,v9,ultrasparc"
+(define_attr "cpu" "v7,cypress,v8,supersparc,sparclite,f930,f934,sparclet,tsc701,v9,ultrasparc"
   (const (symbol_ref "sparc_cpu_attr")))
 
 ;; Attribute for the instruction set.
@@ -67,7 +67,7 @@
 ;; type "call_no_delay_slot" is a call followed by an unimp instruction.
 
 (define_attr "type"
-  "move,unary,binary,compare,load,sload,store,ialu,shift,uncond_branch,branch,call,call_no_delay_slot,address,imul,fpload,fpstore,fp,fpmove,fpcmove,fpcmp,fpmul,fpdivs,fpdivd,fpsqrt,cmove,multi,misc"
+  "move,unary,binary,compare,load,sload,store,ialu,shift,uncond_branch,branch,call,call_no_delay_slot,return,address,imul,fpload,fpstore,fp,fpmove,fpcmove,fpcmp,fpmul,fpdivs,fpdivd,fpsqrt,cmove,multi,misc"
   (const_string "binary"))
 
 ;; Set true if insn uses call-clobbered intermediate register.
@@ -110,7 +110,7 @@
 ;; Attributes for instruction and branch scheduling
 
 (define_attr "in_call_delay" "false,true"
-  (cond [(eq_attr "type" "uncond_branch,branch,call,call_no_delay_slot,multi")
+  (cond [(eq_attr "type" "uncond_branch,branch,call,call_no_delay_slot,return,multi")
 	 	(const_string "false")
 	 (eq_attr "type" "load,fpload,store,fpstore")
 	 	(if_then_else (eq_attr "length" "1")
@@ -127,6 +127,22 @@
 (define_delay (eq_attr "type" "call")
   [(eq_attr "in_call_delay" "true") (nil) (nil)])
 
+(define_attr "leaf_function" "false,true"
+  (const (symbol_ref "leaf_function")))
+
+
+(define_attr "in_return_delay" "false,true"
+  (if_then_else (and (and (and (eq_attr "type" "move,load,sload,store,binary,ialu")
+			       (eq_attr "length" "1"))
+			  (eq_attr "leaf_function" "false"))
+		     (match_insn "eligible_for_return_delay"))
+		(const_string "true")
+		(const_string "false")))
+
+(define_delay (and (eq_attr "type" "return")
+		   (eq_attr "isa" "v9"))
+  [(eq_attr "in_return_delay" "true") (nil) (nil)])
+
 ;; ??? Should implement the notion of predelay slots for floating point
 ;; branches.  This would allow us to remove the nop always inserted before
 ;; a floating point branch.
@@ -356,7 +372,7 @@
 
 (define_function_unit "ieu" 1 0
   (and (eq_attr "cpu" "ultrasparc")
-    (eq_attr "type" "ialu,shift,compare,cmove,call"))
+    (eq_attr "type" "ialu,binary,shift,compare,cmove,call"))
   1 1)
 
 (define_function_unit "ieu_shift" 1 0
@@ -370,12 +386,15 @@
   2 1)
 
 ;; Timings; throughput/latency
-;; ?? FADD     1/3    add/sub, format conv, compar, abs, neg
-;; ?? FMUL     1/3
-;; ?? FDIVs    1/12
-;; ?? FDIVd    1/22
-;; ?? FSQRTs   1/12
-;; ?? FSQRTd   1/22
+;; FMOV     1/1    fmov, fabs, fneg
+;; FMOVcc   1/2
+;; FADD     1/4    add/sub, format conv, compar
+;; FMUL     1/4
+;; FDIVs    12/12
+;; FDIVd    22/22
+;; FSQRTs   12/12
+;; FSQRTd   22/22
+;; FCMP takes 1 cycle to branch, 2 cycles to conditional move.
 
 (define_function_unit "fadd" 1 0
   (and (eq_attr "cpu" "ultrasparc")
@@ -456,7 +475,7 @@
   [(set (reg:CCX 100)
 	(compare:CCX (match_operand:DI 0 "register_operand" "")
 		     (match_operand:DI 1 "arith_double_operand" "")))]
-  "TARGET_ARCH64"
+  "TARGET_ARCH64 || TARGET_V8PLUS"
   "
 {
   sparc_compare_op0 = operands[0];
@@ -521,6 +540,37 @@
   "cmp %0,%1"
   [(set_attr "type" "compare")])
 
+(define_insn "cmpdi_v8plus"
+  [(set (reg:CCX 100)
+	(compare:CCX (match_operand:DI 0 "register_operand" "r,r,r")
+		     (match_operand:DI 1 "arith_double_operand" "J,I,r")))
+   (clobber (match_scratch:SI 2 "=&h,&h,&h"))
+   (clobber (match_scratch:SI 3 "=X,X,&h"))]
+  "TARGET_V8PLUS"
+  "*
+{
+  /* The srl can be omitted if the value in the %L0 or %L1 is already
+     zero extended.  */
+
+  output_asm_insn (\"sllx %H0,32,%2\", operands);
+
+  if (sparc_check_64 (operands[0], insn) <= 0)
+    output_asm_insn (\"srl %L0,0,%L0\", operands);
+
+  switch (which_alternative)
+    {
+    case 0:
+      return \"orcc %L0,%2,%%g0\";
+    case 1:
+      return \"or %L0,%2,%2\;cmp %2,%1\";
+    case 2:
+      if (sparc_check_64 (operands[1], insn) <= 0)
+	output_asm_insn (\"srl %L1,0,%L1\", operands);
+      return \"sllx %H1,32,%3\;or %L0,%2,%2\;or %L1,%3,%3\;cmp %2,%3\";
+    }
+}"
+  [(set_attr "length" "3,4,7")])
+
 (define_insn "*cmpsf_fpe"
   [(set (match_operand:CCFPE 0 "fcc_reg_operand" "=c")
 	(compare:CCFPE (match_operand:SF 1 "register_operand" "f")
@@ -1008,7 +1058,7 @@
 	       (const_int 0)))]
   "TARGET_ARCH64"
   "mov 0,%0\;movrnz %1,1,%0"
-  [(set_attr "type" "unary")
+  [(set_attr "type" "cmove")
    (set_attr "length" "2")])
 
 (define_insn "*neg_snedi_zero"
@@ -1017,7 +1067,7 @@
 		       (const_int 0))))]
   "TARGET_ARCH64"
   "mov 0,%0\;movrnz %1,-1,%0"
-  [(set_attr "type" "unary")
+  [(set_attr "type" "cmove")
    (set_attr "length" "2")])
 
 (define_insn "*snedi_zero_trunc"
@@ -1026,7 +1076,7 @@
 	       (const_int 0)))]
   "TARGET_ARCH64"
   "mov 0,%0\;movrnz %1,1,%0"
-  [(set_attr "type" "unary")
+  [(set_attr "type" "cmove")
    (set_attr "length" "2")])
 
 (define_insn "*seqsi_zero"
@@ -1065,7 +1115,7 @@
 	       (const_int 0)))]
   "TARGET_ARCH64"
   "mov 0,%0\;movrz %1,1,%0"
-  [(set_attr "type" "unary")
+  [(set_attr "type" "cmove")
    (set_attr "length" "2")])
 
 (define_insn "*neg_seqdi_zero"
@@ -1074,7 +1124,7 @@
 		       (const_int 0))))]
   "TARGET_ARCH64"
   "mov 0,%0\;movrz %1,-1,%0"
-  [(set_attr "type" "unary")
+  [(set_attr "type" "cmove")
    (set_attr "length" "2")]) 
 
 (define_insn "*seqdi_zero_trunc"
@@ -1083,7 +1133,7 @@
 	       (const_int 0)))]
   "TARGET_ARCH64"
   "mov 0,%0\;movrz %1,1,%0"
-  [(set_attr "type" "unary")
+  [(set_attr "type" "cmove")
    (set_attr "length" "2")])
 
 ;; We can also do (x + (i == 0)) and related, so put them in.
@@ -1645,15 +1695,16 @@
   [(set_attr "type" "move")
    (set_attr "length" "1")])
 
-(define_insn "get_pc_via_call"
-  [(set (pc) (label_ref (match_operand 0 "" "")))
-   (set (reg:SI 15) (label_ref (match_operand 1 "" "")))]
-  ""
-  "call %l0%#"
-  [(set_attr "type" "uncond_branch")])
+(define_insn "get_pc"
+  [(clobber (reg:SI 15))
+   (set (match_operand 0 "register_operand" "=r")
+	(unspec [(match_operand 1 "" "") (match_operand 2 "" "")] 2))]
+  "flag_pic && REGNO (operands[0]) == 23"
+  "sethi %%hi(%a1-4),%0\;call %a2\;add %0,%%lo(%a1+4),%0"
+  [(set_attr "length" "3")])
 
 (define_insn "get_pc_via_rdpc"
-  [(set (match_operand:DI 0 "register_operand" "=r") (pc))]
+  [(set (match_operand 0 "register_operand" "=r") (pc))]
   "TARGET_V9"
   "rd %%pc,%0"
   [(set_attr "type" "move")])
@@ -2089,7 +2140,10 @@
   "! TARGET_LIVE_G0
    && (register_operand (operands[0], SImode)
        || register_operand (operands[1], SImode)
-       || operands[1] == const0_rtx)"
+       || operands[1] == const0_rtx)
+   && (GET_CODE (operands[0]) != REG || ! CONSTANT_P (operands[1])
+       || REGNO (operands[0]) < 32
+       || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER)"
   "@
    mov %1,%0
    fmovs %1,%0
@@ -2099,7 +2153,7 @@
    st %r1,%0
    st %1,%0
    fzeros %0"
-  [(set_attr "type" "move,fp,move,load,fpload,store,fpstore,fpmove")
+  [(set_attr "type" "move,fpmove,move,load,fpload,store,fpstore,fpmove")
    (set_attr "length" "1")])
 
 (define_insn "*movsi_insn_liveg0"
@@ -2141,16 +2195,20 @@
     DONE;
 }")
 
-;; V8+ movdi is like regular 32 bit except that a 64 bit zero can be stored
-;; to aligned memory with a single instruction and the ldd/std instructions
-;; are not used.
-(define_insn "*movdi_v8plus"
-  [(set (match_operand:DI 0 "reg_or_nonsymb_mem_operand" "=r,T,Q,r,r,f,f,Q,b")
-      (match_operand:DI 1 "general_operand" "r,J,r,Q,i,?f,?Q,?f,?J"))]
-  "TARGET_V8PLUS
+;; 32 bit V9 movdi is like regular 32 bit except: a 64 bit zero can be stored
+;; to aligned memory with a single instruction, the ldd/std instructions
+;; are not used, and constants can not be moved to floating point registers.
+
+(define_insn "*movdi_sp32_v9"
+  [(set (match_operand:DI 0 "reg_or_nonsymb_mem_operand" "=r,T,Q,r,r,?e,?e,?Q,?b")
+	(match_operand:DI 1 "general_operand" "r,J,r,Q,i,e,Q,e,J"))]
+  "TARGET_V9
    && (register_operand (operands[0], DImode)
        || register_operand (operands[1], DImode)
-       || operands[1] == const0_rtx)"
+       || operands[1] == const0_rtx)
+   && (GET_CODE (operands[0]) != REG || ! CONSTANT_P (operands[1])
+       || REGNO (operands[0]) < 32
+       || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER)"
   "*
 {
   if (which_alternative == 1)
@@ -2164,13 +2222,11 @@
   [(set_attr "type" "move,store,store,load,multi,fp,fpload,fpstore,fpmove")
    (set_attr "length" "2,1,3,3,3,2,3,3,1")])
 
-;; ??? The Haifa scheduler does not split instructions after reload if
-;; it also ran before reload.
-
+;; SPARC V9 deprecates std.  Split it here.
 (define_split
   [(set (match_operand:DI 0 "memory_operand" "=m")
       (match_operand:DI 1 "register_operand" "r"))]
-  "TARGET_V8PLUS && !TARGET_ARCH64 && reload_completed
+  "TARGET_V9 && ! TARGET_ARCH64 && reload_completed
    && REGNO (operands[1]) < 32 && ! MEM_VOLATILE_P (operands[0])
    && offsettable_memref_p (operands[0])"
   [(set (match_dup 2) (match_dup 3))
@@ -2182,10 +2238,10 @@
    operands[2] = copy_rtx (operands[0]);
    PUT_MODE (operands[2], SImode);")
 
-(define_insn "*movdi_sp32_insn"
+(define_insn "*movdi_sp32"
   [(set (match_operand:DI 0 "reg_or_nonsymb_mem_operand" "=r,T,U,Q,r,r,?f,?f,?Q")
 	(match_operand:DI 1 "general_operand" "r,U,T,r,Q,i,f,Q,f"))]
-  "! TARGET_ARCH64
+  "! TARGET_V9
    && (register_operand (operands[0], DImode)
        || register_operand (operands[1], DImode)
        || operands[1] == const0_rtx)"
@@ -2207,8 +2263,8 @@
 ;;; This needs the original value of operands[1], not the inverted value.
 
 (define_insn "*movdi_sp64_insn"
-  [(set (match_operand:DI 0 "reg_or_nonsymb_mem_operand" "=r,r,r,Q,?f,?f,?Q")
-	(match_operand:DI 1 "move_operand" "rI,K,Q,rJ,f,Q,f"))]
+  [(set (match_operand:DI 0 "reg_or_nonsymb_mem_operand" "=r,r,r,Q,?e,?e,?Q")
+	(match_operand:DI 1 "move_operand" "rI,K,Q,rJ,e,Q,e"))]
   "TARGET_ARCH64
    && (register_operand (operands[0], DImode)
        || register_operand (operands[1], DImode)
@@ -2693,24 +2749,22 @@
   "
 {
   enum rtx_code code = GET_CODE (operands[1]);
-
-  if (GET_MODE (sparc_compare_op0) == DImode
-      && ! TARGET_ARCH64)
-    FAIL;
+  enum machine_mode op0_mode = GET_MODE (sparc_compare_op0);
 
   if (sparc_compare_op1 == const0_rtx
       && GET_CODE (sparc_compare_op0) == REG
-      && GET_MODE (sparc_compare_op0) == DImode
-      && v9_regcmp_p (code))
+      && ((TARGET_ARCH64 && op0_mode == DImode && v9_regcmp_p (code))
+	  || (op0_mode == SImode && v8plus_regcmp_p (code))))
     {
-      operands[1] = gen_rtx (code, DImode,
+      operands[1] = gen_rtx_fmt_ee (code, op0_mode,
 			     sparc_compare_op0, sparc_compare_op1);
     }
   else
     {
       rtx cc_reg = gen_compare_reg (code,
 				    sparc_compare_op0, sparc_compare_op1);
-      operands[1] = gen_rtx (code, GET_MODE (cc_reg), cc_reg, const0_rtx);
+      operands[1] = gen_rtx_fmt_ee (code, GET_MODE (cc_reg),
+				    cc_reg, const0_rtx);
     }
 }")
 
@@ -2729,14 +2783,15 @@
       && GET_MODE (sparc_compare_op0) == DImode
       && v9_regcmp_p (code))
     {
-      operands[1] = gen_rtx (code, DImode,
+      operands[1] = gen_rtx_fmt_ee (code, DImode,
 			     sparc_compare_op0, sparc_compare_op1);
     }
   else
     {
       rtx cc_reg = gen_compare_reg (code,
 				    sparc_compare_op0, sparc_compare_op1);
-      operands[1] = gen_rtx (code, GET_MODE (cc_reg), cc_reg, const0_rtx);
+      operands[1] = gen_rtx_fmt_ee (code, GET_MODE (cc_reg),
+				    cc_reg, const0_rtx);
     }
 }")
 
@@ -2963,6 +3018,57 @@
    movr%d1 %2,%r4,%0"
   [(set_attr "type" "cmove")])
 
+;; On UltraSPARC this is slightly worse than cmp/mov %icc if the register
+;; needs to be zero extended but better on average.
+(define_insn "*movsi_cc_reg_v8plus"
+  [(set (match_operand:SI 0 "register_operand" "=r,r")
+	(if_then_else:SI (match_operator 1 "v8plus_regcmp_op"
+				[(match_operand:SI 2 "register_operand" "r,r")
+				 (const_int 0)])
+		      (match_operand:SI 3 "arith10_operand" "rM,0")
+		      (match_operand:SI 4 "arith10_operand" "0,rM")))]
+  "TARGET_V9"
+  "*
+{
+  if (! sparc_check_64 (operands[2], insn))
+    output_asm_insn (\"srl %2,0,%2\", operands);
+  if (which_alternative == 0)
+    return \"movr%D1 %2,%r3,%0\";
+  return \"movr%d1 %2,%r4,%0\";
+}"
+  [(set_attr "type" "cmove")
+   (set_attr "length" "2")])
+
+;; To work well this needs to know the current insn, but that is not an
+;; argument to gen_split_*.
+
+(define_split
+  [(set (match_operand:SI 0 "register_operand" "=r,r")
+	(if_then_else:SI (match_operator 1 "v8plus_regcmp_op"
+				[(match_operand:SI 2 "register_operand" "r,r")
+				 (const_int 0)])
+		      (match_operand:SI 3 "arith10_operand" "rM,0")
+		      (match_operand:SI 4 "arith10_operand" "0,rM")))]
+  "reload_completed"
+  [(set (match_dup 0)
+	(unspec:SI [(match_dup 1) (match_dup 3) (match_dup 4)] 9))]
+  "if (! sparc_check_64 (operands[2], NULL_RTX))
+     emit_insn (gen_v8plus_clear_high (operands[2], operands[2]));")
+
+;; A conditional move with the condition argument known to be zero extended
+(define_insn ""
+  [(set (match_operand:SI 0 "register_operand" "=r,r")
+	(unspec:SI [(match_operator 1 "v8plus_regcmp_op"
+				    [(match_operand:SI 2 "register_operand" "r,r")
+				     (const_int 0)])
+		    (match_operand:SI 3 "arith10_operand" "rM,0")
+		    (match_operand:SI 4 "arith10_operand" "0,rM")] 9))]
+  "TARGET_V9"
+  "@
+   movr%D1 %2,%r3,%0
+   movr%d1 %2,%r4,%0"
+  [(set_attr "type" "cmove")])
+
 ;; ??? The constraints of operands 3,4 need work.
 (define_insn "*movdi_cc_reg_sp64"
   [(set (match_operand:DI 0 "register_operand" "=r,r")
@@ -3130,6 +3236,7 @@
   "lduh %1,%0"
   [(set_attr "type" "load")])
 
+
 ;; ??? Write truncdisi pattern using sra?
 
 (define_expand "zero_extendsidi2"
@@ -3148,6 +3255,20 @@
   [(set_attr "type" "unary,load")
    (set_attr "length" "1")])
 
+;; Zero extend a 32 bit value in a 64 bit register.
+(define_insn "v8plus_clear_high"
+  [(set (match_operand:SI 0 "reg_or_nonsymb_mem_operand" "=r,Q")
+	(unspec:SI [(match_operand:SI 1 "register_operand" "r,r")] 10))]
+  "TARGET_V9"
+  "*
+if (which_alternative == 1)
+  return \"st %1,%0\";
+if (REGNO (operands[0]) == REGNO (operands[1]) && sparc_check_64 (operands[1], insn) > 0)
+  return \"nop\";  /* Same register, high bits already clear: nothing to do.  */
+return \"srl %1,0,%0\";  /* Also serves as the copy when %0 and %1 differ.  */
+"
+  [(set_attr "type" "shift,store")])
+
 ;; Simplify comparisons of extended values.
 
 (define_insn "*cmp_zero_extendqisi2"
@@ -3480,115 +3601,25 @@
   [(set_attr "type" "fp")])
 
 ;; Now the same for 64 bit sources.
-;; ??? We cannot put DImode values in fp regs (see below near fix_truncdfsi2).
-
-(define_expand "floatdisf2"
-  [(parallel [(set (match_operand:SF 0 "register_operand" "")
-		   (float:SF (match_operand:DI 1 "general_operand" "")))
-	      (clobber (match_dup 2))
-	      (clobber (match_dup 3))])]
-  "TARGET_ARCH64 && TARGET_FPU"
-  "
-{
-  operands[2] = gen_reg_rtx (DFmode);
-  operands[3] = sparc64_fpconv_stack_temp ();
-}")
-
-(define_expand "floatdidf2"
-  [(parallel [(set (match_operand:DF 0 "register_operand" "")
-		   (float:DF (match_operand:DI 1 "general_operand" "")))
-	      (clobber (match_dup 2))
-	      (clobber (match_dup 3))])]
-  "TARGET_ARCH64 && TARGET_FPU"
-  "
-{
-  operands[2] = gen_reg_rtx (DFmode);
-  operands[3] = sparc64_fpconv_stack_temp ();
-}")
-
-(define_expand "floatditf2"
-  [(parallel [(set (match_operand:TF 0 "register_operand" "")
-		   (float:TF (match_operand:DI 1 "general_operand" "")))
-	      (clobber (match_dup 2))
-	      (clobber (match_dup 3))])]
-  "TARGET_ARCH64 && TARGET_FPU && TARGET_HARD_QUAD"
-  "
-{
-  operands[2] = gen_reg_rtx (DFmode);
-  operands[3] = sparc64_fpconv_stack_temp ();
-}")
-
-(define_insn "*floatdisf2_insn"
-  [(parallel [(set (match_operand:SF 0 "register_operand" "=f")
-		   (float:SF (match_operand:DI 1 "general_operand" "rm")))
-	      (clobber (match_operand:DF 2 "register_operand" "=&e"))
-	      (clobber (match_operand:DI 3 "memory_operand" "m"))])]
-  "TARGET_ARCH64 && TARGET_FPU"
-  "*
-{
-  if (GET_CODE (operands[1]) == MEM)
-    output_asm_insn (\"ldd %1,%2\", operands);
-  else
-    output_asm_insn (\"stx %1,%3\;ldd %3,%2\", operands);
-  return \"fxtos %2,%0\";
-}"
-  [(set_attr "type" "fp")
-   (set_attr "length" "3")])
-
-(define_insn "*floatdidf2_insn"
-  [(parallel [(set (match_operand:DF 0 "register_operand" "=e")
-		   (float:DF (match_operand:DI 1 "general_operand" "rm")))
-	      (clobber (match_operand:DF 2 "register_operand" "=&e"))
-	      (clobber (match_operand:DI 3 "memory_operand" "m"))])]
-  "TARGET_ARCH64 && TARGET_FPU"
-  "*
-{
-  if (GET_CODE (operands[1]) == MEM)
-    output_asm_insn (\"ldd %1,%2\", operands);
-  else
-    output_asm_insn (\"stx %1,%3\;ldd %3,%2\", operands);
-  return \"fxtod %2,%0\";
-}"
-  [(set_attr "type" "fp")
-   (set_attr "length" "3")])
 
-(define_insn "*floatditf2_insn"
-  [(parallel [(set (match_operand:TF 0 "register_operand" "=e")
-		   (float:TF (match_operand:DI 1 "general_operand" "rm")))
-	      (clobber (match_operand:DF 2 "register_operand" "=&e"))
-	      (clobber (match_operand:DI 3 "memory_operand" "m"))])]
-  "TARGET_ARCH64 && TARGET_FPU && TARGET_HARD_QUAD"
-  "*
-{
-  if (GET_CODE (operands[1]) == MEM)
-    output_asm_insn (\"ldd %1,%2\", operands);
-  else
-    output_asm_insn (\"stx %1,%3\;ldd %3,%2\", operands);
-  return \"fxtoq %2,%0\";
-}"
-  [(set_attr "type" "fp")
-   (set_attr "length" "3")])
-
-;; ??? Ideally, these are what we would like to use.
-
-(define_insn "floatdisf2_sp64"
+(define_insn "floatdisf2"
   [(set (match_operand:SF 0 "register_operand" "=f")
 	(float:SF (match_operand:DI 1 "register_operand" "e")))]
-  "0 && TARGET_ARCH64 && TARGET_FPU"
+  "TARGET_V9 && TARGET_FPU"
   "fxtos %1,%0"
   [(set_attr "type" "fp")])
 
-(define_insn "floatdidf2_sp64"
+(define_insn "floatdidf2"
   [(set (match_operand:DF 0 "register_operand" "=e")
 	(float:DF (match_operand:DI 1 "register_operand" "e")))]
-  "0 && TARGET_ARCH64 && TARGET_FPU"
+  "TARGET_V9 && TARGET_FPU"
   "fxtod %1,%0"
   [(set_attr "type" "fp")])
 
-(define_insn "floatditf2_sp64"
+(define_insn "floatditf2"
   [(set (match_operand:TF 0 "register_operand" "=e")
 	(float:TF (match_operand:DI 1 "register_operand" "e")))]
-  "0 && TARGET_ARCH64 && TARGET_FPU && TARGET_HARD_QUAD"
+  "TARGET_V9 && TARGET_FPU && TARGET_HARD_QUAD"
   "fxtoq %1,%0"
   [(set_attr "type" "fp")])
 
@@ -3616,121 +3647,26 @@
   "fqtoi %1,%0"
   [(set_attr "type" "fp")])
 
-;; Now the same, for 64-bit targets
-;; ??? We try to work around an interesting problem.
-;; If gcc tries to do a subreg on the result it will get the wrong answer:
-;; "(subreg:SI (reg:DI M int-reg) 0)" is the same as
-;; "(subreg:SI (reg:DI N float-reg) 1)", but gcc does not know how to change
-;; the "0" to a "1".  One could enhance alter_subreg but it is not clear how to
-;; do this cleanly.
-
-(define_expand "fix_truncsfdi2"
-  [(parallel [(set (match_operand:DI 0 "general_operand" "")
-		   (fix:DI (fix:SF (match_operand:SF 1 "register_operand" ""))))
-	      (clobber (match_dup 2))
-	      (clobber (match_dup 3))])]
-  "TARGET_ARCH64 && TARGET_FPU"
-  "
-{
-  operands[2] = gen_reg_rtx (DFmode);
-  operands[3] = sparc64_fpconv_stack_temp ();
-}")
-
-(define_expand "fix_truncdfdi2"
-  [(parallel [(set (match_operand:DI 0 "general_operand" "")
-		   (fix:DI (fix:DF (match_operand:DF 1 "register_operand" ""))))
-	      (clobber (match_dup 2))
-	      (clobber (match_dup 3))])]
-  "TARGET_ARCH64 && TARGET_FPU"
-  "
-{
-  operands[2] = gen_reg_rtx (DFmode);
-  operands[3] = sparc64_fpconv_stack_temp ();
-}")
+;; Now the same, for V9 targets
 
-(define_expand "fix_trunctfdi2"
-  [(parallel [(set (match_operand:DI 0 "general_operand" "")
-		   (fix:DI (fix:TF (match_operand:TF 1 "register_operand" ""))))
-	      (clobber (match_dup 2))
-	      (clobber (match_dup 3))])]
-  "TARGET_ARCH64 && TARGET_FPU && TARGET_HARD_QUAD"
-  "
-{
-  operands[2] = gen_reg_rtx (DFmode);
-  operands[3] = sparc64_fpconv_stack_temp ();
-}")
-
-(define_insn "*fix_truncsfdi2_insn"
-  [(parallel [(set (match_operand:DI 0 "general_operand" "=rm")
-		   (fix:DI (fix:SF (match_operand:SF 1 "register_operand" "f"))))
-	      (clobber (match_operand:DF 2 "register_operand" "=&e"))
-	      (clobber (match_operand:DI 3 "memory_operand" "m"))])]
-  "TARGET_ARCH64 && TARGET_FPU"
-  "*
-{
-  output_asm_insn (\"fstox %1,%2\", operands);
-  if (GET_CODE (operands[0]) == MEM)
-    return \"std %2,%0\";
-  else
-    return \"std %2,%3\;ldx %3,%0\";
-}"
-  [(set_attr "type" "fp")
-   (set_attr "length" "3")])
-
-(define_insn "*fix_truncdfdi2_insn"
-  [(parallel [(set (match_operand:DI 0 "general_operand" "=rm")
-		   (fix:DI (fix:DF (match_operand:DF 1 "register_operand" "e"))))
-	      (clobber (match_operand:DF 2 "register_operand" "=&e"))
-	      (clobber (match_operand:DI 3 "memory_operand" "m"))])]
-  "TARGET_ARCH64 && TARGET_FPU"
-  "*
-{
-  output_asm_insn (\"fdtox %1,%2\", operands);
-  if (GET_CODE (operands[0]) == MEM)
-    return \"std %2,%0\";
-  else
-    return \"std %2,%3\;ldx %3,%0\";
-}"
-  [(set_attr "type" "fp")
-   (set_attr "length" "3")])
-
-(define_insn "*fix_trunctfdi2_insn"
-  [(parallel [(set (match_operand:DI 0 "general_operand" "=rm")
-		   (fix:DI (fix:TF (match_operand:TF 1 "register_operand" "e"))))
-	      (clobber (match_operand:DF 2 "register_operand" "=&e"))
-	      (clobber (match_operand:DI 3 "memory_operand" "m"))])]
-  "TARGET_ARCH64 && TARGET_FPU && TARGET_HARD_QUAD"
-  "*
-{
-  output_asm_insn (\"fqtox %1,%2\", operands);
-  if (GET_CODE (operands[0]) == MEM)
-    return \"std %2,%0\";
-  else
-    return \"std %2,%3\;ldx %3,%0\";
-}"
-  [(set_attr "type" "fp")
-   (set_attr "length" "3")])
-
-;; ??? Ideally, these are what we would like to use.
-
-(define_insn "fix_truncsfdi2_sp64"
+(define_insn "fix_truncsfdi2"
   [(set (match_operand:DI 0 "register_operand" "=e")
 	(fix:DI (fix:SF (match_operand:SF 1 "register_operand" "f"))))]
-  "0 && TARGET_ARCH64 && TARGET_FPU"
+  "TARGET_V9 && TARGET_FPU"
   "fstox %1,%0"
   [(set_attr "type" "fp")])
 
-(define_insn "fix_truncdfdi2_sp64"
+(define_insn "fix_truncdfdi2"
   [(set (match_operand:DI 0 "register_operand" "=e")
 	(fix:DI (fix:DF (match_operand:DF 1 "register_operand" "e"))))]
-  "0 && TARGET_ARCH64 && TARGET_FPU"
+  "TARGET_V9 && TARGET_FPU"
   "fdtox %1,%0"
   [(set_attr "type" "fp")])
 
-(define_insn "fix_trunctfdi2_sp64"
+(define_insn "fix_trunctfdi2"
   [(set (match_operand:DI 0 "register_operand" "=e")
 	(fix:DI (fix:TF (match_operand:TF 1 "register_operand" "e"))))]
-  "0 && TARGET_ARCH64 && TARGET_FPU && TARGET_HARD_QUAD"
+  "TARGET_V9 && TARGET_FPU && TARGET_HARD_QUAD"
   "fqtox %1,%0"
   [(set_attr "type" "fp")])
 
@@ -3785,6 +3721,77 @@
 }"
   [(set_attr "length" "2")])
 
+
+;; Split DImode arithmetic
+
+(define_split	; after reload on non-ARCH64 targets: DImode add -> two SImode adds
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(plus:DI (match_operand:DI 1 "arith_double_operand" "%r")
+		 (match_operand:DI 2 "arith_double_operand" "rHI")))
+   (clobber (reg:SI 100))]
+  "! TARGET_ARCH64 && reload_completed"
+  [(parallel [(set (reg:CC_NOOV 100)	; low words, also setting the condition codes
+		   (compare:CC_NOOV (plus:SI (match_dup 4)
+					     (match_dup 5))
+				    (const_int 0)))
+	      (set (match_dup 3)
+		   (plus:SI (match_dup 4) (match_dup 5)))])
+   (set (match_dup 6)	; high words plus carry; same shape as the *addx pattern
+	(plus:SI (plus:SI (match_dup 7)
+			  (match_dup 8))
+		 (ltu:SI (reg:CC_NOOV 100) (const_int 0))))]
+  "operands[3] = gen_lowpart (SImode, operands[0]);
+   operands[4] = gen_lowpart (SImode, operands[1]);
+   operands[5] = gen_lowpart (SImode, operands[2]);
+   operands[6] = gen_highpart (SImode, operands[0]);
+   operands[7] = gen_highpart (SImode, operands[1]);
+   if (GET_CODE (operands[2]) == CONST_INT)	/* high word of a constant is its sign: -1 or 0 */
+     operands[8] = INTVAL (operands[2]) < 0 ? constm1_rtx : const0_rtx;
+   else
+     operands[8] = gen_highpart (SImode, operands[2]);")
+
+(define_split	; after reload on non-ARCH64 targets: DImode subtract -> two SImode subtracts
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(minus:DI (match_operand:DI 1 "arith_double_operand" "r")
+		  (match_operand:DI 2 "arith_double_operand" "rHI")))
+   (clobber (reg:SI 100))]
+  "! TARGET_ARCH64 && reload_completed"
+  [(parallel [(set (reg:CC_NOOV 100)	; low words, also setting the condition codes
+		   (compare:CC_NOOV (minus:SI (match_dup 4)
+					      (match_dup 5))
+				    (const_int 0)))
+	      (set (match_dup 3)
+		   (minus:SI (match_dup 4) (match_dup 5)))])
+   (set (match_dup 6)	; high words minus borrow; same shape as the *subx pattern
+	(minus:SI (minus:SI (match_dup 7)
+			    (match_dup 8))
+		  (ltu:SI (reg:CC_NOOV 100) (const_int 0))))]
+  "operands[3] = gen_lowpart (SImode, operands[0]);
+   operands[4] = gen_lowpart (SImode, operands[1]);
+   operands[5] = gen_lowpart (SImode, operands[2]);
+   operands[6] = gen_highpart (SImode, operands[0]);
+   operands[7] = gen_highpart (SImode, operands[1]);	/* next: high word of a CONST_INT is its sign (-1 or 0), as in the add split */
+   operands[8] = (GET_CODE (operands[2]) == CONST_INT ? (INTVAL (operands[2]) < 0 ? constm1_rtx : const0_rtx) : gen_highpart (SImode, operands[2]));")
+
+;; LTU here means "carry set"
+(define_insn "*addx"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+	(plus:SI (plus:SI (match_operand:SI 1 "arith_operand" "%r")
+			  (match_operand:SI 2 "arith_operand" "rI"))
+		 (ltu:SI (reg:CC_NOOV 100) (const_int 0))))]
+  ""
+  "addx %1,%2,%0"
+  [(set_attr "type" "unary")])
+
+(define_insn "*subx"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+	(minus:SI (minus:SI (match_operand:SI 1 "register_operand" "r")
+			    (match_operand:SI 2 "arith_operand" "rI"))
+		  (ltu:SI (reg:CC_NOOV 100) (const_int 0))))]
+  ""
+  "subx %1,%2,%0"
+  [(set_attr "type" "unary")])
+
 (define_insn ""
   [(set (match_operand:DI 0 "register_operand" "=r")
       (plus:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "r"))
@@ -3976,13 +3983,50 @@
   "smul %1,%2,%0"
   [(set_attr "type" "imul")])
 
-(define_insn "muldi3"
+(define_expand "muldi3"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(mult:DI (match_operand:DI 1 "arith_double_operand" "%r")
+		 (match_operand:DI 2 "arith_double_operand" "rHI")))]
+  "TARGET_ARCH64 || TARGET_V8PLUS"
+  "
+{
+  if (TARGET_V8PLUS)
+    {
+      emit_insn (gen_muldi3_v8plus (operands[0], operands[1], operands[2]));
+      DONE;
+    }
+}")
+
+(define_insn "*muldi3_sp64"
   [(set (match_operand:DI 0 "register_operand" "=r")
 	(mult:DI (match_operand:DI 1 "arith_double_operand" "%r")
 		 (match_operand:DI 2 "arith_double_operand" "rHI")))]
   "TARGET_ARCH64"
   "mulx %1,%2,%0")
 
+;; V8plus wide multiply: join each 32 bit register pair into 64 bits, mulx, split the product.
+(define_insn "muldi3_v8plus"
+  [(set (match_operand:DI 0 "register_operand" "=r,h")
+	(mult:DI (match_operand:DI 1 "arith_double_operand" "%r,0")
+		 (match_operand:DI 2 "arith_double_operand" "rHI,rHI")))
+   (clobber (match_scratch:SI 3 "=&h,X"))
+   (clobber (match_scratch:SI 4 "=&h,X"))]
+  "TARGET_V8PLUS"
+  "*
+{
+  if (sparc_check_64 (operands[1], insn) <= 0)
+    output_asm_insn (\"srl %L1,0,%L1\", operands);
+  if (which_alternative == 1)
+    output_asm_insn (\"sllx %H1,32,%H1\", operands);
+  if (GET_CODE (operands[2]) == CONST_INT)	/* no register pair to join: use the constant as the mulx immediate */
+    return which_alternative == 1 ? \"or %L1,%H1,%H1\;mulx %H1,%2,%L0\;srlx %L0,32,%H0\" : \"sllx %H1,32,%3\;or %L1,%3,%3\;mulx %3,%2,%3\;srlx %3,32,%H0\;mov %3,%L0\";
+  if (sparc_check_64 (operands[2], insn) <= 0)
+    output_asm_insn (\"srl %L2,0,%L2\", operands);
+  return which_alternative == 1 ? \"or %L1,%H1,%H1\;sllx %H2,32,%L1\;or %L2,%L1,%L1\;mulx %H1,%L1,%L0\;srlx %L0,32,%H0\"
+    : \"sllx %H1,32,%3\;sllx %H2,32,%4\;or %L1,%3,%3\;or %L2,%4,%4\;mulx %3,%4,%3\;srlx %3,32,%H0\;mov %3,%L0\";
+}"
+  [(set_attr "length" "9,8")])
+
 ;; It is not known whether this will match.
 
 (define_insn "*cmp_mul_set"
@@ -4010,11 +4054,35 @@
     }
 }")
 
+;; V9 puts the 64 bit product in a 64 bit register.  Only out or global
+;; registers can hold 64 bit values in the V8plus environment.
+(define_insn "*mulsidi3_v8plus"
+  [(set (match_operand:DI 0 "register_operand" "=h,r")
+	(mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" "r,r"))
+		 (sign_extend:DI (match_operand:SI 2 "register_operand" "r,r"))))
+   (clobber (match_scratch:SI 3 "=X,&h"))]
+  "TARGET_V8PLUS"
+  "@
+   smul %1,%2,%L0\;srlx %L0,32,%H0
+   smul %1,%2,%3\;srlx %3,32,%H0\;mov %3,%L0"
+  [(set_attr "length" "2,3")])
+
+(define_insn "*const_mulsidi3_v8plus"
+  [(set (match_operand:DI 0 "register_operand" "=h,r")
+	(mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" "r,r"))
+		 (match_operand:SI 2 "small_int" "I,I")))
+   (clobber (match_scratch:SI 3 "=X,&h"))]
+  "TARGET_V8PLUS"
+  "@
+   smul %1,%2,%L0\;srlx %L0,32,%H0
+   smul %1,%2,%3\;srlx %3,32,%H0\;mov %3,%L0"
+  [(set_attr "length" "2,3")])
+
 (define_insn "*mulsidi3_sp32"
   [(set (match_operand:DI 0 "register_operand" "=r")
 	(mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" "r"))
 		 (sign_extend:DI (match_operand:SI 2 "register_operand" "r"))))]
-  "TARGET_HARD_MUL"
+  "TARGET_HARD_MUL32"
   "*
 {
   return TARGET_SPARCLET ? \"smuld %1,%2,%L0\" : \"smul %1,%2,%L0\;rd %%y,%H0\";
@@ -4052,15 +4120,34 @@
       emit_insn (gen_const_smulsi3_highpart (operands[0], operands[1], operands[2]));
       DONE;
     }
+  if (TARGET_V8PLUS)
+    {
+      emit_insn (gen_smulsidi3_highpart_v8plus (operands[0], operands[1],
+						operands[2], GEN_INT (32)));
+      DONE;
+    }
 }")
 
+(define_insn "smulsidi3_highpart_v8plus"
+  [(set (match_operand:SI 0 "register_operand" "=h,r")
+	(truncate:SI
+	 (lshiftrt:DI (mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" "r,r"))
+			       (sign_extend:DI (match_operand:SI 2 "register_operand" "r,r")))
+		      (match_operand:SI 3 "const_int_operand" "i,i"))))
+   (clobber (match_scratch:SI 4 "=X,&h"))]
+  "TARGET_V8PLUS"
+  "@
+   smul %1,%2,%0\;srlx %0,%3,%0
+   smul %1,%2,%4\;srlx %4,%3,%0"
+  [(set_attr "length" "2")])
+
 (define_insn "*smulsidi3_highpart_sp32"
   [(set (match_operand:SI 0 "register_operand" "=r")
 	(truncate:SI
 	 (lshiftrt:DI (mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" "r"))
 			       (sign_extend:DI (match_operand:SI 2 "register_operand" "r")))
 		      (const_int 32))))]
-  "TARGET_HARD_MUL"
+  "TARGET_HARD_MUL32"
   "smul %1,%2,%%g0\;rd %%y,%0"
   [(set_attr "length" "2")])
 
@@ -4070,7 +4157,7 @@
 	 (lshiftrt:DI (mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" "r"))
 			       (match_operand:SI 2 "register_operand" "r"))
 		      (const_int 32))))]
-  "TARGET_V8 || TARGET_SPARCLITE || TARGET_SPARCLET || TARGET_DEPRECATED_V8_INSNS"
+  "TARGET_HARD_MUL32"
   "smul %1,%2,%%g0\;rd %%y,%0"
   [(set_attr "length" "2")])
 
@@ -4086,13 +4173,29 @@
       emit_insn (gen_const_umulsidi3 (operands[0], operands[1], operands[2]));
       DONE;
     }
+  if (TARGET_V8PLUS)
+    {
+      emit_insn (gen_umulsidi3_v8plus (operands[0], operands[1], operands[2]));
+      DONE;
+    }
 }")
 
+(define_insn "umulsidi3_v8plus"
+  [(set (match_operand:DI 0 "register_operand" "=h,r")
+	(mult:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "r,r"))
+		 (zero_extend:DI (match_operand:SI 2 "register_operand" "r,r"))))
+   (clobber (match_scratch:SI 3 "=X,&h"))]
+  "TARGET_V8PLUS"
+  "@
+   umul %1,%2,%L0\;srlx %L0,32,%H0
+   umul %1,%2,%3\;srlx %3,32,%H0\;mov %3,%L0"
+  [(set_attr "length" "2,3")])
+
 (define_insn "*umulsidi3_sp32"
   [(set (match_operand:DI 0 "register_operand" "=r")
 	(mult:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "r"))
 		 (zero_extend:DI (match_operand:SI 2 "register_operand" "r"))))]
-  "TARGET_HARD_MUL"
+  "TARGET_HARD_MUL32"
   "*
 {
   return TARGET_SPARCLET ? \"umuld %1,%2,%L0\" : \"umul %1,%2,%L0\;rd %%y,%H0\";
@@ -4107,7 +4210,7 @@
   [(set (match_operand:DI 0 "register_operand" "=r")
 	(mult:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "r"))
 		 (match_operand:SI 2 "uns_small_int" "")))]
-  "TARGET_HARD_MUL"
+  "TARGET_HARD_MUL32"
   "*
 {
   return TARGET_SPARCLET ? \"umuld %1,%2,%L0\" : \"umul %1,%2,%L0\;rd %%y,%H0\";
@@ -4116,6 +4219,17 @@
 	(if_then_else (eq_attr "isa" "sparclet")
 		      (const_int 1) (const_int 2)))])
 
+(define_insn "const_umulsidi3_v8plus"
+  [(set (match_operand:DI 0 "register_operand" "=h,r")
+	(mult:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "r,r"))
+		 (match_operand:SI 2 "uns_small_int" "")))
+   (clobber (match_scratch:SI 3 "=X,h"))]
+  "TARGET_V8PLUS"
+  "@
+   umul %1,%2,%L0\;srlx %L0,32,%H0
+   umul %1,%2,%3\;srlx %3,32,%H0\;mov %3,%L0"
+  [(set_attr "length" "2,3")])
+
 (define_expand "umulsi3_highpart"
   [(set (match_operand:SI 0 "register_operand" "")
 	(truncate:SI
@@ -4125,6 +4239,12 @@
   "TARGET_HARD_MUL"
   "
 {
+  if (TARGET_V8PLUS)	/* a constant multiplier needs the uns_small_int pattern, not the register one */
+    {
+      emit_insn ((CONSTANT_P (operands[2]) ? gen_const_umulsi3_highpart_v8plus
+		  : gen_umulsidi3_highpart_v8plus) (operands[0], operands[1], operands[2], GEN_INT (32)));
+      DONE;
+    }
   if (CONSTANT_P (operands[2]))
     {
       emit_insn (gen_const_umulsi3_highpart (operands[0], operands[1], operands[2]));
@@ -4132,13 +4252,39 @@
     }
 }")
 
+(define_insn "umulsidi3_highpart_v8plus"
+  [(set (match_operand:SI 0 "register_operand" "=h,r")
+	(truncate:SI
+	 (lshiftrt:DI (mult:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "r,r"))
+			       (zero_extend:DI (match_operand:SI 2 "register_operand" "r,r")))
+		      (match_operand:SI 3 "const_int_operand" "i,i"))))
+   (clobber (match_scratch:SI 4 "=X,h"))]
+  "TARGET_V8PLUS"
+  "@
+   umul %1,%2,%0\;srlx %0,%3,%0
+   umul %1,%2,%4\;srlx %4,%3,%0"
+  [(set_attr "length" "2")])
+
+(define_insn "const_umulsi3_highpart_v8plus"
+  [(set (match_operand:SI 0 "register_operand" "=h,r")
+	(truncate:SI
+	 (lshiftrt:DI (mult:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "r,r"))
+			       (match_operand:SI 2 "uns_small_int" ""))
+		      (match_operand:SI 3 "const_int_operand" "i,i"))))
+   (clobber (match_scratch:SI 4 "=X,h"))]
+  "TARGET_V8PLUS"
+  "@
+   umul %1,%2,%0\;srlx %0,%3,%0
+   umul %1,%2,%4\;srlx %4,%3,%0"
+  [(set_attr "length" "2")])
+
 (define_insn "*umulsidi3_highpart_sp32"
   [(set (match_operand:SI 0 "register_operand" "=r")
 	(truncate:SI
 	 (lshiftrt:DI (mult:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "r"))
 			       (zero_extend:DI (match_operand:SI 2 "register_operand" "r")))
 		      (const_int 32))))]
-  "TARGET_HARD_MUL"
+  "TARGET_HARD_MUL32"
   "umul %1,%2,%%g0\;rd %%y,%0"
   [(set_attr "length" "2")])
 
@@ -4148,7 +4294,7 @@
 	 (lshiftrt:DI (mult:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "r"))
 			       (match_operand:SI 2 "uns_small_int" ""))
 		      (const_int 32))))]
-  "TARGET_HARD_MUL"
+  "TARGET_HARD_MUL32"
   "umul %1,%2,%%g0\;rd %%y,%0"
   [(set_attr "length" "2")])
 
@@ -4156,21 +4302,27 @@
 ;; a y register write and a use of it for correct results.
 
 (define_insn "divsi3"
-  [(set (match_operand:SI 0 "register_operand" "=r")
-	(div:SI (match_operand:SI 1 "register_operand" "r")
-		(match_operand:SI 2 "arith_operand" "rI")))
-   (clobber (match_scratch:SI 3 "=&r"))]
+  [(set (match_operand:SI 0 "register_operand" "=r,r")
+	(div:SI (match_operand:SI 1 "register_operand" "r,r")
+		(match_operand:SI 2 "move_operand" "rI,m")))
+   (clobber (match_scratch:SI 3 "=&r,&r"))]
   "TARGET_V8 || TARGET_DEPRECATED_V8_INSNS"
   "*
 {
+  if (which_alternative == 0)
   if (TARGET_V9)
     return \"sra %1,31,%3\;wr %%g0,%3,%%y\;sdiv %1,%2,%0\";
   else
     return \"sra %1,31,%3\;wr %%g0,%3,%%y\;nop\;nop\;nop\;sdiv %1,%2,%0\";
+  else
+    if (TARGET_V9)
+      return \"sra %1,31,%3\;wr %%g0,%3,%%y\;ld %2,%3\;sdiv %1,%3,%0\";
+    else
+      return \"sra %1,31,%3\;wr %%g0,%3,%%y\;ld %2,%3\;nop\;nop\;sdiv %1,%3,%0\";
 }"
   [(set (attr "length")
 	(if_then_else (eq_attr "isa" "v9")
-		      (const_int 3) (const_int 6)))])
+		      (const_int 4) (const_int 7)))])
 
 (define_insn "divdi3"
   [(set (match_operand:DI 0 "register_operand" "=r")
@@ -4202,19 +4354,28 @@
 		      (const_int 3) (const_int 6)))])
 
 (define_insn "udivsi3"
-  [(set (match_operand:SI 0 "register_operand" "=r")
-	(udiv:SI (match_operand:SI 1 "register_operand" "r")
-		 (match_operand:SI 2 "arith_operand" "rI")))]
+  [(set (match_operand:SI 0 "register_operand" "=r,&r,&r")
+	(udiv:SI (match_operand:SI 1 "reg_or_nonsymb_mem_operand" "r,r,m")
+		 (match_operand:SI 2 "move_operand" "rI,m,r")))]
   "TARGET_V8 || TARGET_DEPRECATED_V8_INSNS"
   "*
 {
+  output_asm_insn (\"wr %%g0,%%g0,%%y\", operands);
+  switch (which_alternative)
+    {
+    default:
   if (TARGET_V9)
-    return \"wr %%g0,%%g0,%%y\;udiv %1,%2,%0\";
-  else
-    return \"wr %%g0,%%g0,%%y\;nop\;nop\;nop\;udiv %1,%2,%0\";
+	return \"udiv %1,%2,%0\";
+      return \"nop\;nop\;nop\;udiv %1,%2,%0\";
+    case 1:
+      return \"ld %2,%0\;nop\;nop\;udiv %1,%0,%0\";
+    case 2:
+      return \"ld %1,%0\;nop\;nop\;udiv %0,%2,%0\";
+    }
 }"
   [(set (attr "length")
-	(if_then_else (eq_attr "isa" "v9")
+	(if_then_else (and (eq_attr "isa" "v9")
+			   (eq_attr "alternative" "0"))
 		      (const_int 2) (const_int 5)))])
 
 (define_insn "udivdi3"
@@ -4341,13 +4502,13 @@
 		(match_operand:SI 2 "" "")))
    (clobber (match_operand:SI 3 "register_operand" ""))]
   "GET_CODE (operands[2]) == CONST_INT
-   && !SMALL_INT (operands[2])
+   && !SMALL_INT32 (operands[2])
    && (INTVAL (operands[2]) & 0x3ff) == 0x3ff"
   [(set (match_dup 3) (match_dup 4))
    (set (match_dup 0) (and:SI (not:SI (match_dup 3)) (match_dup 1)))]
   "
 {
-  operands[4] = GEN_INT (~INTVAL (operands[2]));
+  operands[4] = GEN_INT (~INTVAL (operands[2]) & 0xffffffff);
 }")
 
 (define_insn "*and_not_di_sp32"
@@ -4436,13 +4597,13 @@
 		(match_operand:SI 2 "" "")))
    (clobber (match_operand:SI 3 "register_operand" ""))]
   "GET_CODE (operands[2]) == CONST_INT
-   && !SMALL_INT (operands[2])
+   && !SMALL_INT32 (operands[2])
    && (INTVAL (operands[2]) & 0x3ff) == 0x3ff"
   [(set (match_dup 3) (match_dup 4))
    (set (match_dup 0) (ior:SI (not:SI (match_dup 3)) (match_dup 1)))]
   "
 {
-  operands[4] = GEN_INT (~INTVAL (operands[2]));
+  operands[4] = GEN_INT (~INTVAL (operands[2]) & 0xffffffff);
 }")
 
 (define_insn "*or_not_di_sp32"
@@ -4479,7 +4640,7 @@
   ""
   "")
 
-(define_insn "*xorsi3_sp32"
+(define_insn "*xordi3_sp32"
   [(set (match_operand:DI 0 "register_operand" "=r,b")
 	(xor:DI (match_operand:DI 1 "arith_double_operand" "%r,b")
 		(match_operand:DI 2 "arith_double_operand" "rHI,b")))]
@@ -4506,7 +4667,8 @@
     }
   return \"xor %1,%2,%0\;xor %R1,%R2,%R0\";
 }"
-  [(set_attr "length" "2,1")])
+  [(set_attr "length" "2,1")
+   (set_attr "type" "ialu,fp")])
 
 (define_insn "*xordi3_sp64"
   [(set (match_operand:DI 0 "register_operand" "=r")
@@ -4531,13 +4693,13 @@
 		(match_operand:SI 2 "" "")))
    (clobber (match_operand:SI 3 "register_operand" ""))]
   "GET_CODE (operands[2]) == CONST_INT
-   && !SMALL_INT (operands[2])
+   && !SMALL_INT32 (operands[2])
    && (INTVAL (operands[2]) & 0x3ff) == 0x3ff"
   [(set (match_dup 3) (match_dup 4))
    (set (match_dup 0) (not:SI (xor:SI (match_dup 3) (match_dup 1))))]
   "
 {
-  operands[4] = GEN_INT (~INTVAL (operands[2]));
+  operands[4] = GEN_INT (~INTVAL (operands[2]) & 0xffffffff);
 }")
 
 (define_split
@@ -4546,13 +4708,13 @@
 			(match_operand:SI 2 "" ""))))
    (clobber (match_operand:SI 3 "register_operand" ""))]
   "GET_CODE (operands[2]) == CONST_INT
-   && !SMALL_INT (operands[2])
+   && !SMALL_INT32 (operands[2])
    && (INTVAL (operands[2]) & 0x3ff) == 0x3ff"
   [(set (match_dup 3) (match_dup 4))
    (set (match_dup 0) (xor:SI (match_dup 3) (match_dup 1)))]
   "
 {
-  operands[4] = GEN_INT (~INTVAL (operands[2]));
+  operands[4] = GEN_INT (~INTVAL (operands[2]) & 0xffffffff);
 }")
 
 ;; xnor patterns.  Note that (a ^ ~b) == (~a ^ b) == ~(a ^ b).
@@ -4849,7 +5011,7 @@
 {
   if (which_alternative == 0)
     return \"xnor %1,0,%0\";
-  if (which_alternative == 1)
+  if (which_alternative == 2)
     return \"fnot1s %1,%0\";
   if (TARGET_LIVE_G0)
     output_asm_insn (\"and %%g0,0,%%g0\", operands);
@@ -5138,7 +5300,23 @@
 }"
   [(set_attr "type" "shift")])
 
-(define_insn "ashldi3"
+(define_expand "ashldi3"	; non-ARCH64 (V8plus) goes through ashldi3_v8plus
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(ashift:DI (match_operand:DI 1 "register_operand" "r")
+		   (match_operand:SI 2 "arith_operand" "rI")))]
+  "TARGET_ARCH64 || TARGET_V8PLUS"
+  "
+{
+  if (! TARGET_ARCH64)
+    {
+      if (GET_CODE (operands[2]) == CONST_INT)
+	FAIL;	/* prefer generic code in this case */
+      emit_insn (gen_ashldi3_v8plus (operands[0], operands[1], operands[2]));
+      DONE;
+    }
+}")
+
+(define_insn ""
   [(set (match_operand:DI 0 "register_operand" "=r")
 	(ashift:DI (match_operand:DI 1 "register_operand" "r")
 		   (match_operand:SI 2 "arith_operand" "rI")))]
@@ -5152,6 +5330,15 @@
   return \"sllx %1,%2,%0\";
 }")
 
+(define_insn "ashldi3_v8plus"
+  [(set (match_operand:DI 0 "register_operand" "=&h,&h,r")
+	(ashift:DI (match_operand:DI 1 "register_operand" "r,0,r")
+		   (match_operand:SI 2 "arith_operand" "rI,rI,rI")))
+   (clobber (match_scratch:SI 3 "=X,X,&h"))]
+  "TARGET_V8PLUS"
+  "*return sparc_v8plus_shift (operands, insn, \"sllx\");"
+  [(set_attr "length" "5,5,6")])
+
 (define_insn "*cmp_cc_ashift_1"
   [(set (reg:CC_NOOV 100)
 	(compare:CC_NOOV (ashift:SI (match_operand:SI 0 "register_operand" "r")
@@ -5186,7 +5373,21 @@
 }"
   [(set_attr "type" "shift")])
 
-(define_insn "ashrdi3"
+(define_expand "ashrdi3"	; non-ARCH64 (V8plus) goes through ashrdi3_v8plus
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(ashiftrt:DI (match_operand:DI 1 "register_operand" "r")
+		     (match_operand:SI 2 "arith_operand" "rI")))]
+  "TARGET_ARCH64 || TARGET_V8PLUS"
+  "
+if (! TARGET_ARCH64)
+  {
+    if (GET_CODE (operands[2]) == CONST_INT)
+      FAIL;	/* prefer generic code in this case */
+    emit_insn (gen_ashrdi3_v8plus (operands[0], operands[1], operands[2]));
+    DONE;
+  }")
+
+(define_insn ""
   [(set (match_operand:DI 0 "register_operand" "=r")
 	(ashiftrt:DI (match_operand:DI 1 "register_operand" "r")
 		     (match_operand:SI 2 "arith_operand" "rI")))]
@@ -5200,6 +5401,15 @@
   return \"srax %1,%2,%0\";
 }")
 
+(define_insn "ashrdi3_v8plus"
+  [(set (match_operand:DI 0 "register_operand" "=&h,&h,r")
+	(ashiftrt:DI (match_operand:DI 1 "register_operand" "r,0,r")
+		     (match_operand:SI 2 "arith_operand" "rI,rI,rI")))
+   (clobber (match_scratch:SI 3 "=X,X,&h"))]
+  "TARGET_V8PLUS"
+  "*return sparc_v8plus_shift (operands, insn, \"srax\");"
+  [(set_attr "length" "5,5,6")])
+
 (define_insn "lshrsi3"
   [(set (match_operand:SI 0 "register_operand" "=r")
 	(lshiftrt:SI (match_operand:SI 1 "register_operand" "r")
@@ -5215,7 +5425,21 @@
 }"
   [(set_attr "type" "shift")])
 
-(define_insn "lshrdi3"
+(define_expand "lshrdi3"	; non-ARCH64 (V8plus) goes through lshrdi3_v8plus
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(lshiftrt:DI (match_operand:DI 1 "register_operand" "r")
+		     (match_operand:SI 2 "arith_operand" "rI")))]
+  "TARGET_ARCH64 || TARGET_V8PLUS"
+  "
+if (! TARGET_ARCH64)
+  {
+    if (GET_CODE (operands[2]) == CONST_INT)
+      FAIL;	/* prefer generic code in this case */
+    emit_insn (gen_lshrdi3_v8plus (operands[0], operands[1], operands[2]));
+    DONE;
+  }")
+
+(define_insn ""
   [(set (match_operand:DI 0 "register_operand" "=r")
 	(lshiftrt:DI (match_operand:DI 1 "register_operand" "r")
 		     (match_operand:SI 2 "arith_operand" "rI")))]
@@ -5228,6 +5452,15 @@
 
   return \"srlx %1,%2,%0\";
 }")
+
+(define_insn "lshrdi3_v8plus"
+  [(set (match_operand:DI 0 "register_operand" "=&h,&h,r")
+	(lshiftrt:DI (match_operand:DI 1 "register_operand" "r,0,r")
+		     (match_operand:SI 2 "arith_operand" "rI,rI,rI")))
+   (clobber (match_scratch:SI 3 "=X,X,&h"))]
+  "TARGET_V8PLUS"
+  "*return sparc_v8plus_shift (operands, insn, \"srlx\");"
+  [(set_attr "length" "5,5,6")])
 
 ;; Unconditional and other jump instructions
 ;; On the Sparc, by setting the annul bit on an unconditional branch, the
@@ -5658,7 +5891,15 @@
    (use (reg:SI 31))]
   "! TARGET_EPILOGUE"
   "* return output_return (operands);"
-  [(set_attr "type" "multi")])
+  [(set_attr "type" "return")])
+
+(define_peephole
+  [(set (match_operand:SI 0 "register_operand" "=r")
+	(match_operand:SI 1 "arith_operand" "rI"))
+   (parallel [(return)
+	      (use (reg:SI 31))])]
+  "sparc_return_peephole_ok (operands[0], operands[1])"
+  "return %%i7+8\;mov %Y1,%Y0")
 
 (define_insn "nop"
   [(const_int 0)]
@@ -5684,10 +5925,10 @@
 
 ;; ??? Doesn't work with -mflat.
 (define_expand "nonlocal_goto"
-  [(match_operand:SI 0 "" "")
+  [(match_operand:SI 0 "general_operand" "")
    (match_operand:SI 1 "general_operand" "")
    (match_operand:SI 2 "general_operand" "")
-   (match_operand:SI 3 "general_operand" "")]
+   (match_operand:SI 3 "" "")]
   ""
   "
 {
@@ -5715,15 +5956,20 @@
      and reload the appropriate value into %fp.  */
   emit_move_insn (frame_pointer_rtx, stack);
 
-  /* Put in the static chain register the nonlocal label address.  */
-  emit_move_insn (static_chain_rtx, chain);
-
   /* USE of frame_pointer_rtx added for consistency; not clear if
      really needed.  */
-  emit_insn (gen_rtx (USE, VOIDmode, frame_pointer_rtx));
+  /*emit_insn (gen_rtx (USE, VOIDmode, frame_pointer_rtx));*/
   emit_insn (gen_rtx (USE, VOIDmode, stack_pointer_rtx));
-  emit_insn (gen_rtx (USE, VOIDmode, static_chain_rtx));
   /* Return, restoring reg window and jumping to goto handler.  */
+  if (TARGET_V9 && GET_CODE (chain) == CONST_INT)
+    {
+      emit_insn (gen_goto_handler_and_restore_v9 (static_chain_rtx, chain));
+      emit_barrier ();
+      DONE;
+    }
+  /* Put in the static chain register the nonlocal label address.  */
+  emit_move_insn (static_chain_rtx, chain);
+  emit_insn (gen_rtx (USE, VOIDmode, static_chain_rtx));
   emit_insn (gen_goto_handler_and_restore ());
   emit_barrier ();
   DONE;
@@ -5733,22 +5979,32 @@
 (define_insn "flush_register_windows"
   [(unspec_volatile [(const_int 0)] 1)]
   ""
-  ;; ??? Use TARGET_V9 instead?
-  "* return TARGET_ARCH64 ? \"flushw\" : \"ta 3\";"
+  "* return TARGET_V9 ? \"flushw\" : \"ta 3\";"
   [(set_attr "type" "misc")])
 
 (define_insn "goto_handler_and_restore"
-  [(unspec_volatile [(const_int 0)] 2)
-   (use (reg:SI 8))]
-  ""
+  [(unspec_volatile [(reg:SI 8)] 2)]
+  "! TARGET_V9"
   "jmp %%o0+0\;restore"
   [(set_attr "type" "misc")
    (set_attr "length" "2")])
 
-;; Implement setjmp.  Step one, set up the buffer.
+(define_insn "goto_handler_and_restore_v9"
+  [(unspec_volatile [(reg:SI 8)
+		     (match_operand:SI 0 "register_operand" "=r,r")
+		     (match_operand:SI 1 "const_int_operand" "I,n")] 3)]
+  "TARGET_V9"
+  "@
+   return %%o0+0\;mov %1,%Y0
+   sethi %%hi(%1),%0\;return %%o0+0\;or %Y0,%%lo(%1),%Y0"
+  [(set_attr "type" "misc")
+   (set_attr "length" "2,3")])
+
+;; Pattern for use after a setjmp to store FP and the return register
+;; into the stack area.
 
-(define_expand "builtin_setjmp_setup"
-  [(unspec [(match_operand 0 "" "")] 3)]
+(define_expand "setjmp"
+  [(const_int 0)]
   ""
   "
 {
@@ -6146,54 +6402,6 @@
    && ! FP_REG_P (operands[0]) && ! FP_REG_P (operands[1])"
   "orcc %1,0,%0")
 
-;; Do {sign,zero}-extended compares somewhat more efficiently.
-;; ??? Is this now the Right Way to do this?  Or will SCRATCH
-;;     eventually have some impact here?
-
-(define_peephole
-  [(set (match_operand:HI 0 "register_operand" "")
-	(match_operand:HI 1 "memory_operand" ""))
-   (set (match_operand:SI 2 "register_operand" "")
-	(sign_extend:SI (match_dup 0)))
-   (set (reg:CC 100)
-	(compare:CC (match_dup 2)
-		    (const_int 0)))]
-  ""
-  "ldsh %1,%0\;orcc %0,0,%2")
-
-(define_peephole
-  [(set (match_operand:HI 0 "register_operand" "")
-	(match_operand:HI 1 "memory_operand" ""))
-   (set (match_operand:DI 2 "register_operand" "")
-	(sign_extend:DI (match_dup 0)))
-   (set (reg:CCX 100)
-	(compare:CCX (match_dup 2)
-		     (const_int 0)))]
-  "TARGET_ARCH64"
-  "ldsh %1,%0\;orcc %0,0,%2")
-
-(define_peephole
-  [(set (match_operand:QI 0 "register_operand" "")
-	(match_operand:QI 1 "memory_operand" ""))
-   (set (match_operand:SI 2 "register_operand" "")
-	(sign_extend:SI (match_dup 0)))
-   (set (reg:CC 100)
-	(compare:CC (match_dup 2)
-		    (const_int 0)))]
-  ""
-  "ldsb %1,%0\;orcc %0,0,%2")
-
-(define_peephole
-  [(set (match_operand:QI 0 "register_operand" "")
-	(match_operand:QI 1 "memory_operand" ""))
-   (set (match_operand:DI 2 "register_operand" "")
-	(sign_extend:DI (match_dup 0)))
-   (set (reg:CCX 100)
-	(compare:CCX (match_dup 2)
-		     (const_int 0)))]
-  "TARGET_ARCH64"
-  "ldsb %1,%0\;orcc %0,0,%2")
-
 ;; Floating-point move peepholes
 ;; ??? v9: Do we want similar ones?
 
@@ -6235,6 +6443,9 @@
 {
   if (! TARGET_ARCH64 && current_function_returns_struct)
     return \"jmp %%i7+12\;restore %%g0,%1,%Y0\";
+  else if (TARGET_V9 && (GET_CODE (operands[1]) == CONST_INT
+			 || IN_OR_GLOBAL_P (operands[1])))
+    return \"return %%i7+8\;mov %Y1,%Y0\";
   else
     return \"ret\;restore %%g0,%1,%Y0\";
 }"
@@ -6249,6 +6460,9 @@
 {
   if (! TARGET_ARCH64 && current_function_returns_struct)
     return \"jmp %%i7+12\;restore %%g0,%1,%Y0\";
+  else if (TARGET_V9 && (GET_CODE (operands[1]) == CONST_INT
+			 || IN_OR_GLOBAL_P (operands[1])))
+    return \"return %%i7+8\;mov %Y1,%Y0\";
   else
     return \"ret\;restore %%g0,%1,%Y0\";
 }"
@@ -6263,6 +6477,9 @@
 {
   if (! TARGET_ARCH64 && current_function_returns_struct)
     return \"jmp %%i7+12\;restore %%g0,%1,%Y0\";
+  else if (TARGET_V9 && (GET_CODE (operands[1]) == CONST_INT
+			 || IN_OR_GLOBAL_P (operands[1])))
+    return \"return %%i7+8\;mov %Y1,%Y0\";
   else
     return \"ret\;restore %%g0,%1,%Y0\";
 }"
@@ -6280,6 +6497,8 @@
 {
   if (! TARGET_ARCH64 && current_function_returns_struct)
     return \"jmp %%i7+12\;restore %%g0,%1,%Y0\";
+  else if (TARGET_V9 && IN_OR_GLOBAL_P (operands[1]))
+    return \"return %%i7+8\;mov %Y1,%Y0\";
   else
     return \"ret\;restore %%g0,%1,%Y0\";
 }"
@@ -6287,16 +6506,19 @@
 
 (define_insn "*return_addsi"
   [(set (match_operand:SI 0 "restore_operand" "")
-	(plus:SI (match_operand:SI 1 "arith_operand" "%r")
+	(plus:SI (match_operand:SI 1 "register_operand" "r")
 		 (match_operand:SI 2 "arith_operand" "rI")))
    (return)]
-  "! TARGET_EPILOGUE && ! TARGET_LIVE_G0
-   && (register_operand (operands[1], SImode)
-       || register_operand (operands[2], SImode))"
+  "! TARGET_EPILOGUE && ! TARGET_LIVE_G0"
   "*
 {
   if (! TARGET_ARCH64 && current_function_returns_struct)
     return \"jmp %%i7+12\;restore %r1,%2,%Y0\";
+  /* On V9, if op 1 is IN_OR_GLOBAL_P and op 2 is too or is a CONST_INT, use return.  */
+  else if (TARGET_V9 && IN_OR_GLOBAL_P (operands[1])
+	   && (GET_CODE (operands[2]) == CONST_INT
+	       || IN_OR_GLOBAL_P (operands[2])))
+    return \"return %%i7+8\;add %Y1,%Y2,%Y0\";
   else
     return \"ret\;restore %r1,%2,%Y0\";
 }"
-- 
2.7.4