rs6000.h (struct rs6000_args): Add sysv_gregno.
author Richard Henderson <rth@gcc.gnu.org>
Wed, 21 Jul 1999 00:26:00 +0000 (17:26 -0700)
committer Richard Henderson <rth@gcc.gnu.org>
Wed, 21 Jul 1999 00:26:00 +0000 (17:26 -0700)
        * rs6000.h (struct rs6000_args): Add sysv_gregno.
        * rs6000.c (init_cumulative_args): Init sysv_gregno.
        (function_arg_boundary): Align DFmode.
        (function_arg_advance): Restructure for ABI_V4; use sysv_gregno
        to get fp reg and stack overflow correct.
        (function_arg): Likewise.
        (function_arg_pass_by_reference): True for TFmode for ABI_V4.
        (setup_incoming_varargs): Restructure for ABI_V4; use
        function_arg_advance to skip final named argument.
        (expand_builtin_saveregs): Properly unskip the last integer arg
        when doing varargs.  Adjust overflow location calculation.

        * ginclude/va-ppc.h (struct __va_list_tag): Make gpr and fpr
        explicitly unsigned.
        (__VA_FP_REGSAVE): Use new OFS argument instead of AP->fpr directly.
        (__VA_GP_REGSAVE): Similarly.
        (__va_longlong_p): Delete.
        (__va_arg_type_violation): New declaration.
        (va_arg): Restructure.  Flag promotion errors.  Align double.
        TFmode passed by reference.

        * rs6000.md (movdi_32+1): Use GEN_INT after arithmetic
        in the HOST_BITS_PER_WIDE_INT > 32 case.

From-SVN: r28199

gcc/config/rs6000/rs6000.c
gcc/config/rs6000/rs6000.h
gcc/config/rs6000/rs6000.md
gcc/ginclude/va-ppc.h

index f568bed..1215c13 100644 (file)
@@ -1253,6 +1253,7 @@ init_cumulative_args (cum, fntype, libname, incoming)
   cum->fregno = FP_ARG_MIN_REG;
   cum->prototype = (fntype && TYPE_ARG_TYPES (fntype));
   cum->call_cookie = CALL_NORMAL;
+  cum->sysv_gregno = GP_ARG_MIN_REG;
 
   if (incoming)
     cum->nargs_prototype = 1000;               /* don't return a PARALLEL */
@@ -1338,7 +1339,8 @@ function_arg_boundary (mode, type)
      enum machine_mode mode;
      tree type;
 {
-  if ((DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_SOLARIS) && mode == DImode)
+  if ((DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_SOLARIS)
+      && (mode == DImode || mode == DFmode))
     return 64;
 
   if (DEFAULT_ABI != ABI_NT || TARGET_64BIT)
@@ -1361,48 +1363,85 @@ function_arg_advance (cum, mode, type, named)
      tree type;
      int named;
 {
-  int align = (TARGET_32BIT && (cum->words & 1) != 0
-              && function_arg_boundary (mode, type) == 64) ? 1 : 0;
-  cum->words += align;
   cum->nargs_prototype--;
 
   if (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_SOLARIS)
     {
-      /* Long longs must not be split between registers and stack */
-      if ((GET_MODE_CLASS (mode) != MODE_FLOAT || TARGET_SOFT_FLOAT)
-         && type && !AGGREGATE_TYPE_P (type)
-         && cum->words < GP_ARG_NUM_REG
-         && cum->words + RS6000_ARG_SIZE (mode, type, named) > GP_ARG_NUM_REG)
+      if (TARGET_HARD_FLOAT
+         && (mode == SFmode || mode == DFmode))
        {
-         cum->words = GP_ARG_NUM_REG;
+         if (cum->fregno <= FP_ARG_V4_MAX_REG)
+           cum->fregno++;
+         else
+           {
+             if (mode == DFmode)
+               cum->words += cum->words & 1;
+             cum->words += RS6000_ARG_SIZE (mode, type, 1);
+           }
        }
+      else
+       {
+         int n_words;
+         int gregno = cum->sysv_gregno;
+
+         /* Aggregates and IEEE quad get passed by reference.  */
+         if ((type && AGGREGATE_TYPE_P (type))
+             || mode == TFmode)
+           n_words = 1;
+         else 
+           n_words = RS6000_ARG_SIZE (mode, type, 1);
+
+         /* Long long is put in odd registers.  */
+         if (n_words == 2 && (gregno & 1) == 0)
+           gregno += 1;
+
+         /* Long long is not split between registers and stack.  */
+         if (gregno + n_words - 1 > GP_ARG_MAX_REG)
+           {
+             /* Long long is aligned on the stack.  */
+             if (n_words == 2)
+               cum->words += cum->words & 1;
+             cum->words += n_words;
+           }
 
-      /* Aggregates get passed as pointers */
-      if (type && AGGREGATE_TYPE_P (type))
-       cum->words++;
-
-      /* Floats go in registers, & don't occupy space in the GP registers
-        like they do for AIX unless software floating point.  */
-      else if (GET_MODE_CLASS (mode) == MODE_FLOAT
-              && TARGET_HARD_FLOAT
-              && cum->fregno <= FP_ARG_V4_MAX_REG)
-       cum->fregno++;
+         /* Note: continuing to accumulate gregno past when we've started
+            spilling to the stack indicates the fact that we've started
+            spilling to the stack to expand_builtin_saveregs.  */
+         cum->sysv_gregno = gregno + n_words;
+       }
 
-      else
-       cum->words += RS6000_ARG_SIZE (mode, type, 1);
+      if (TARGET_DEBUG_ARG)
+       {
+         fprintf (stderr, "function_adv: words = %2d, fregno = %2d, ",
+                  cum->words, cum->fregno);
+         fprintf (stderr, "gregno = %2d, nargs = %4d, proto = %d, ",
+                  cum->sysv_gregno, cum->nargs_prototype, cum->prototype);
+         fprintf (stderr, "mode = %4s, named = %d\n",
+                  GET_MODE_NAME (mode), named);
+       }
     }
   else
-    if (named)
-      {
-       cum->words += RS6000_ARG_SIZE (mode, type, named);
-       if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_HARD_FLOAT)
-         cum->fregno++;
-      }
+    {
+      int align = (TARGET_32BIT && (cum->words & 1) != 0
+                  && function_arg_boundary (mode, type) == 64) ? 1 : 0;
+      cum->words += align;
 
-  if (TARGET_DEBUG_ARG)
-    fprintf (stderr,
-            "function_adv: words = %2d, fregno = %2d, nargs = %4d, proto = %d, mode = %4s, named = %d, align = %d\n",
-            cum->words, cum->fregno, cum->nargs_prototype, cum->prototype, GET_MODE_NAME (mode), named, align);
+      if (named)
+       {
+         cum->words += RS6000_ARG_SIZE (mode, type, named);
+         if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_HARD_FLOAT)
+           cum->fregno++;
+       }
+
+      if (TARGET_DEBUG_ARG)
+       {
+         fprintf (stderr, "function_adv: words = %2d, fregno = %2d, ",
+                  cum->words, cum->fregno);
+         fprintf (stderr, "nargs = %4d, proto = %d, mode = %4s, ",
+                  cum->nargs_prototype, cum->prototype, GET_MODE_NAME (mode));
+         fprintf (stderr, "named = %d, align = %d\n", named, align);
+       }
+    }
 }
 \f
 /* Determine where to put an argument to a function.
@@ -1435,22 +1474,14 @@ function_arg (cum, mode, type, named)
      tree type;
      int named;
 {
-  int align = (TARGET_32BIT && (cum->words & 1) != 0
-              && function_arg_boundary (mode, type) == 64) ? 1 : 0;
-  int align_words = cum->words + align;
-
-  if (TARGET_DEBUG_ARG)
-    fprintf (stderr,
-            "function_arg: words = %2d, fregno = %2d, nargs = %4d, proto = %d, mode = %4s, named = %d, align = %d\n",
-            cum->words, cum->fregno, cum->nargs_prototype, cum->prototype, GET_MODE_NAME (mode), named, align);
+  enum rs6000_abi abi = DEFAULT_ABI;
 
-  /* Return a marker to indicate whether CR1 needs to set or clear the bit that V.4
-     uses to say fp args were passed in registers.  Assume that we don't need the
-     marker for software floating point, or compiler generated library calls.  */
+  /* Return a marker to indicate whether CR1 needs to set or clear the bit
+     that V.4 uses to say fp args were passed in registers.  Assume that we
+     don't need the marker for software floating point, or compiler generated
+     library calls.  */
   if (mode == VOIDmode)
     {
-      enum rs6000_abi abi = DEFAULT_ABI;
-
       if ((abi == ABI_V4 || abi == ABI_SOLARIS)
          && TARGET_HARD_FLOAT
          && cum->nargs_prototype < 0
@@ -1465,31 +1496,65 @@ function_arg (cum, mode, type, named)
       return GEN_INT (cum->call_cookie);
     }
 
-  if (!named)
+  if (abi == ABI_V4 || abi == ABI_SOLARIS)
     {
-      if (DEFAULT_ABI != ABI_V4 && DEFAULT_ABI != ABI_SOLARIS)
-       return NULL_RTX;
+      if (TARGET_HARD_FLOAT
+         && (mode == SFmode || mode == DFmode))
+       {
+         if (cum->fregno <= FP_ARG_V4_MAX_REG)
+           return gen_rtx_REG (mode, cum->fregno);
+         else
+           return NULL;
+       }
+      else
+       {
+         int n_words;
+         int gregno = cum->sysv_gregno;
+
+         /* Aggregates and IEEE quad get passed by reference.  */
+         if ((type && AGGREGATE_TYPE_P (type))
+             || mode == TFmode)
+           n_words = 1;
+         else 
+           n_words = RS6000_ARG_SIZE (mode, type, 1);
+
+         /* Long long is put in odd registers.  */
+         if (n_words == 2 && (gregno & 1) == 0)
+           gregno += 1;
+
+         /* Long long is not split between registers and stack.  */
+         if (gregno + n_words - 1 <= GP_ARG_MAX_REG)
+           return gen_rtx_REG (mode, gregno);
+         else
+           return NULL;
+       }
     }
+  else
+    {
+      int align = (TARGET_32BIT && (cum->words & 1) != 0
+                  && function_arg_boundary (mode, type) == 64) ? 1 : 0;
+      int align_words = cum->words + align;
 
-  if (type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
-    return NULL_RTX;
+      if (!named)
+       return NULL_RTX;
 
-  if (USE_FP_FOR_ARG_P (*cum, mode, type))
-    {
-      if (DEFAULT_ABI == ABI_V4 /* V.4 never passes FP values in GP registers */
-         || DEFAULT_ABI == ABI_SOLARIS
-         || ! type
-         || ((cum->nargs_prototype > 0)
-             /* IBM AIX extended its linkage convention definition always to
-                require FP args after register save area hole on the stack.  */
-             && (DEFAULT_ABI != ABI_AIX
-                 || ! TARGET_XL_CALL
-                 || (align_words < GP_ARG_NUM_REG))))
-       return gen_rtx_REG (mode, cum->fregno);
-
-      return gen_rtx_PARALLEL (mode,
-                     gen_rtvec
-                     (2,
+      if (type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
+        return NULL_RTX;
+
+      if (USE_FP_FOR_ARG_P (*cum, mode, type))
+       {
+         if (! type
+             || ((cum->nargs_prototype > 0)
+                 /* IBM AIX extended its linkage convention definition always
+                    to require FP args after register save area hole on the
+                    stack.  */
+                 && (DEFAULT_ABI != ABI_AIX
+                     || ! TARGET_XL_CALL
+                     || (align_words < GP_ARG_NUM_REG))))
+           return gen_rtx_REG (mode, cum->fregno);
+
+          return gen_rtx_PARALLEL (mode,
+           gen_rtvec (2,
                       gen_rtx_EXPR_LIST (VOIDmode,
                                ((align_words >= GP_ARG_NUM_REG)
                                 ? NULL_RTX
@@ -1507,21 +1572,12 @@ function_arg (cum, mode, type, named)
                       gen_rtx_EXPR_LIST (VOIDmode,
                                gen_rtx_REG (mode, cum->fregno),
                                const0_rtx)));
+       }
+      else if (align_words < GP_ARG_NUM_REG)
+       return gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
+      else
+       return NULL_RTX;
     }
-
-  /* Long longs won't be split between register and stack;
-     FP arguments get passed on the stack if they didn't get a register.  */
-  else if ((DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_SOLARIS) &&
-          (align_words + RS6000_ARG_SIZE (mode, type, named) > GP_ARG_NUM_REG
-           || (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_HARD_FLOAT)))
-    {
-      return NULL_RTX;
-    }
-
-  else if (align_words < GP_ARG_NUM_REG)
-    return gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
-
-  return NULL_RTX;
 }
 \f
 /* For an arg passed partly in registers and partly in memory,
@@ -1576,7 +1632,8 @@ function_arg_pass_by_reference (cum, mode, type, named)
      int named ATTRIBUTE_UNUSED;
 {
   if ((DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_SOLARIS)
-      && type && AGGREGATE_TYPE_P (type))
+      && ((type && AGGREGATE_TYPE_P (type))
+         || mode == TFmode))
     {
       if (TARGET_DEBUG_ARG)
        fprintf (stderr, "function_arg_pass_by_reference: aggregate\n");
@@ -1611,73 +1668,87 @@ setup_incoming_varargs (cum, mode, type, pretend_size, no_rtl)
      int no_rtl;
 
 {
-  rtx save_area = virtual_incoming_args_rtx;
-  int reg_size = TARGET_32BIT ? 4 : 8;
-
-  if (TARGET_DEBUG_ARG)
-    fprintf (stderr,
-            "setup_vararg: words = %2d, fregno = %2d, nargs = %4d, proto = %d, mode = %4s, no_rtl= %d\n",
-            cum->words, cum->fregno, cum->nargs_prototype, cum->prototype, GET_MODE_NAME (mode), no_rtl);
+  CUMULATIVE_ARGS next_cum;
+  int reg_size = TARGET_32BIT ? 4 : 8;
+  rtx save_area;
+  int first_reg_offset;
 
   if (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_SOLARIS)
     {
+      tree fntype;
+      int stdarg_p;
+
+      fntype = TREE_TYPE (current_function_decl);
+      stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
+                 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
+                     != void_type_node));
+
+      /* For varargs, we do not want to skip the dummy va_dcl argument.
+         For stdargs, we do want to skip the last named argument.  */
+      next_cum = *cum;
+      if (stdarg_p)
+       function_arg_advance (&next_cum, mode, type, 1);
+
+      /* Indicate to allocate space on the stack for varargs save area.  */
+      /* ??? Does this really have to be located at a magic spot on the
+        stack, or can we allocate this with assign_stack_local instead.  */
       rs6000_sysv_varargs_p = 1;
       if (! no_rtl)
        save_area = plus_constant (virtual_stack_vars_rtx,
                                   - RS6000_VARARGS_SIZE);
+
+      first_reg_offset = next_cum.sysv_gregno - GP_ARG_MIN_REG;
     }
   else
-    rs6000_sysv_varargs_p = 0;
-
-  if (cum->words < 8)
     {
-      int first_reg_offset = cum->words;
+      save_area = virtual_incoming_args_rtx;
+      rs6000_sysv_varargs_p = 0;
 
+      first_reg_offset = cum->words;
       if (MUST_PASS_IN_STACK (mode, type))
        first_reg_offset += RS6000_ARG_SIZE (TYPE_MODE (type), type, 1);
+    }
 
-      if (first_reg_offset > GP_ARG_NUM_REG)
-       first_reg_offset = GP_ARG_NUM_REG;
-
-      if (!no_rtl && first_reg_offset != GP_ARG_NUM_REG)
-       move_block_from_reg
-         (GP_ARG_MIN_REG + first_reg_offset,
-          gen_rtx_MEM (BLKmode,
-                   plus_constant (save_area, first_reg_offset * reg_size)),
-          GP_ARG_NUM_REG - first_reg_offset,
-          (GP_ARG_NUM_REG - first_reg_offset) * UNITS_PER_WORD);
-
+  if (!no_rtl && first_reg_offset < GP_ARG_NUM_REG)
+    {
+      move_block_from_reg
+       (GP_ARG_MIN_REG + first_reg_offset,
+        gen_rtx_MEM (BLKmode,
+                     plus_constant (save_area, first_reg_offset * reg_size)),
+        GP_ARG_NUM_REG - first_reg_offset,
+        (GP_ARG_NUM_REG - first_reg_offset) * UNITS_PER_WORD);
+
+      /* ??? Does ABI_V4 need this at all?  */
       *pretend_size = (GP_ARG_NUM_REG - first_reg_offset) * UNITS_PER_WORD;
     }
 
   /* Save FP registers if needed.  */
-  if ((DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_SOLARIS) && TARGET_HARD_FLOAT && !no_rtl)
+  if ((DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_SOLARIS)
+      && TARGET_HARD_FLOAT && !no_rtl
+      && next_cum.fregno <= FP_ARG_V4_MAX_REG)
     {
-      int fregno     = cum->fregno;
-      int num_fp_reg = FP_ARG_V4_MAX_REG + 1 - fregno;
+      int fregno = next_cum.fregno;
+      rtx cr1 = gen_rtx_REG (CCmode, 69);
+      rtx lab = gen_label_rtx ();
+      int off = (GP_ARG_NUM_REG * reg_size) + ((fregno - FP_ARG_MIN_REG) * 8);
 
-      if (num_fp_reg >= 0)
-       {
-         rtx cr1 = gen_rtx_REG (CCmode, 69);
-         rtx lab = gen_label_rtx ();
-         int off = (GP_ARG_NUM_REG * reg_size) + ((fregno - FP_ARG_MIN_REG) * 8);
-
-         emit_jump_insn (gen_rtx_SET (VOIDmode,
+      emit_jump_insn (gen_rtx_SET (VOIDmode,
                                   pc_rtx,
                                   gen_rtx_IF_THEN_ELSE (VOIDmode,
-                                           gen_rtx_NE (VOIDmode, cr1, const0_rtx),
+                                           gen_rtx_NE (VOIDmode, cr1,
+                                                       const0_rtx),
                                            gen_rtx_LABEL_REF (VOIDmode, lab),
                                            pc_rtx)));
 
-         while ( num_fp_reg-- >= 0)
-           {
-             emit_move_insn (gen_rtx_MEM (DFmode, plus_constant (save_area, off)),
-                             gen_rtx_REG (DFmode, fregno++));
-             off += 8;
-           }
-
-         emit_label (lab);
+      while (fregno <= FP_ARG_V4_MAX_REG)
+       {
+         emit_move_insn (gen_rtx_MEM (DFmode, plus_constant (save_area, off)),
+                         gen_rtx_REG (DFmode, fregno));
+         fregno++;
+         off += 8;
        }
+
+      emit_label (lab);
     }
 }
 \f
@@ -1733,9 +1804,18 @@ expand_builtin_saveregs (args)
                                                     2 * UNITS_PER_WORD));
 
   /* Construct the two characters of `gpr' and `fpr' as a unit.  */
-  words = current_function_args_info.words - !stdarg_p;
-  gpr = (words > 8 ? 8 : words);
-  fpr = current_function_args_info.fregno - 33;
+  words = current_function_args_info.words;
+  gpr = current_function_args_info.sysv_gregno - GP_ARG_MIN_REG;
+  fpr = current_function_args_info.fregno - FP_ARG_MIN_REG;
+
+  /* Varargs has the va_dcl argument, but we don't count it.  */
+  if (!stdarg_p)
+    {
+      if (gpr > GP_ARG_NUM_REG)
+        words -= 1;
+      else
+        gpr -= 1;
+    }
 
   if (BYTES_BIG_ENDIAN)
     {
@@ -1754,12 +1834,9 @@ expand_builtin_saveregs (args)
   emit_move_insn (mem_gpr_fpr, tmp);
 
   /* Find the overflow area.  */
-  if (words <= 8)
-    tmp = virtual_incoming_args_rtx;
-  else
-    tmp = expand_binop (Pmode, add_optab, virtual_incoming_args_rtx,
-                       GEN_INT ((words - 8) * UNITS_PER_WORD),
-                       mem_overflow, 0, OPTAB_WIDEN);
+  tmp = expand_binop (Pmode, add_optab, virtual_incoming_args_rtx,
+                     GEN_INT (words * UNITS_PER_WORD),
+                     mem_overflow, 0, OPTAB_WIDEN);
   if (tmp != mem_overflow)
     emit_move_insn (mem_overflow, tmp);
 
@@ -1773,7 +1850,6 @@ expand_builtin_saveregs (args)
   /* Return the address of the va_list constructor.  */
   return XEXP (block, 0);
 }
-
 \f
 /* Generate a memory reference for expand_block_move, copying volatile,
    and other bits from an original memory reference.  */
index 4e22131..4e5c9ac 100644 (file)
@@ -1435,17 +1435,22 @@ extern int rs6000_sysv_varargs_p;
    floating-point register number, and the third says how many more args we
    have prototype types for.
 
+   For ABI_V4, we treat these slightly differently -- `sysv_gregno' is
+   the next available GP register, `fregno' is the next available FP
+   register, and `words' is the number of words used on the stack.
+
    The varargs/stdarg support requires that this structure's size
-   be a multiple of sizeof(int). */
+   be a multiple of sizeof(int).  */
 
 typedef struct rs6000_args
 {
-  int words;                   /* # words uses for passing GP registers */
+  int words;                   /* # words used for passing GP registers */
   int fregno;                  /* next available FP register */
   int nargs_prototype;         /* # args left in the current prototype */
   int orig_nargs;              /* Original value of nargs_prototype */
   int prototype;               /* Whether a prototype was defined */
   int call_cookie;             /* Do special things for this call */
+  int sysv_gregno;             /* next available GP register */
 } CUMULATIVE_ARGS;
 
 /* Define intermediate macro to compute the size (in registers) of an argument
index d9e82a8..bb23a1a 100644 (file)
 #if HOST_BITS_PER_WIDE_INT == 32
   operands[4] = (INTVAL (operands[1]) & 0x80000000) ? constm1_rtx : const0_rtx;
 #else
-  operands[4] = (HOST_WIDE_INT) INTVAL (operands[1]) >> 32;
-  operands[1] = INTVAL (operands[1]) & 0xffffffff;
+  operands[4] = GEN_INT ((HOST_WIDE_INT) INTVAL (operands[1]) >> 32);
+  operands[1] = GEN_INT (INTVAL (operands[1]) & 0xffffffff);
 #endif
 }")
 
index 6d81497..0c79d5f 100644 (file)
 /* Note that the names in this structure are in the user's namespace, but
    that the V.4 abi explicitly states that these names should be used.  */
 typedef struct __va_list_tag {
-  char gpr;                    /* index into the array of 8 GPRs stored in the
+  unsigned char gpr;           /* index into the array of 8 GPRs stored in the
                                   register save area gpr=0 corresponds to r3,
                                   gpr=1 to r4, etc. */
-  char fpr;                    /* index into the array of 8 FPRs stored in the
+  unsigned char fpr;           /* index into the array of 8 FPRs stored in the
                                   register save area fpr=0 corresponds to f1,
                                   fpr=1 to f2, etc. */
   char *overflow_arg_area;     /* location on stack that holds the next
@@ -51,13 +51,13 @@ typedef struct {
 /* Macros to access the register save area */
 /* We cast to void * and then to TYPE * because this avoids
    a warning about increasing the alignment requirement.  */
-#define __VA_FP_REGSAVE(AP,TYPE)                                       \
+#define __VA_FP_REGSAVE(AP,OFS,TYPE)                                   \
   ((TYPE *) (void *) (&(((__va_regsave_t *)                            \
-                        (AP)->reg_save_area)->__fp_save[(int)(AP)->fpr])))
+                        (AP)->reg_save_area)->__fp_save[OFS])))
 
-#define __VA_GP_REGSAVE(AP,TYPE)                                       \
+#define __VA_GP_REGSAVE(AP,OFS,TYPE)                                   \
   ((TYPE *) (void *) (&(((__va_regsave_t *)                            \
-                        (AP)->reg_save_area)->__gp_save[(int)(AP)->gpr])))
+                        (AP)->reg_save_area)->__gp_save[OFS])))
 
 /* Common code for va_start for both varargs and stdarg.  We allow all
    the work to be done by __builtin_saveregs.  It returns a pointer to
@@ -88,60 +88,103 @@ typedef struct {
 #define __va_float_p(TYPE)     (__builtin_classify_type(*(TYPE *)0) == 8)
 #endif
 
-#define __va_longlong_p(TYPE) \
-  ((__builtin_classify_type(*(TYPE *)0) == 1) && (sizeof(TYPE) == 8))
-
 #define __va_aggregate_p(TYPE) (__builtin_classify_type(*(TYPE *)0) >= 12)
 #define __va_size(TYPE)                ((sizeof(TYPE) + sizeof (long) - 1) / sizeof (long))
 
-#define va_arg(AP,TYPE)                                                        \
-__extension__ (*({                                                     \
-  register TYPE *__ptr;                                                        \
-                                                                       \
-  if (__va_float_p (TYPE) && (AP)->fpr < 8)                            \
-    {                                                                  \
-      __ptr = __VA_FP_REGSAVE (AP, TYPE);                              \
-      (AP)->fpr++;                                                     \
-    }                                                                  \
-                                                                       \
-  else if (__va_aggregate_p (TYPE) && (AP)->gpr < 8)                   \
-    {                                                                  \
-      __ptr = * __VA_GP_REGSAVE (AP, TYPE *);                          \
-      (AP)->gpr++;                                                     \
-    }                                                                  \
-                                                                       \
-  else if (!__va_float_p (TYPE) && !__va_aggregate_p (TYPE)            \
-          && (AP)->gpr + __va_size(TYPE) <= 8                          \
-          && (!__va_longlong_p(TYPE)                                   \
-              || (AP)->gpr + __va_size(TYPE) <= 8))                    \
-    {                                                                  \
-      if (__va_longlong_p(TYPE) && ((AP)->gpr & 1) != 0)               \
-       (AP)->gpr++;                                                    \
-                                                                       \
-      __ptr = __VA_GP_REGSAVE (AP, TYPE);                              \
-      (AP)->gpr += __va_size (TYPE);                                   \
-    }                                                                  \
-                                                                       \
-  else if (!__va_float_p (TYPE) && !__va_aggregate_p (TYPE)            \
-          && (AP)->gpr < 8)                                            \
-    {                                                                  \
-      (AP)->gpr = 8;                                                   \
-      __ptr = (TYPE *) (void *) (__va_overflow(AP));                   \
-      __va_overflow(AP) += __va_size (TYPE) * sizeof (long);           \
-    }                                                                  \
-                                                                       \
-  else if (__va_aggregate_p (TYPE))                                    \
-    {                                                                  \
-      __ptr = * (TYPE **) (void *) (__va_overflow(AP));                        \
-      __va_overflow(AP) += sizeof (TYPE *);                            \
-    }                                                                  \
-  else                                                                 \
-    {                                                                  \
-      __ptr = (TYPE *) (void *) (__va_overflow(AP));                   \
-      __va_overflow(AP) += __va_size (TYPE) * sizeof (long);           \
-    }                                                                  \
-                                                                       \
-  __ptr;                                                               \
+/* This symbol isn't defined.  It is used to flag type promotion violations
+   at link time.  We can only do this when optimizing.  Use __builtin_trap
+   instead of abort so that we don't require a prototype for abort.  */
+
+#ifdef __OPTIMIZE__
+extern void __va_arg_type_violation(void) __attribute__((__noreturn__));
+#else
+#define __va_arg_type_violation() __builtin_trap()
+#endif
+
+#define va_arg(AP,TYPE)                                                           \
+__extension__ (*({                                                        \
+  register TYPE *__ptr;                                                           \
+                                                                          \
+  if (__va_float_p (TYPE) && sizeof (TYPE) < 16)                          \
+    {                                                                     \
+      unsigned char __fpr = (AP)->fpr;                                    \
+      if (__fpr < 8)                                                      \
+       {                                                                  \
+         __ptr = __VA_FP_REGSAVE (AP, __fpr, TYPE);                       \
+         (AP)->fpr = __fpr + 1;                                           \
+       }                                                                  \
+      else if (sizeof (TYPE) == 8)                                        \
+       {                                                                  \
+         unsigned long __addr = (unsigned long) (__va_overflow (AP));     \
+         __ptr = (TYPE *)((__addr + 7) & -8);                             \
+         __va_overflow (AP) = (char *)(__ptr + 1);                        \
+       }                                                                  \
+      else                                                                \
+       {                                                                  \
+         /* float is promoted to double.  */                              \
+         __va_arg_type_violation ();                                      \
+       }                                                                  \
+    }                                                                     \
+                                                                          \
+  /* Aggregates and long doubles are passed by reference.  */             \
+  else if (__va_aggregate_p (TYPE) || __va_float_p (TYPE))                \
+    {                                                                     \
+      unsigned char __gpr = (AP)->gpr;                                    \
+      if (__gpr < 8)                                                      \
+       {                                                                  \
+         __ptr = * __VA_GP_REGSAVE (AP, __gpr, TYPE *);                   \
+         (AP)->gpr = __gpr + 1;                                           \
+       }                                                                  \
+      else                                                                \
+       {                                                                  \
+         TYPE **__pptr = (TYPE **) (__va_overflow (AP));                  \
+         __ptr = * __pptr;                                                \
+         __va_overflow (AP) = (char *) (__pptr + 1);                      \
+       }                                                                  \
+    }                                                                     \
+                                                                          \
+  /* Only integrals remaining.  */                                        \
+  else                                                                    \
+    {                                                                     \
+      /* longlong is aligned.  */                                         \
+      if (sizeof (TYPE) == 8)                                             \
+       {                                                                  \
+         unsigned char __gpr = (AP)->gpr;                                 \
+         if (__gpr < 7)                                                   \
+           {                                                              \
+             __gpr += __gpr & 1;                                          \
+             __ptr = __VA_GP_REGSAVE (AP, __gpr, TYPE);                   \
+             (AP)->gpr = __gpr + 2;                                       \
+           }                                                              \
+         else                                                             \
+           {                                                              \
+             unsigned long __addr = (unsigned long) (__va_overflow (AP)); \
+             __ptr = (TYPE *)((__addr + 7) & -8);                         \
+             (AP)->gpr = 8;                                               \
+             __va_overflow (AP) = (char *)(__ptr + 1);                    \
+           }                                                              \
+       }                                                                  \
+      else if (sizeof (TYPE) == 4)                                        \
+       {                                                                  \
+         unsigned char __gpr = (AP)->gpr;                                 \
+         if (__gpr < 8)                                                   \
+           {                                                              \
+             __ptr = __VA_GP_REGSAVE (AP, __gpr, TYPE);                   \
+             (AP)->gpr = __gpr + 1;                                       \
+           }                                                              \
+         else                                                             \
+           {                                                              \
+             __ptr = (TYPE *) __va_overflow (AP);                         \
+             __va_overflow (AP) = (char *)(__ptr + 1);                    \
+           }                                                              \
+       }                                                                  \
+      else                                                                \
+       {                                                                  \
+         /* Everything else was promoted to int.  */                      \
+         __va_arg_type_violation ();                                      \
+       }                                                                  \
+    }                                                                     \
+  __ptr;                                                                  \
 }))
 
 #define va_end(AP)     ((void)0)