From bf310028ad28c695526c5a18889554091c119121 Mon Sep 17 00:00:00 2001
From: Alan Modra <amodra@bigpond.net.au>
Date: Sun, 8 Feb 2004 12:11:25 +0000
Subject: [PATCH] ffi.c (ffi_prep_cif_machdep <FFI_LINUX64>): Correct long
 double function return and long double arg handling.

	* src/powerpc/ffi.c (ffi_prep_cif_machdep <FFI_LINUX64>): Correct
	long double function return and long double arg handling.
	(ffi_closure_helper_LINUX64): Formatting.  Delete unused "ng" var.
	Use "end_pfr" instead of "nf".  Correct long double handling.
	Localise "temp".
	* src/powerpc/linux64.S (ffi_call_LINUX64): Save f2 long double
	return value.
	* src/powerpc/linux64_closure.S (ffi_closure_LINUX64): Allocate
	space for long double return value.  Adjust stack frame and offsets.
	Load f2 long double return.

From-SVN: r77481
---
 libffi/ChangeLog                     |  13 +++++
 libffi/src/powerpc/ffi.c             | 110 ++++++++++++++++++++---------------
 libffi/src/powerpc/linux64.S         |   1 +
 libffi/src/powerpc/linux64_closure.S |  68 +++++++++++-----------
 4 files changed, 110 insertions(+), 82 deletions(-)

diff --git a/libffi/ChangeLog b/libffi/ChangeLog
index d19883a..4d931c9 100644
--- a/libffi/ChangeLog
+++ b/libffi/ChangeLog
@@ -1,3 +1,16 @@
+2004-02-08  Alan Modra  <amodra@bigpond.net.au>
+
+	* src/powerpc/ffi.c (ffi_prep_cif_machdep <FFI_LINUX64>): Correct
+	long double function return and long double arg handling.
+	(ffi_closure_helper_LINUX64): Formatting.  Delete unused "ng" var.
+	Use "end_pfr" instead of "nf".  Correct long double handling.
+	Localise "temp".
+	* src/powerpc/linux64.S (ffi_call_LINUX64): Save f2 long double
+	return value.
+	* src/powerpc/linux64_closure.S (ffi_closure_LINUX64): Allocate
+	space for long double return value.  Adjust stack frame and offsets.
+	Load f2 long double return.
+
 2004-02-07  Alan Modra  <amodra@bigpond.net.au>
 
 	* src/types.c: Use 16 byte long double for POWERPC64.
diff --git a/libffi/src/powerpc/ffi.c b/libffi/src/powerpc/ffi.c
index 9ad85ea..a2fb2d2 100644
--- a/libffi/src/powerpc/ffi.c
+++ b/libffi/src/powerpc/ffi.c
@@ -446,6 +446,7 @@ ffi_status ffi_prep_cif_machdep(ffi_cif *cif)
   int fparg_count = 0, intarg_count = 0;
   unsigned flags = 0;
   unsigned struct_copy_size = 0;
+  unsigned type = cif->rtype->type;
 
   if (cif->abi != FFI_LINUX64)
     {    
@@ -468,6 +469,11 @@ ffi_status ffi_prep_cif_machdep(ffi_cif *cif)
 
       /* Space for the mandatory parm save area and general registers.  */
       bytes += 2 * NUM_GPR_ARG_REGISTERS64 * sizeof(long);
+
+#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
+      if (type == FFI_TYPE_LONGDOUBLE)
+	type = FFI_TYPE_DOUBLE;
+#endif
     }
 
   /* Return value handling.  The rules for SYSV are as follows:
@@ -480,9 +486,9 @@ ffi_status ffi_prep_cif_machdep(ffi_cif *cif)
        are allocated space and a pointer is passed as the first argument.
      For LINUX64:
      - integer values in gpr3;
-     - Structures/Unions and long double by reference;
-     - Single/double FP values in fpr1.  */
-  switch (cif->rtype->type)
+     - Structures/Unions by reference;
+     - Single/double FP values in fpr1, long double in fpr1,fpr2.  */
+  switch (type)
     {
     case FFI_TYPE_DOUBLE:
       flags |= FLAG_RETURNS_64BITS;
@@ -581,6 +587,12 @@ ffi_status ffi_prep_cif_machdep(ffi_cif *cif)
       {
 	switch ((*ptr)->type)
 	  {
+#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
+	  case FFI_TYPE_LONGDOUBLE:
+	    fparg_count += 2;
+	    intarg_count += 2;
+	    break;
+#endif
 	  case FFI_TYPE_FLOAT:
 	  case FFI_TYPE_DOUBLE:
 	    fparg_count++;
@@ -588,9 +600,6 @@ ffi_status ffi_prep_cif_machdep(ffi_cif *cif)
 	    break;
 
 	  case FFI_TYPE_STRUCT:
-#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
-	  case FFI_TYPE_LONGDOUBLE:
-#endif
 	    intarg_count += ((*ptr)->size + 7) / 8;
 	    break;
 
@@ -863,7 +872,7 @@ ffi_closure_helper_SYSV (ffi_closure* closure, void * rvalue,
              pst++;
           }
 	  break;
-	
+
 	case FFI_TYPE_STRUCT:
 	  /* Structs are passed by reference. The address will appear in a 
 	     gpr if it is one of the first 8 arguments.  */
@@ -965,34 +974,28 @@ int hidden ffi_closure_helper_LINUX64 (ffi_closure*, void*, unsigned long*,
 				       ffi_dblfl*);
 
 int hidden
-ffi_closure_helper_LINUX64 (ffi_closure* closure, void * rvalue, 
-            unsigned long * pst, ffi_dblfl * pfr)
+ffi_closure_helper_LINUX64 (ffi_closure *closure, void *rvalue, 
+			    unsigned long *pst, ffi_dblfl *pfr)
 {
   /* rvalue is the pointer to space for return value in closure assembly */
   /* pst is the pointer to parameter save area
      (r3-r10 are stored into its first 8 slots by ffi_closure_LINUX64) */
   /* pfr is the pointer to where f1-f13 are stored in ffi_closure_LINUX64 */
 
-  void **          avalue;
-  ffi_type **      arg_types;
-  long             i, avn;
-  long             nf;   /* number of floating registers already used */
-  long             ng;   /* number of general registers already used */
-  ffi_cif *        cif; 
-  double           temp; 
+  void **avalue;
+  ffi_type **arg_types;
+  long i, avn;
+  ffi_cif *cif; 
+  ffi_dblfl *end_pfr = pfr + NUM_FPR_ARG_REGISTERS64;
 
   cif = closure->cif;
-  avalue = alloca(cif->nargs * sizeof(void *));
-
-  nf = 0;
-  ng = 0;
+  avalue = alloca (cif->nargs * sizeof (void *));
 
   /* Copy the caller's structure return value address so that the closure
      returns the data directly to the caller.  */
   if (cif->rtype->type == FFI_TYPE_STRUCT)
     {
       rvalue = (void *) *pst;
-      ng++;
       pst++;
     }
 
@@ -1008,21 +1011,18 @@ ffi_closure_helper_LINUX64 (ffi_closure* closure, void * rvalue,
 	case FFI_TYPE_SINT8:
 	case FFI_TYPE_UINT8:
 	  avalue[i] = (char *) pst + 7;
-	  ng++;
 	  pst++;
 	  break;
            
 	case FFI_TYPE_SINT16:
 	case FFI_TYPE_UINT16:
 	  avalue[i] = (char *) pst + 6;
-	  ng++;
 	  pst++;
 	  break;
 
 	case FFI_TYPE_SINT32:
 	case FFI_TYPE_UINT32:
 	  avalue[i] = (char *) pst + 4;
-	  ng++;
 	  pst++;
 	  break;
 
@@ -1030,21 +1030,16 @@ ffi_closure_helper_LINUX64 (ffi_closure* closure, void * rvalue,
 	case FFI_TYPE_UINT64:
 	case FFI_TYPE_POINTER:
 	  avalue[i] = pst;
-	  ng++;
 	  pst++;
 	  break;
 
 	case FFI_TYPE_STRUCT:
-#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
-	case FFI_TYPE_LONGDOUBLE:
-#endif
 	  /* Structures with size less than eight bytes are passed
 	     left-padded.  */
 	  if (arg_types[i]->size < 8)
 	    avalue[i] = (char *) pst + 8 - arg_types[i]->size;
 	  else
 	    avalue[i] = pst;
-	  ng += (arg_types[i]->size + 7) / 8;
 	  pst += (arg_types[i]->size + 7) / 8;
 	  break;
 
@@ -1056,16 +1051,15 @@ ffi_closure_helper_LINUX64 (ffi_closure* closure, void * rvalue,
 
           /* there are 13 64bit floating point registers */
 
-          if (nf < NUM_FPR_ARG_REGISTERS64) {
-             temp = pfr->d;
-             pfr->f = (float)temp;
-             avalue[i] = pfr;
-             pfr++;
-          } else {
-	     avalue[i] = pst;
-          }
-          nf++;
-	  ng++;
+          if (pfr < end_pfr)
+	    {
+	      double temp = pfr->d;
+	      pfr->f = (float) temp;
+	      avalue[i] = pfr;
+	      pfr++;
+	    }
+	  else
+	    avalue[i] = pst;
 	  pst++;
 	  break;
 
@@ -1073,17 +1067,38 @@ ffi_closure_helper_LINUX64 (ffi_closure* closure, void * rvalue,
 	  /* On the outgoing stack all values are aligned to 8 */
           /* there are 13 64bit floating point registers */
 
-          if (nf < NUM_FPR_ARG_REGISTERS64) {
-	     avalue[i] = pfr;
-             pfr++;
-          } else {
-	     avalue[i] = pst;
-          }
-          nf++;
-	  ng++;
+	  if (pfr < end_pfr)
+	    {
+	      avalue[i] = pfr;
+	      pfr++;
+	    }
+	  else
+	    avalue[i] = pst;
 	  pst++;
 	  break;
 
+#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
+	case FFI_TYPE_LONGDOUBLE:
+	  if (pfr + 1 < end_pfr)	/* Both halves lie in the FPR save area.  */
+	    {
+	      avalue[i] = pfr;
+	      pfr += 2;
+	    }
+	  else
+	    {
+	      if (pfr < end_pfr)
+		{
+		  /* Passed partly in f13 and partly on the stack.
+		     Move it all to the stack.  */
+		  *pst = *(unsigned long *) pfr;
+		  pfr++;
+		}
+	      avalue[i] = pst;
+	    }
+	  pst += 2;	/* A long double takes two parameter save slots.  */
+	  break;
+#endif
+
 	default:
 	  FFI_ASSERT(0);
 	}
@@ -1096,5 +1111,4 @@ ffi_closure_helper_LINUX64 (ffi_closure* closure, void * rvalue,
 
   /* Tell ffi_closure_LINUX64 how to perform return type promotions.  */
   return cif->rtype->type;
-
 }
diff --git a/libffi/src/powerpc/linux64.S b/libffi/src/powerpc/linux64.S
index f12d4ec..11cf926 100644
--- a/libffi/src/powerpc/linux64.S
+++ b/libffi/src/powerpc/linux64.S
@@ -122,6 +122,7 @@ ffi_call_LINUX64:
 .Lfp_return_value:
 	bf	28, .Lfloat_return_value
 	stfd	%f1, 0(%r30)
+	stfd	%f2, 8(%r30)	/* It might be a long double */
 	b	.Ldone_return_value
 .Lfloat_return_value:
 	stfs	%f1, 0(%r30)
diff --git a/libffi/src/powerpc/linux64_closure.S b/libffi/src/powerpc/linux64_closure.S
index db78ea6..fa331db 100644
--- a/libffi/src/powerpc/linux64_closure.S
+++ b/libffi/src/powerpc/linux64_closure.S
@@ -30,24 +30,24 @@ ffi_closure_LINUX64:
 	std	%r0, 16(%r1)
 
 	# mandatory 48 bytes special reg save area + 64 bytes parm save area
-	# + 8 bytes retval area + 13*8 bytes fpr save area
-	stdu	%r1, -224(%r1)
+	# + 16 bytes retval area + 13*8 bytes fpr save area + round to 16
+	stdu	%r1, -240(%r1)
 .LCFI0:
 
 	# next save fpr 1 to fpr 13
-	stfd  %f1, 120+(0*8)(%r1)
-	stfd  %f2, 120+(1*8)(%r1)
-	stfd  %f3, 120+(2*8)(%r1)
-	stfd  %f4, 120+(3*8)(%r1)
-	stfd  %f5, 120+(4*8)(%r1)
-	stfd  %f6, 120+(5*8)(%r1)
-	stfd  %f7, 120+(6*8)(%r1)
-	stfd  %f8, 120+(7*8)(%r1)
-	stfd  %f9, 120+(8*8)(%r1)
-	stfd  %f10, 120+(9*8)(%r1)
-	stfd  %f11, 120+(10*8)(%r1)
-	stfd  %f12, 120+(11*8)(%r1)
-	stfd  %f13, 120+(12*8)(%r1)
+	stfd  %f1, 128+(0*8)(%r1)
+	stfd  %f2, 128+(1*8)(%r1)
+	stfd  %f3, 128+(2*8)(%r1)
+	stfd  %f4, 128+(3*8)(%r1)
+	stfd  %f5, 128+(4*8)(%r1)
+	stfd  %f6, 128+(5*8)(%r1)
+	stfd  %f7, 128+(6*8)(%r1)
+	stfd  %f8, 128+(7*8)(%r1)
+	stfd  %f9, 128+(8*8)(%r1)
+	stfd  %f10, 128+(9*8)(%r1)
+	stfd  %f11, 128+(10*8)(%r1)
+	stfd  %f12, 128+(11*8)(%r1)
+	stfd  %f13, 128+(12*8)(%r1)
 
 	# set up registers for the routine that actually does the work
 	# get the context pointer from the trampoline
@@ -58,10 +58,10 @@ ffi_closure_LINUX64:
 
 	# now load up the pointer to the parameter save area
 	# in the previous frame
-	addi %r5, %r1, 224 + 48
+	addi %r5, %r1, 240 + 48
 
 	# now load up the pointer to the saved fpr registers */
-	addi %r6, %r1, 120
+	addi %r6, %r1, 128
 
 	# make the call
 	bl .ffi_closure_helper_LINUX64
@@ -76,7 +76,7 @@ ffi_closure_LINUX64:
 	mflr %r4		# move address of .Lret to r4
 	sldi %r3, %r3, 4	# now multiply return type by 16
 	addi %r4, %r4, .Lret_type0 - .Lret
-	ld %r0, 224+16(%r1)
+	ld %r0, 240+16(%r1)
 	add %r3, %r3, %r4	# add contents of table to table address
 	mtctr %r3
 	bctr			# jump to it
@@ -89,33 +89,33 @@ ffi_closure_LINUX64:
 .Lret_type0:
 # case FFI_TYPE_VOID
 	mtlr %r0
-	addi %r1, %r1, 224
+	addi %r1, %r1, 240
 	blr
 	nop
 # case FFI_TYPE_INT
 	lwa %r3, 112+4(%r1)
 	mtlr %r0
-	addi %r1, %r1, 224
+	addi %r1, %r1, 240
 	blr
 # case FFI_TYPE_FLOAT
 	lfs %f1, 112+0(%r1)
 	mtlr %r0
-	addi %r1, %r1, 224
+	addi %r1, %r1, 240
 	blr
 # case FFI_TYPE_DOUBLE
 	lfd %f1, 112+0(%r1)
 	mtlr %r0
-	addi %r1, %r1, 224
+	addi %r1, %r1, 240
 	blr
 # case FFI_TYPE_LONGDOUBLE
 	lfd %f1, 112+0(%r1)
 	mtlr %r0
-	addi %r1, %r1, 224
-	blr
+	lfd %f2, 112+8(%r1)
+	b .Lfinish
 # case FFI_TYPE_UINT8
 	lbz %r3, 112+7(%r1)
 	mtlr %r0
-	addi %r1, %r1, 224
+	addi %r1, %r1, 240
 	blr
 # case FFI_TYPE_SINT8
 	lbz %r3, 112+7(%r1)
@@ -126,42 +126,42 @@ ffi_closure_LINUX64:
 	lhz %r3, 112+6(%r1)
 	mtlr %r0
 .Lfinish:
-	addi %r1, %r1, 224
+	addi %r1, %r1, 240
 	blr
 # case FFI_TYPE_SINT16
 	lha %r3, 112+6(%r1)
 	mtlr %r0
-	addi %r1, %r1, 224
+	addi %r1, %r1, 240
 	blr
 # case FFI_TYPE_UINT32
 	lwz %r3, 112+4(%r1)
 	mtlr %r0
-	addi %r1, %r1, 224
+	addi %r1, %r1, 240
 	blr
 # case FFI_TYPE_SINT32
 	lwa %r3, 112+4(%r1)
 	mtlr %r0
-	addi %r1, %r1, 224
+	addi %r1, %r1, 240
 	blr
 # case FFI_TYPE_UINT64
 	ld %r3, 112+0(%r1)
 	mtlr %r0
-	addi %r1, %r1, 224
+	addi %r1, %r1, 240
 	blr
 # case FFI_TYPE_SINT64
 	ld %r3, 112+0(%r1)
 	mtlr %r0
-	addi %r1, %r1, 224
+	addi %r1, %r1, 240
 	blr
 # case FFI_TYPE_STRUCT
 	mtlr %r0
-	addi %r1, %r1, 224
+	addi %r1, %r1, 240
 	blr
 	nop
 # case FFI_TYPE_POINTER
 	ld %r3, 112+0(%r1)
 	mtlr %r0
-	addi %r1, %r1, 224
+	addi %r1, %r1, 240
 	blr
 # esac
 .LFE1:
@@ -196,7 +196,7 @@ ffi_closure_LINUX64:
 	.byte	0x2	 # DW_CFA_advance_loc1
 	.byte	.LCFI0-.LFB1
 	.byte	0xe	 # DW_CFA_def_cfa_offset
-	.uleb128 224
+	.uleb128 240
 	.byte	0x11	 # DW_CFA_offset_extended_sf
 	.uleb128 0x41
 	.sleb128 -2
-- 
2.7.4