ffi_status
ffi_prep_cif_machdep (ffi_cif *cif)
{
- /* Round the stack up to a multiple of the stack alignment requirement. */
- cif->bytes = ALIGN(cif->bytes, 16);
-
- /* Initialize our flags. We are interested if this CIF will touch a
- vector register, if so we will enable context save and load to
- those registers, otherwise not. This is intended to be friendly
- to lazy float context switching in the kernel. */
- cif->aarch64_flags = 0;
+ ffi_type *rtype = cif->rtype;
+ size_t bytes = cif->bytes;
+ int flags, aarch64_flags, i, n;
- if (is_v_register_candidate (cif->rtype))
+ switch (rtype->type)
{
- cif->aarch64_flags |= AARCH64_FLAG_ARG_V;
- }
- else
- {
- int i;
- for (i = 0; i < cif->nargs; i++)
- if (is_v_register_candidate (cif->arg_types[i]))
- {
- cif->aarch64_flags |= AARCH64_FLAG_ARG_V;
- break;
- }
+ case FFI_TYPE_VOID:
+ flags = AARCH64_RET_VOID;
+ break;
+ case FFI_TYPE_UINT8:
+ flags = AARCH64_RET_UINT8;
+ break;
+ case FFI_TYPE_UINT16:
+ flags = AARCH64_RET_UINT16;
+ break;
+ case FFI_TYPE_UINT32:
+ flags = AARCH64_RET_UINT32;
+ break;
+ case FFI_TYPE_SINT8:
+ flags = AARCH64_RET_SINT8;
+ break;
+ case FFI_TYPE_SINT16:
+ flags = AARCH64_RET_SINT16;
+ break;
+ case FFI_TYPE_INT:
+ case FFI_TYPE_SINT32:
+ flags = AARCH64_RET_SINT32;
+ break;
+ case FFI_TYPE_SINT64:
+ case FFI_TYPE_UINT64:
+ flags = AARCH64_RET_INT64;
+ break;
+ case FFI_TYPE_POINTER:
+ flags = (sizeof(void *) == 4 ? AARCH64_RET_UINT32 : AARCH64_RET_INT64);
+ break;
+
+ case FFI_TYPE_FLOAT:
+ flags = AARCH64_RET_S1;
+ break;
+ case FFI_TYPE_DOUBLE:
+ flags = AARCH64_RET_D1;
+ break;
+ case FFI_TYPE_LONGDOUBLE:
+ flags = AARCH64_RET_Q1;
+ break;
+
+ case FFI_TYPE_STRUCT:
+ {
+ int h = is_hfa (rtype);
+ size_t s = rtype->size;
+
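+      /* is_hfa returns (COUNT << 8) | BASE_TYPE, so this computes
+         (BASE_TYPE * 4) + (4 - COUNT); e.g. an HFA of two doubles
+         yields 3*4 + (4-2) = 14 = AARCH64_RET_D2.  */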
+ if (h)
+ flags = (h & 0xff) * 4 + 4 - (h >> 8);
+ else if (s > 16)
+ {
+ flags = AARCH64_RET_VOID | AARCH64_RET_IN_MEM;
+ bytes += 8;
+ }
+ else if (s == 16)
+ flags = AARCH64_RET_INT128;
+ else if (s == 8)
+ flags = AARCH64_RET_INT64;
+ else
+ flags = AARCH64_RET_INT128 | AARCH64_RET_NEED_COPY;
+ }
+ break;
+
+ default:
+ abort();
}
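+  /* Note whether any argument needs a vector register; the call
+     trampoline only saves and loads q0-q7 when this flag is set,
+     which stays friendly to lazy float context switching in the
+     kernel.  */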
+ aarch64_flags = 0;
+ for (i = 0, n = cif->nargs; i < n; i++)
+ if (is_v_register_candidate (cif->arg_types[i]))
+ {
+ aarch64_flags = AARCH64_FLAG_ARG_V;
+ flags |= AARCH64_FLAG_ARG_V;
+ break;
+ }
+
+ /* Round the stack up to a multiple of the stack alignment requirement. */
+ cif->bytes = ALIGN(bytes, 16);
+ cif->flags = flags;
+ cif->aarch64_flags = aarch64_flags;
#if defined (__APPLE__)
cif->aarch64_nfixedargs = 0;
#endif
  return FFI_OK;
}
#if defined (__APPLE__)
-
/* Perform Apple-specific cif processing for variadic calls */
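+/* On Darwin, arguments beyond the last fixed one are always passed on
+   the stack rather than in registers, so the marshaller must know how
+   many arguments are fixed.  */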
ffi_status ffi_prep_cif_machdep_var(ffi_cif *cif,
unsigned int nfixedargs,
unsigned int ntotalargs)
{
- ffi_status status;
-
- status = ffi_prep_cif_machdep (cif);
-
+ ffi_status status = ffi_prep_cif_machdep (cif);
cif->aarch64_nfixedargs = nfixedargs;
-
return status;
}
-#endif
-
+#endif /* __APPLE__ */
-extern void ffi_call_SYSV (void *stack, void *frame,
- void (*fn)(void), int flags) FFI_HIDDEN;
+extern void ffi_call_SYSV (struct call_context *context, void *frame,
+ void (*fn)(void), void *rvalue, int flags)
+ FFI_HIDDEN;
/* Call a function with the provided arguments and capture the return
value. */
void
-ffi_call (ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
+ffi_call (ffi_cif *cif, void (*fn)(void), void *orig_rvalue, void **avalue)
{
struct call_context *context;
- void *stack, *frame;
+ void *stack, *frame, *rvalue;
struct arg_state state;
- size_t stack_bytes;
- int i, nargs = cif->nargs;
- int h, t;
+ size_t stack_bytes, rtype_size, rsize;
+ int i, nargs, flags;
ffi_type *rtype;
- /* Allocate consectutive stack for everything we'll need. */
+ flags = cif->flags;
+ rtype = cif->rtype;
+ rtype_size = rtype->size;
stack_bytes = cif->bytes;
- stack = alloca (stack_bytes + 32 + sizeof(struct call_context));
+
+  /* If the target function returns a structure via hidden pointer,
+     we cannot pass a null rvalue: substitute scratch space for it.
+     Otherwise, demote a null rvalue to a void return type. */
+ rsize = 0;
+ if (flags & AARCH64_RET_IN_MEM)
+ {
+ if (orig_rvalue == NULL)
+ rsize = rtype_size;
+ }
+ else if (orig_rvalue == NULL)
+ flags &= AARCH64_FLAG_ARG_V;
+ else if (flags & AARCH64_RET_NEED_COPY)
+ rsize = 16;
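+  /* For AARCH64_RET_NEED_COPY the callee returns the small struct in
+     x0/x1; we direct that store into a 16-byte scratch slot and copy
+     the true rtype_size bytes out to orig_rvalue after the call.  */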
+
+  /* Allocate consecutive stack for everything we'll need. */
+ context = alloca (sizeof(struct call_context) + stack_bytes + 32 + rsize);
+ stack = context + 1;
frame = stack + stack_bytes;
- context = frame + 32;
+ rvalue = (rsize ? frame + 32 : orig_rvalue);
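+  /* The alloca block is laid out as: the call_context holding the
+     argument values destined for v0-v7 and x0-x7, then the stacked
+     arguments, then the 32-byte frame where the assembly saves
+     x29/x30 and the rvalue/flags pair, then the optional scratch
+     rvalue.  */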
arg_init (&state);
- for (i = 0; i < nargs; i++)
+ for (i = 0, nargs = cif->nargs; i < nargs; i++)
{
ffi_type *ty = cif->arg_types[i];
size_t s = ty->size;
void *a = avalue[i];
+ int h, t;
t = ty->type;
switch (t)
#endif
}
- rtype = cif->rtype;
- if (is_register_candidate (rtype))
- {
- ffi_call_SYSV (stack, frame, fn, cif->aarch64_flags);
+ ffi_call_SYSV (context, frame, fn, rvalue, flags);
- t = rtype->type;
- switch (t)
- {
- case FFI_TYPE_INT:
- case FFI_TYPE_UINT8:
- case FFI_TYPE_SINT8:
- case FFI_TYPE_UINT16:
- case FFI_TYPE_SINT16:
- case FFI_TYPE_UINT32:
- case FFI_TYPE_SINT32:
- case FFI_TYPE_POINTER:
- case FFI_TYPE_UINT64:
- case FFI_TYPE_SINT64:
- *(ffi_arg *)rvalue = extend_integer_type (&context->x[0], t);
- break;
-
- case FFI_TYPE_FLOAT:
- case FFI_TYPE_DOUBLE:
- case FFI_TYPE_LONGDOUBLE:
- compress_hfa_type (rvalue, &context->v[0], 0x100 + t);
- break;
-
- case FFI_TYPE_STRUCT:
- h = is_hfa (cif->rtype);
- if (h)
- compress_hfa_type (rvalue, &context->v[0], h);
- else
- {
- FFI_ASSERT (rtype->size <= 16);
- memcpy (rvalue, &context->x[0], rtype->size);
- }
- break;
-
- default:
- FFI_ASSERT (0);
- break;
- }
- }
- else
- {
- context->x8 = (uintptr_t)rvalue;
- ffi_call_SYSV (stack, frame, fn, cif->aarch64_flags);
- }
+ if (flags & AARCH64_RET_NEED_COPY)
+ memcpy (orig_rvalue, rvalue, rtype_size);
}
static unsigned char trampoline [] =
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
-#define AARCH64_FLAG_ARG_V_BIT 0
+#define AARCH64_RET_VOID 0
+#define AARCH64_RET_INT64 1
+#define AARCH64_RET_INT128 2
+
+#define AARCH64_RET_UNUSED3 3
+#define AARCH64_RET_UNUSED4 4
+#define AARCH64_RET_UNUSED5 5
+#define AARCH64_RET_UNUSED6 6
+#define AARCH64_RET_UNUSED7 7
+
+/* Note that FFI_TYPE_FLOAT == 2, _DOUBLE == 3, _LONGDOUBLE == 4,
+   so _S4 through _Q1 are laid out as (TYPE * 4) + (4 - COUNT). */
+#define AARCH64_RET_S4 8
+#define AARCH64_RET_S3 9
+#define AARCH64_RET_S2 10
+#define AARCH64_RET_S1 11
+
+#define AARCH64_RET_D4 12
+#define AARCH64_RET_D3 13
+#define AARCH64_RET_D2 14
+#define AARCH64_RET_D1 15
+
+#define AARCH64_RET_Q4 16
+#define AARCH64_RET_Q3 17
+#define AARCH64_RET_Q2 18
+#define AARCH64_RET_Q1 19
+
+/* Note that each of the sub-64-bit integers gets two entries. */
+#define AARCH64_RET_UINT8 20
+#define AARCH64_RET_UINT16 22
+#define AARCH64_RET_UINT32 24
+
+#define AARCH64_RET_SINT8 26
+#define AARCH64_RET_SINT16 28
+#define AARCH64_RET_SINT32 30
+
+#define AARCH64_RET_MASK 31
+
+#define AARCH64_RET_IN_MEM (1 << 5)
+#define AARCH64_RET_NEED_COPY (1 << 6)
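+/* AARCH64_RET_IN_MEM means the result is returned through the x8
+   pointer; AARCH64_RET_NEED_COPY means the result comes back in
+   x0/x1, is stored to a scratch slot, and is then copied to the
+   caller's rvalue by ffi_call.  */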
+
+#define AARCH64_FLAG_ARG_V_BIT 7
#define AARCH64_FLAG_ARG_V (1 << AARCH64_FLAG_ARG_V_BIT)
#define N_X_ARG_REG 8
#endif
.text
- .align 2
+ .align 4
- .globl CNAME(ffi_call_SYSV)
+ .globl CNAME(ffi_call_SYSV)
#ifdef __ELF__
.type CNAME(ffi_call_SYSV), #function
.hidden CNAME(ffi_call_SYSV)
/* ffi_call_SYSV
-extern void ffi_call_SYSV (void *stack, void *frame,
-			   void (*fn)(void), int flags);
+extern void ffi_call_SYSV (struct call_context *context, void *frame,
+			   void (*fn)(void), void *rvalue, int flags);
Therefore on entry we have:
-   x0 stack
+   x0 context
x1 frame
x2 fn
- x3 flags
+ x3 rvalue
+ x4 flags
*/
cfi_startproc
cfi_rel_offset (x29, 0)
cfi_rel_offset (x30, 8)
- str w3, [x29, #16] /* save flags */
mov x9, x2 /* save fn */
+ mov x8, x3 /* install structure return */
+ stp x3, x4, [x29, #16] /* save rvalue and flags */
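+	/* x8 is the AAPCS64 indirect result location register; it is
+	   call-clobbered, so setting it unconditionally is harmless.  */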
/* Load the vector argument passing registers, if necessary. */
- tbz w3, #AARCH64_FLAG_ARG_V_BIT, 1f
- ldp q0, q1, [x29, #32 + 0]
- ldp q2, q3, [x29, #32 + 32]
- ldp q4, q5, [x29, #32 + 64]
- ldp q6, q7, [x29, #32 + 96]
+ tbz w4, #AARCH64_FLAG_ARG_V_BIT, 1f
+ ldp q0, q1, [sp, #0]
+ ldp q2, q3, [sp, #32]
+ ldp q4, q5, [sp, #64]
+ ldp q6, q7, [sp, #96]
1:
/* Load the core argument passing registers, including
the structure return pointer. */
- ldp x0, x1, [x29, #32 + 16*N_V_ARG_REG + 0]
- ldp x2, x3, [x29, #32 + 16*N_V_ARG_REG + 16]
- ldp x4, x5, [x29, #32 + 16*N_V_ARG_REG + 32]
- ldp x6, x7, [x29, #32 + 16*N_V_ARG_REG + 48]
- ldr x8, [x29, #32 + 16*N_V_ARG_REG + 64]
+ ldp x0, x1, [sp, #16*N_V_ARG_REG + 0]
+ ldp x2, x3, [sp, #16*N_V_ARG_REG + 16]
+ ldp x4, x5, [sp, #16*N_V_ARG_REG + 32]
+ ldp x6, x7, [sp, #16*N_V_ARG_REG + 48]
+
+ /* Deallocate the context, leaving the stacked arguments. */
+ add sp, sp, #CALL_CONTEXT_SIZE
blr x9 /* call fn */
- ldr w3, [x29, #16] /* reload flags */
+ ldp x3, x4, [x29, #16] /* reload rvalue and flags */
/* Partially deconstruct the stack frame. */
mov sp, x29
cfi_def_cfa_register (sp)
ldp x29, x30, [x29]
- /* Save the core return registers. */
- stp x0, x1, [sp, #32 + 16*N_V_ARG_REG]
-
- /* Save the vector return registers, if necessary. */
- tbz w3, #AARCH64_FLAG_ARG_V_BIT, 1f
- stp q0, q1, [sp, #32 + 0]
- stp q2, q3, [sp, #32 + 32]
-1:
- /* All done. */
+ /* Save the return value as directed. */
+ adr x5, 0f
+ and w4, w4, #AARCH64_RET_MASK
+ add x5, x5, x4, lsl #3
+ br x5
+
+ /* Note that each table entry is 2 insns, and thus 8 bytes.
+ For integer data, note that we're storing into ffi_arg
+ and therefore we want to extend to 64 bits; these types
+ have two consecutive entries allocated for them. */
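+/* For example, AARCH64_RET_D2 == 14 selects the entry at 0f + 14*8,
+   the "stp d0, d1, [x3]; ret" pair below.  */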
+ .align 4
+0: ret /* VOID */
+ nop
+1: str x0, [x3] /* INT64 */
+ ret
+2: stp x0, x1, [x3] /* INT128 */
+ ret
+3: brk #1000 /* UNUSED */
+ ret
+4: brk #1000 /* UNUSED */
+ ret
+5: brk #1000 /* UNUSED */
+ ret
+6: brk #1000 /* UNUSED */
+ ret
+7: brk #1000 /* UNUSED */
+ ret
+8: st4 { v0.s-v3.s }[0], [x3] /* S4 */
+ ret
+9: st3 { v0.s-v2.s }[0], [x3] /* S3 */
ret
+10: stp s0, s1, [x3] /* S2 */
+ ret
+11: str s0, [x3] /* S1 */
+ ret
+12: st4 { v0.d-v3.d }[0], [x3] /* D4 */
+ ret
+13: st3 { v0.d-v2.d }[0], [x3] /* D3 */
+ ret
+14: stp d0, d1, [x3] /* D2 */
+ ret
+15: str d0, [x3] /* D1 */
+ ret
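+	/* The Q4 and Q3 entries end in nop and deliberately fall
+	   through: Q4 stores q3 and q2, then drops into the Q2
+	   "stp q0, q1" entry.  */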
+16: str q3, [x3, #48] /* Q4 */
+ nop
+17: str q2, [x3, #32] /* Q3 */
+ nop
+18: stp q0, q1, [x3] /* Q2 */
+ ret
+19: str q0, [x3] /* Q1 */
+ ret
+20: uxtb w0, w0 /* UINT8 */
+ str x0, [x3]
+21: ret /* reserved */
+ nop
+22: uxth w0, w0 /* UINT16 */
+ str x0, [x3]
+23: ret /* reserved */
+ nop
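+	/* A 32-bit register write zero-extends into the full x0.  */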
+24: mov w0, w0 /* UINT32 */
+ str x0, [x3]
+25: ret /* reserved */
+ nop
+26: sxtb x0, w0 /* SINT8 */
+ str x0, [x3]
+27: ret /* reserved */
+ nop
+28: sxth x0, w0 /* SINT16 */
+ str x0, [x3]
+29: ret /* reserved */
+ nop
+30: sxtw x0, w0 /* SINT32 */
+ str x0, [x3]
+31: ret /* reserved */
+ nop
cfi_endproc
#ifdef __ELF__
Voila! */
.text
- .align 2
+ .align 4
- .globl CNAME(ffi_closure_SYSV)
+ .globl CNAME(ffi_closure_SYSV)
+#ifdef __ELF__
+ .type CNAME(ffi_closure_SYSV), #function
+ .hidden CNAME(ffi_closure_SYSV)
+#endif
cfi_startproc
CNAME(ffi_closure_SYSV):
stp x29, x30, [sp, #-16]!