ffi_status
ffi_prep_cif_machdep (ffi_cif *cif)
{
- /* Round the stack up to a multiple of the stack alignment requirement. */
- cif->bytes = ALIGN(cif->bytes, 16);
-
- /* Initialize our flags. We are interested if this CIF will touch a
- vector register, if so we will enable context save and load to
- those registers, otherwise not. This is intended to be friendly
- to lazy float context switching in the kernel. */
- cif->aarch64_flags = 0;
+ ffi_type *rtype = cif->rtype;
+ size_t bytes = cif->bytes;
+ int flags, aarch64_flags, i, n;
- if (is_v_register_candidate (cif->rtype))
+ switch (rtype->type)
{
- cif->aarch64_flags |= AARCH64_FLAG_ARG_V;
- }
- else
- {
- int i;
- for (i = 0; i < cif->nargs; i++)
- if (is_v_register_candidate (cif->arg_types[i]))
- {
- cif->aarch64_flags |= AARCH64_FLAG_ARG_V;
- break;
- }
+ case FFI_TYPE_VOID:
+ flags = AARCH64_RET_VOID;
+ break;
+ case FFI_TYPE_UINT8:
+ flags = AARCH64_RET_UINT8;
+ break;
+ case FFI_TYPE_UINT16:
+ flags = AARCH64_RET_UINT16;
+ break;
+ case FFI_TYPE_UINT32:
+ flags = AARCH64_RET_UINT32;
+ break;
+ case FFI_TYPE_SINT8:
+ flags = AARCH64_RET_SINT8;
+ break;
+ case FFI_TYPE_SINT16:
+ flags = AARCH64_RET_SINT16;
+ break;
+ case FFI_TYPE_INT:
+ case FFI_TYPE_SINT32:
+ flags = AARCH64_RET_SINT32;
+ break;
+ case FFI_TYPE_SINT64:
+ case FFI_TYPE_UINT64:
+ flags = AARCH64_RET_INT64;
+ break;
+ case FFI_TYPE_POINTER:
+ flags = (sizeof(void *) == 4 ? AARCH64_RET_UINT32 : AARCH64_RET_INT64);
+ break;
+
+ case FFI_TYPE_FLOAT:
+ flags = AARCH64_RET_S1;
+ break;
+ case FFI_TYPE_DOUBLE:
+ flags = AARCH64_RET_D1;
+ break;
+ case FFI_TYPE_LONGDOUBLE:
+ flags = AARCH64_RET_Q1;
+ break;
+
+ case FFI_TYPE_STRUCT:
+ {
+ int h = is_hfa (rtype);
+ size_t s = rtype->size;
+
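+      /* is_hfa returns (COUNT << 8) | BASE_TYPE, so this computes
+         (BASE_TYPE * 4) + (4 - COUNT); e.g. an HFA of two doubles
+         yields 3*4 + (4-2) = 14 = AARCH64_RET_D2.  */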
+ if (h)
+ flags = (h & 0xff) * 4 + 4 - (h >> 8);
+ else if (s > 16)
+ {
+ flags = AARCH64_RET_VOID | AARCH64_RET_IN_MEM;
+ bytes += 8;
+ }
+ else if (s == 16)
+ flags = AARCH64_RET_INT128;
+ else if (s == 8)
+ flags = AARCH64_RET_INT64;
+ else
+ flags = AARCH64_RET_INT128 | AARCH64_RET_NEED_COPY;
+ }
+ break;
+
+ default:
+ abort();
}
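+  /* Note whether any argument needs a vector register; the call
+     trampoline only saves and loads q0-q7 when this flag is set,
+     which stays friendly to lazy float context switching in the
+     kernel.  */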
+ aarch64_flags = 0;
+ for (i = 0, n = cif->nargs; i < n; i++)
+ if (is_v_register_candidate (cif->arg_types[i]))
+ {
+ aarch64_flags = AARCH64_FLAG_ARG_V;
+ flags |= AARCH64_FLAG_ARG_V;
+ break;
+ }
+
+ /* Round the stack up to a multiple of the stack alignment requirement. */
+ cif->bytes = ALIGN(bytes, 16);
+ cif->flags = flags;
+ cif->aarch64_flags = aarch64_flags;
#if defined (__APPLE__)
cif->aarch64_nfixedargs = 0;
#endif
  return FFI_OK;
}
#if defined (__APPLE__)
-
/* Perform Apple-specific cif processing for variadic calls */
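+/* On Darwin, arguments beyond the last fixed one are always passed on
+   the stack rather than in registers, so the marshaller must know how
+   many arguments are fixed.  */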
ffi_status ffi_prep_cif_machdep_var(ffi_cif *cif,
unsigned int nfixedargs,
unsigned int ntotalargs)
{
- ffi_status status;
-
- status = ffi_prep_cif_machdep (cif);
-
+ ffi_status status = ffi_prep_cif_machdep (cif);
cif->aarch64_nfixedargs = nfixedargs;
-
return status;
}
-#endif
-
+#endif /* __APPLE__ */
-extern void ffi_call_SYSV (void *stack, void *frame,
- void (*fn)(void), int flags) FFI_HIDDEN;
+extern void ffi_call_SYSV (struct call_context *context, void *frame,
+ void (*fn)(void), void *rvalue, int flags)
+ FFI_HIDDEN;
/* Call a function with the provided arguments and capture the return
value. */
void
-ffi_call (ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
+ffi_call (ffi_cif *cif, void (*fn)(void), void *orig_rvalue, void **avalue)
{
struct call_context *context;
- void *stack, *frame;
+ void *stack, *frame, *rvalue;
struct arg_state state;
- size_t stack_bytes;
- int i, nargs = cif->nargs;
- int h, t;
+ size_t stack_bytes, rtype_size, rsize;
+ int i, nargs, flags;
ffi_type *rtype;
- /* Allocate consectutive stack for everything we'll need. */
+ flags = cif->flags;
+ rtype = cif->rtype;
+ rtype_size = rtype->size;
stack_bytes = cif->bytes;
- stack = alloca (stack_bytes + 32 + sizeof(struct call_context));
+
+  /* If the target function returns a structure via hidden pointer,
+     we cannot pass a null rvalue: substitute scratch space for it.
+     Otherwise, demote a null rvalue to a void return type. */
+ rsize = 0;
+ if (flags & AARCH64_RET_IN_MEM)
+ {
+ if (orig_rvalue == NULL)
+ rsize = rtype_size;
+ }
+ else if (orig_rvalue == NULL)
+ flags &= AARCH64_FLAG_ARG_V;
+ else if (flags & AARCH64_RET_NEED_COPY)
+ rsize = 16;
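+  /* For AARCH64_RET_NEED_COPY the callee returns the small struct in
+     x0/x1; we direct that store into a 16-byte scratch slot and copy
+     the true rtype_size bytes out to orig_rvalue after the call.  */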
+
+  /* Allocate consecutive stack for everything we'll need. */
+ context = alloca (sizeof(struct call_context) + stack_bytes + 32 + rsize);
+ stack = context + 1;
frame = stack + stack_bytes;
- context = frame + 32;
+ rvalue = (rsize ? frame + 32 : orig_rvalue);
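+  /* The alloca block is laid out as: the call_context holding the
+     argument values destined for v0-v7 and x0-x7, then the stacked
+     arguments, then the 32-byte frame where the assembly saves
+     x29/x30 and the rvalue/flags pair, then the optional scratch
+     rvalue.  */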
arg_init (&state);
- for (i = 0; i < nargs; i++)
+ for (i = 0, nargs = cif->nargs; i < nargs; i++)
{
ffi_type *ty = cif->arg_types[i];
size_t s = ty->size;
void *a = avalue[i];
+ int h, t;
t = ty->type;
switch (t)
#endif
}
- rtype = cif->rtype;
- if (is_register_candidate (rtype))
- {
- ffi_call_SYSV (stack, frame, fn, cif->aarch64_flags);
+ ffi_call_SYSV (context, frame, fn, rvalue, flags);
- t = rtype->type;
- switch (t)
- {
- case FFI_TYPE_INT:
- case FFI_TYPE_UINT8:
- case FFI_TYPE_SINT8:
- case FFI_TYPE_UINT16:
- case FFI_TYPE_SINT16:
- case FFI_TYPE_UINT32:
- case FFI_TYPE_SINT32:
- case FFI_TYPE_POINTER:
- case FFI_TYPE_UINT64:
- case FFI_TYPE_SINT64:
- *(ffi_arg *)rvalue = extend_integer_type (&context->x[0], t);
- break;
-
- case FFI_TYPE_FLOAT:
- case FFI_TYPE_DOUBLE:
- case FFI_TYPE_LONGDOUBLE:
- compress_hfa_type (rvalue, &context->v[0], 0x100 + t);
- break;
-
- case FFI_TYPE_STRUCT:
- h = is_hfa (cif->rtype);
- if (h)
- compress_hfa_type (rvalue, &context->v[0], h);
- else
- {
- FFI_ASSERT (rtype->size <= 16);
- memcpy (rvalue, &context->x[0], rtype->size);
- }
- break;
-
- default:
- FFI_ASSERT (0);
- break;
- }
- }
- else
- {
- context->x8 = (uintptr_t)rvalue;
- ffi_call_SYSV (stack, frame, fn, cif->aarch64_flags);
- }
+ if (flags & AARCH64_RET_NEED_COPY)
+ memcpy (orig_rvalue, rvalue, rtype_size);
}
static unsigned char trampoline [] =
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
-#define AARCH64_FLAG_ARG_V_BIT 0
+#define AARCH64_RET_VOID 0
+#define AARCH64_RET_INT64 1
+#define AARCH64_RET_INT128 2
+
+#define AARCH64_RET_UNUSED3 3
+#define AARCH64_RET_UNUSED4 4
+#define AARCH64_RET_UNUSED5 5
+#define AARCH64_RET_UNUSED6 6
+#define AARCH64_RET_UNUSED7 7
+
+/* Note that FFI_TYPE_FLOAT == 2, _DOUBLE == 3, _LONGDOUBLE == 4,
+   so _S4 through _Q1 are laid out as (TYPE * 4) + (4 - COUNT). */
+#define AARCH64_RET_S4 8
+#define AARCH64_RET_S3 9
+#define AARCH64_RET_S2 10
+#define AARCH64_RET_S1 11
+
+#define AARCH64_RET_D4 12
+#define AARCH64_RET_D3 13
+#define AARCH64_RET_D2 14
+#define AARCH64_RET_D1 15
+
+#define AARCH64_RET_Q4 16
+#define AARCH64_RET_Q3 17
+#define AARCH64_RET_Q2 18
+#define AARCH64_RET_Q1 19
+
+/* Note that each of the sub-64-bit integers gets two entries. */
+#define AARCH64_RET_UINT8 20
+#define AARCH64_RET_UINT16 22
+#define AARCH64_RET_UINT32 24
+
+#define AARCH64_RET_SINT8 26
+#define AARCH64_RET_SINT16 28
+#define AARCH64_RET_SINT32 30
+
+#define AARCH64_RET_MASK 31
+
+#define AARCH64_RET_IN_MEM (1 << 5)
+#define AARCH64_RET_NEED_COPY (1 << 6)
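+/* AARCH64_RET_IN_MEM means the result is returned through the x8
+   pointer; AARCH64_RET_NEED_COPY means the result comes back in
+   x0/x1, is stored to a scratch slot, and is then copied to the
+   caller's rvalue by ffi_call.  */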
+
+#define AARCH64_FLAG_ARG_V_BIT 7
#define AARCH64_FLAG_ARG_V (1 << AARCH64_FLAG_ARG_V_BIT)
#define N_X_ARG_REG 8
#endif
.text
- .align 2
+ .align 4
- .globl CNAME(ffi_call_SYSV)
+ .globl CNAME(ffi_call_SYSV)
#ifdef __ELF__
.type CNAME(ffi_call_SYSV), #function
.hidden CNAME(ffi_call_SYSV)
/* ffi_call_SYSV
-extern void ffi_call_SYSV (void *stack, void *frame,
-			   void (*fn)(void), int flags);
+extern void ffi_call_SYSV (struct call_context *context, void *frame,
+			   void (*fn)(void), void *rvalue, int flags);
Therefore on entry we have:
-   x0 stack
+   x0 context
x1 frame
x2 fn
- x3 flags
+ x3 rvalue
+ x4 flags
*/
cfi_startproc
cfi_rel_offset (x29, 0)
cfi_rel_offset (x30, 8)
- str w3, [x29, #16] /* save flags */
mov x9, x2 /* save fn */
+ mov x8, x3 /* install structure return */
+ stp x3, x4, [x29, #16] /* save rvalue and flags */
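+	/* x8 is the AAPCS64 indirect result location register; it is
+	   call-clobbered, so setting it unconditionally is harmless.  */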
/* Load the vector argument passing registers, if necessary. */
- tbz w3, #AARCH64_FLAG_ARG_V_BIT, 1f
- ldp q0, q1, [x29, #32 + 0]
- ldp q2, q3, [x29, #32 + 32]
- ldp q4, q5, [x29, #32 + 64]
- ldp q6, q7, [x29, #32 + 96]
+ tbz w4, #AARCH64_FLAG_ARG_V_BIT, 1f
+ ldp q0, q1, [sp, #0]
+ ldp q2, q3, [sp, #32]
+ ldp q4, q5, [sp, #64]
+ ldp q6, q7, [sp, #96]
1:
/* Load the core argument passing registers, including
the structure return pointer. */
- ldp x0, x1, [x29, #32 + 16*N_V_ARG_REG + 0]
- ldp x2, x3, [x29, #32 + 16*N_V_ARG_REG + 16]
- ldp x4, x5, [x29, #32 + 16*N_V_ARG_REG + 32]
- ldp x6, x7, [x29, #32 + 16*N_V_ARG_REG + 48]
- ldr x8, [x29, #32 + 16*N_V_ARG_REG + 64]
+ ldp x0, x1, [sp, #16*N_V_ARG_REG + 0]
+ ldp x2, x3, [sp, #16*N_V_ARG_REG + 16]
+ ldp x4, x5, [sp, #16*N_V_ARG_REG + 32]
+ ldp x6, x7, [sp, #16*N_V_ARG_REG + 48]
+
+ /* Deallocate the context, leaving the stacked arguments. */
+ add sp, sp, #CALL_CONTEXT_SIZE
blr x9 /* call fn */
- ldr w3, [x29, #16] /* reload flags */
+ ldp x3, x4, [x29, #16] /* reload rvalue and flags */
/* Partially deconstruct the stack frame. */
mov sp, x29
cfi_def_cfa_register (sp)
ldp x29, x30, [x29]
- /* Save the core return registers. */
- stp x0, x1, [sp, #32 + 16*N_V_ARG_REG]
-
- /* Save the vector return registers, if necessary. */
- tbz w3, #AARCH64_FLAG_ARG_V_BIT, 1f
- stp q0, q1, [sp, #32 + 0]
- stp q2, q3, [sp, #32 + 32]
-1:
- /* All done. */
+ /* Save the return value as directed. */
+ adr x5, 0f
+ and w4, w4, #AARCH64_RET_MASK
+ add x5, x5, x4, lsl #3
+ br x5
+
+ /* Note that each table entry is 2 insns, and thus 8 bytes.
+ For integer data, note that we're storing into ffi_arg
+ and therefore we want to extend to 64 bits; these types
+ have two consecutive entries allocated for them. */
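+/* For example, AARCH64_RET_D2 == 14 selects the entry at 0f + 14*8,
+   the "stp d0, d1, [x3]; ret" pair below.  */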
+ .align 4
+0: ret /* VOID */
+ nop
+1: str x0, [x3] /* INT64 */
+ ret
+2: stp x0, x1, [x3] /* INT128 */
+ ret
+3: brk #1000 /* UNUSED */
+ ret
+4: brk #1000 /* UNUSED */
+ ret
+5: brk #1000 /* UNUSED */
+ ret
+6: brk #1000 /* UNUSED */
+ ret
+7: brk #1000 /* UNUSED */
+ ret
+8: st4 { v0.s-v3.s }[0], [x3] /* S4 */
+ ret
+9: st3 { v0.s-v2.s }[0], [x3] /* S3 */
ret
+10: stp s0, s1, [x3] /* S2 */
+ ret
+11: str s0, [x3] /* S1 */
+ ret
+12: st4 { v0.d-v3.d }[0], [x3] /* D4 */
+ ret
+13: st3 { v0.d-v2.d }[0], [x3] /* D3 */
+ ret
+14: stp d0, d1, [x3] /* D2 */
+ ret
+15: str d0, [x3] /* D1 */
+ ret
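+	/* The Q4 and Q3 entries end in nop and deliberately fall
+	   through: Q4 stores q3 and q2, then drops into the Q2
+	   "stp q0, q1" entry.  */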
+16: str q3, [x3, #48] /* Q4 */
+ nop
+17: str q2, [x3, #32] /* Q3 */
+ nop
+18: stp q0, q1, [x3] /* Q2 */
+ ret
+19: str q0, [x3] /* Q1 */
+ ret
+20: uxtb w0, w0 /* UINT8 */
+ str x0, [x3]
+21: ret /* reserved */
+ nop
+22: uxth w0, w0 /* UINT16 */
+ str x0, [x3]
+23: ret /* reserved */
+ nop
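+	/* A 32-bit register write zero-extends into the full x0.  */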
+24: mov w0, w0 /* UINT32 */
+ str x0, [x3]
+25: ret /* reserved */
+ nop
+26: sxtb x0, w0 /* SINT8 */
+ str x0, [x3]
+27: ret /* reserved */
+ nop
+28: sxth x0, w0 /* SINT16 */
+ str x0, [x3]
+29: ret /* reserved */
+ nop
+30: sxtw x0, w0 /* SINT32 */
+ str x0, [x3]
+31: ret /* reserved */
+ nop
cfi_endproc
#ifdef __ELF__
Voila! */
.text
- .align 2
+ .align 4
- .globl CNAME(ffi_closure_SYSV)
+ .globl CNAME(ffi_closure_SYSV)
+#ifdef __ELF__
+ .type CNAME(ffi_closure_SYSV), #function
+ .hidden CNAME(ffi_closure_SYSV)
+#endif
cfi_startproc
CNAME(ffi_closure_SYSV):
stp x29, x30, [sp, #-16]!