/* About the LINUX64 ABI. */
enum {
NUM_GPR_ARG_REGISTERS64 = 8,
- NUM_FPR_ARG_REGISTERS64 = 13
+ NUM_FPR_ARG_REGISTERS64 = 13,
+ NUM_VEC_ARG_REGISTERS64 = 12,
};
enum { ASM_NEEDS_REGISTERS64 = 4 };
of FPRs, but according to the ABI must be considered
distinct from doubles. They are also limited to a
maximum of four members in a homogeneous aggregate. */
- else
+ else if ((abi & FFI_LINUX_LONG_DOUBLE_IEEE128) == 0)
{
*elnum = 2;
return FFI_TYPE_LONGDOUBLE;
}
+ /* Fall through. */
#endif
case FFI_TYPE_FLOAT:
case FFI_TYPE_DOUBLE:
{
ffi_type **ptr;
unsigned bytes;
- unsigned i, fparg_count = 0, intarg_count = 0;
+ unsigned i, fparg_count = 0, intarg_count = 0, vecarg_count = 0;
unsigned flags = cif->flags;
unsigned elt, elnum, rtype;
#if FFI_TYPE_LONGDOUBLE == FFI_TYPE_DOUBLE
- /* If compiled without long double support.. */
- if ((cif->abi & FFI_LINUX_LONG_DOUBLE_128) != 0)
+ /* If compiled without long double support... */
+ if ((cif->abi & FFI_LINUX_LONG_DOUBLE_128) != 0 ||
+ (cif->abi & FFI_LINUX_LONG_DOUBLE_IEEE128) != 0)
+ return FFI_BAD_ABI;
+#elif !defined(__VEC__)
+  /* If libffi was compiled without vector register support, which the
+     assembly requires to pass IEEE128 long doubles, reject the ABI...  */
+ if ((cif->abi & FFI_LINUX_LONG_DOUBLE_IEEE128) != 0)
+ return FFI_BAD_ABI;
+#else
+ /* If the IEEE128 flag is set, but long double is only 64 bits wide... */
+ if ((cif->abi & FFI_LINUX_LONG_DOUBLE_128) == 0 &&
+ (cif->abi & FFI_LINUX_LONG_DOUBLE_IEEE128) != 0)
return FFI_BAD_ABI;
#endif
{
#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
case FFI_TYPE_LONGDOUBLE:
+ if ((cif->abi & FFI_LINUX_LONG_DOUBLE_IEEE128) != 0)
+ {
+ flags |= FLAG_RETURNS_VEC;
+ break;
+ }
if ((cif->abi & FFI_LINUX_LONG_DOUBLE_128) != 0)
flags |= FLAG_RETURNS_128BITS;
/* Fall through. */
{
#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
case FFI_TYPE_LONGDOUBLE:
+ if ((cif->abi & FFI_LINUX_LONG_DOUBLE_IEEE128) != 0)
+ {
+ vecarg_count++;
+	      /* Round intarg_count up to an even doubleword count
+		 (16-byte alignment) and reserve the two doublewords
+		 occupied by the 16-byte argument itself.  */
+ intarg_count = (intarg_count + 3) & ~0x1;
+ if (vecarg_count > NUM_VEC_ARG_REGISTERS64)
+ flags |= FLAG_ARG_NEEDS_PSAVE;
+ break;
+ }
if ((cif->abi & FFI_LINUX_LONG_DOUBLE_128) != 0)
{
fparg_count++;
}
intarg_count += ((*ptr)->size + 7) / 8;
elt = discover_homogeneous_aggregate (cif->abi, *ptr, &elnum);
+#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
+ if (elt == FFI_TYPE_LONGDOUBLE &&
+ (cif->abi & FFI_LINUX_LONG_DOUBLE_IEEE128) != 0)
+ {
+ vecarg_count += elnum;
+ if (vecarg_count > NUM_VEC_ARG_REGISTERS64)
+ flags |= FLAG_ARG_NEEDS_PSAVE;
+ break;
+ }
+ else
+#endif
if (elt)
{
fparg_count += elnum;
flags |= FLAG_FP_ARGUMENTS;
if (intarg_count > 4)
flags |= FLAG_4_GPR_ARGUMENTS;
+ if (vecarg_count != 0)
+ flags |= FLAG_VEC_ARGUMENTS;
/* Space for the FPR registers, if needed. */
if (fparg_count != 0)
bytes += NUM_FPR_ARG_REGISTERS64 * sizeof (double);
+ /* Space for the vector registers, if needed, aligned to 16 bytes. */
+ if (vecarg_count != 0) {
+ bytes = (bytes + 15) & ~0xF;
+ bytes += NUM_VEC_ARG_REGISTERS64 * sizeof (float128);
+ }
/* Stack space. */
#if _CALL_ELF == 2
|--------------------------------------------| |
| FPR registers f1-f13 (optional) 13*8 | |
|--------------------------------------------| |
+ | VEC registers v2-v13 (optional) 12*16 | |
+ |--------------------------------------------| |
| Parameter save area | |
|--------------------------------------------| |
| TOC save area 8 | |
unsigned long *ul;
float *f;
double *d;
+ float128 *f128;
size_t p;
} valp;
valp fpr_base;
unsigned int fparg_count;
+ /* 'vec_base' points at the space for v2, and grows upwards as
+ we use vector registers. */
+ valp vec_base;
+ unsigned int vecarg_count;
+
unsigned int i, words, nargs, nfixedargs;
ffi_type **ptr;
double double_tmp;
unsigned long **ul;
float **f;
double **d;
+ float128 **f128;
} p_argv;
unsigned long gprvalue;
unsigned long align;
#endif
fpr_base.d = gpr_base.d - NUM_FPR_ARG_REGISTERS64;
fparg_count = 0;
+  /* The vector save area sits below the FPR save area (aligned down to
+     16 bytes) when FP args are present, else directly below the GPRs.  */
+ if (ecif->cif->flags & FLAG_FP_ARGUMENTS)
+ vec_base.p = fpr_base.p & ~0xF;
+ else
+ vec_base.p = gpr_base.p;
+ vec_base.f128 -= NUM_VEC_ARG_REGISTERS64;
+ vecarg_count = 0;
next_arg.ul = gpr_base.ul;
/* Check that everything starts aligned properly. */
FFI_ASSERT (((unsigned long) (char *) stack & 0xF) == 0);
FFI_ASSERT (((unsigned long) stacktop.c & 0xF) == 0);
+ FFI_ASSERT (((unsigned long) gpr_base.c & 0xF) == 0);
+ FFI_ASSERT (((unsigned long) gpr_end.c & 0xF) == 0);
+ FFI_ASSERT (((unsigned long) vec_base.c & 0xF) == 0);
FFI_ASSERT ((bytes & 0xF) == 0);
/* Deal with return values that are actually pass-by-reference. */
{
#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
case FFI_TYPE_LONGDOUBLE:
+ if ((ecif->cif->abi & FFI_LINUX_LONG_DOUBLE_IEEE128) != 0)
+ {
+ next_arg.p = FFI_ALIGN (next_arg.p, 16);
+ if (next_arg.ul == gpr_end.ul)
+ next_arg.ul = rest.ul;
+ if (vecarg_count < NUM_VEC_ARG_REGISTERS64 && i < nfixedargs)
+ *vec_base.f128++ = **p_argv.f128;
+ else
+ *next_arg.f128 = **p_argv.f128;
+ if (++next_arg.f128 == gpr_end.f128)
+ next_arg.f128 = rest.f128;
+ vecarg_count++;
+ FFI_ASSERT (__LDBL_MANT_DIG__ == 113);
+ FFI_ASSERT (flags & FLAG_VEC_ARGUMENTS);
+ break;
+ }
if ((ecif->cif->abi & FFI_LINUX_LONG_DOUBLE_128) != 0)
{
double_tmp = (*p_argv.d)[0];
void *v;
float *f;
double *d;
+ float128 *f128;
} arg;
arg.v = *p_argv.v;
+#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
+ if (elt == FFI_TYPE_LONGDOUBLE &&
+ (ecif->cif->abi & FFI_LINUX_LONG_DOUBLE_IEEE128) != 0)
+ {
+ do
+ {
+ if (vecarg_count < NUM_VEC_ARG_REGISTERS64
+ && i < nfixedargs)
+ *vec_base.f128++ = *arg.f128++;
+ else
+ *next_arg.f128 = *arg.f128++;
+ if (++next_arg.f128 == gpr_end.f128)
+ next_arg.f128 = rest.f128;
+ vecarg_count++;
+ }
+ while (--elnum != 0);
+ }
+ else
+#endif
if (elt == FFI_TYPE_FLOAT)
{
do
void *user_data,
void *rvalue,
unsigned long *pst,
- ffi_dblfl *pfr)
+ ffi_dblfl *pfr,
+ float128 *pvec)
{
/* rvalue is the pointer to space for return value in closure assembly */
/* pst is the pointer to parameter save area
(r3-r10 are stored into its first 8 slots by ffi_closure_LINUX64) */
/* pfr is the pointer to where f1-f13 are stored in ffi_closure_LINUX64 */
+ /* pvec is the pointer to where v2-v13 are stored in ffi_closure_LINUX64 */
void **avalue;
ffi_type **arg_types;
unsigned long i, avn, nfixedargs;
ffi_dblfl *end_pfr = pfr + NUM_FPR_ARG_REGISTERS64;
+ float128 *end_pvec = pvec + NUM_VEC_ARG_REGISTERS64;
unsigned long align;
avalue = alloca (cif->nargs * sizeof (void *));
unsigned long *ul;
float *f;
double *d;
+ float128 *f128;
size_t p;
} to, from;
aggregate size is not greater than the space taken by
the registers so store back to the register/parameter
save arrays. */
+#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
+ if (elt == FFI_TYPE_LONGDOUBLE &&
+ (cif->abi & FFI_LINUX_LONG_DOUBLE_IEEE128) != 0)
+ {
+ if (pvec + elnum <= end_pvec)
+ to.v = pvec;
+ else
+ to.v = pst;
+ }
+ else
+#endif
if (pfr + elnum <= end_pfr)
to.v = pfr;
else
avalue[i] = to.v;
from.ul = pst;
+#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
+ if (elt == FFI_TYPE_LONGDOUBLE &&
+ (cif->abi & FFI_LINUX_LONG_DOUBLE_IEEE128) != 0)
+ {
+ do
+ {
+ if (pvec < end_pvec && i < nfixedargs)
+ *to.f128 = *pvec++;
+ else
+ *to.f128 = *from.f128;
+ to.f128++;
+ from.f128++;
+ }
+ while (--elnum != 0);
+ }
+ else
+#endif
if (elt == FFI_TYPE_FLOAT)
{
do
#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
case FFI_TYPE_LONGDOUBLE:
- if ((cif->abi & FFI_LINUX_LONG_DOUBLE_128) != 0)
+ if ((cif->abi & FFI_LINUX_LONG_DOUBLE_IEEE128) != 0)
+ {
+ if (((unsigned long) pst & 0xF) != 0)
+ ++pst;
+ if (pvec < end_pvec && i < nfixedargs)
+ avalue[i] = pvec++;
+ else
+ avalue[i] = pst;
+ pst += 2;
+ break;
+ }
+ else if ((cif->abi & FFI_LINUX_LONG_DOUBLE_128) != 0)
{
if (pfr + 1 < end_pfr && i + 1 < nfixedargs)
{
/* Tell ffi_closure_LINUX64 how to perform return type promotions. */
if ((cif->flags & FLAG_RETURNS_SMST) != 0)
{
- if ((cif->flags & FLAG_RETURNS_FP) == 0)
+ if ((cif->flags & (FLAG_RETURNS_FP | FLAG_RETURNS_VEC)) == 0)
return FFI_V2_TYPE_SMALL_STRUCT + cif->rtype->size - 1;
+ else if ((cif->flags & FLAG_RETURNS_VEC) != 0)
+ return FFI_V2_TYPE_VECTOR_HOMOG;
else if ((cif->flags & FLAG_RETURNS_64BITS) != 0)
return FFI_V2_TYPE_DOUBLE_HOMOG;
else
return FFI_V2_TYPE_FLOAT_HOMOG;
}
+ if ((cif->flags & FLAG_RETURNS_VEC) != 0)
+ return FFI_V2_TYPE_VECTOR;
return cif->rtype->type;
}
#endif
enum {
/* The assembly depends on these exact flags. */
/* These go in cr7 */
- FLAG_RETURNS_SMST = 1 << (31-31), /* Used for FFI_SYSV small structs. */
+ FLAG_RETURNS_SMST = 1 << (31-31), /* Used for FFI_SYSV small structs. */
FLAG_RETURNS_NOTHING = 1 << (31-30),
FLAG_RETURNS_FP = 1 << (31-29),
- FLAG_RETURNS_64BITS = 1 << (31-28),
+ FLAG_RETURNS_VEC = 1 << (31-28),
- /* This goes in cr6 */
- FLAG_RETURNS_128BITS = 1 << (31-27),
+ /* These go in cr6 */
+ FLAG_RETURNS_64BITS = 1 << (31-27),
+ FLAG_RETURNS_128BITS = 1 << (31-26),
- FLAG_COMPAT = 1 << (31- 8), /* Not used by assembly */
+ FLAG_COMPAT = 1 << (31- 8), /* Not used by assembly */
/* These go in cr1 */
FLAG_ARG_NEEDS_COPY = 1 << (31- 7), /* Used by sysv code */
FLAG_ARG_NEEDS_PSAVE = FLAG_ARG_NEEDS_COPY, /* Used by linux64 code */
FLAG_FP_ARGUMENTS = 1 << (31- 6), /* cr1.eq; specified by ABI */
FLAG_4_GPR_ARGUMENTS = 1 << (31- 5),
- FLAG_RETVAL_REFERENCE = 1 << (31- 4)
+ FLAG_RETVAL_REFERENCE = 1 << (31- 4),
+ FLAG_VEC_ARGUMENTS = 1 << (31- 3),
};
typedef union
double d;
} ffi_dblfl;
+#if defined(__FLOAT128_TYPE__)
+typedef _Float128 float128;
+#elif defined(__FLOAT128__)
+typedef __float128 float128;
+#else
+typedef __int128 float128;
+#endif
+
void FFI_HIDDEN ffi_closure_SYSV (void);
void FFI_HIDDEN ffi_go_closure_sysv (void);
void FFI_HIDDEN ffi_call_SYSV(extended_cif *, void (*)(void), void *,
void (*) (ffi_cif *, void *,
void **, void *),
void *, void *,
- unsigned long *, ffi_dblfl *);
+ unsigned long *, ffi_dblfl *,
+ float128 *);
ld %r2, 8(%r29)
# endif
/* Now do the call. */
- /* Set up cr1 with bits 4-7 of the flags. */
- mtcrf 0x40, %r31
+ /* Set up cr1 with bits 3-7 of the flags. */
+ mtcrf 0xc0, %r31
/* Get the address to call into CTR. */
mtctr %r12
/* Load all those argument registers. */
- ld %r3, -32-(8*8)(%r28)
- ld %r4, -32-(7*8)(%r28)
- ld %r5, -32-(6*8)(%r28)
- ld %r6, -32-(5*8)(%r28)
+ addi %r29, %r28, -32-(8*8)
+ ld %r3, (0*8)(%r29)
+ ld %r4, (1*8)(%r29)
+ ld %r5, (2*8)(%r29)
+ ld %r6, (3*8)(%r29)
bf- 5, 1f
- ld %r7, -32-(4*8)(%r28)
- ld %r8, -32-(3*8)(%r28)
- ld %r9, -32-(2*8)(%r28)
- ld %r10, -32-(1*8)(%r28)
+ ld %r7, (4*8)(%r29)
+ ld %r8, (5*8)(%r29)
+ ld %r9, (6*8)(%r29)
+ ld %r10, (7*8)(%r29)
1:
/* Load all the FP registers. */
bf- 6, 2f
- lfd %f1, -32-(21*8)(%r28)
- lfd %f2, -32-(20*8)(%r28)
- lfd %f3, -32-(19*8)(%r28)
- lfd %f4, -32-(18*8)(%r28)
- lfd %f5, -32-(17*8)(%r28)
- lfd %f6, -32-(16*8)(%r28)
- lfd %f7, -32-(15*8)(%r28)
- lfd %f8, -32-(14*8)(%r28)
- lfd %f9, -32-(13*8)(%r28)
- lfd %f10, -32-(12*8)(%r28)
- lfd %f11, -32-(11*8)(%r28)
- lfd %f12, -32-(10*8)(%r28)
- lfd %f13, -32-(9*8)(%r28)
+ addi %r29, %r29, -(14*8)
+ lfd %f1, ( 1*8)(%r29)
+ lfd %f2, ( 2*8)(%r29)
+ lfd %f3, ( 3*8)(%r29)
+ lfd %f4, ( 4*8)(%r29)
+ lfd %f5, ( 5*8)(%r29)
+ lfd %f6, ( 6*8)(%r29)
+ lfd %f7, ( 7*8)(%r29)
+ lfd %f8, ( 8*8)(%r29)
+ lfd %f9, ( 9*8)(%r29)
+ lfd %f10, (10*8)(%r29)
+ lfd %f11, (11*8)(%r29)
+ lfd %f12, (12*8)(%r29)
+ lfd %f13, (13*8)(%r29)
2:
+ /* Load all the vector registers. */
+ bf- 3, 3f
+ addi %r29, %r29, -16
+ lvx %v13, 0, %r29
+ addi %r29, %r29, -16
+ lvx %v12, 0, %r29
+ addi %r29, %r29, -16
+ lvx %v11, 0, %r29
+ addi %r29, %r29, -16
+ lvx %v10, 0, %r29
+ addi %r29, %r29, -16
+ lvx %v9, 0, %r29
+ addi %r29, %r29, -16
+ lvx %v8, 0, %r29
+ addi %r29, %r29, -16
+ lvx %v7, 0, %r29
+ addi %r29, %r29, -16
+ lvx %v6, 0, %r29
+ addi %r29, %r29, -16
+ lvx %v5, 0, %r29
+ addi %r29, %r29, -16
+ lvx %v4, 0, %r29
+ addi %r29, %r29, -16
+ lvx %v3, 0, %r29
+ addi %r29, %r29, -16
+ lvx %v2, 0, %r29
+3:
+
/* Make the call. */
ld %r11, 8(%r28)
bctrl
bt 31, .Lstruct_return_value
bt 30, .Ldone_return_value
bt 29, .Lfp_return_value
+ bt 28, .Lvec_return_value
std %r3, 0(%r30)
/* Fall through... */
ld %r31, -8(%r1)
blr
+.Lvec_return_value:
+ stvx %v2, 0, %r30
+ b .Ldone_return_value
+
.Lfp_return_value:
.cfi_def_cfa_register 28
- bf 28, .Lfloat_return_value
- stfd %f1, 0(%r30)
mtcrf 0x02, %r31 /* cr6 */
- bf 27, .Ldone_return_value
+ bf 27, .Lfloat_return_value
+ stfd %f1, 0(%r30)
+ bf 26, .Ldone_return_value
stfd %f2, 8(%r30)
b .Ldone_return_value
.Lfloat_return_value:
b .Ldone_return_value
.Lstruct_return_value:
- bf 29, .Lsmall_struct
- bf 28, .Lfloat_homog_return_value
+ bf 29, .Lvec_homog_or_small_struct
+ mtcrf 0x02, %r31 /* cr6 */
+ bf 27, .Lfloat_homog_return_value
stfd %f1, 0(%r30)
stfd %f2, 8(%r30)
stfd %f3, 16(%r30)
stfs %f8, 28(%r30)
b .Ldone_return_value
+.Lvec_homog_or_small_struct:
+ bf 28, .Lsmall_struct
+ stvx %v2, 0, %r30
+ addi %r30, %r30, 16
+ stvx %v3, 0, %r30
+ addi %r30, %r30, 16
+ stvx %v4, 0, %r30
+ addi %r30, %r30, 16
+ stvx %v5, 0, %r30
+ addi %r30, %r30, 16
+ stvx %v6, 0, %r30
+ addi %r30, %r30, 16
+ stvx %v7, 0, %r30
+ addi %r30, %r30, 16
+ stvx %v8, 0, %r30
+ addi %r30, %r30, 16
+ stvx %v9, 0, %r30
+ b .Ldone_return_value
+
.Lsmall_struct:
std %r3, 0(%r30)
std %r4, 8(%r30)