From: Anthony Green Date: Thu, 2 Feb 2023 11:59:46 +0000 (-0500) Subject: Add HPPA64 support X-Git-Tag: upstream/3.4.7~80 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=baa6bbbc176b3c572eaf05b3fae0311df7e5f7b7;p=platform%2Fupstream%2Flibffi.git Add HPPA64 support --- diff --git a/Makefile.am b/Makefile.am index a4a4887..fde7ec2 100644 --- a/Makefile.am +++ b/Makefile.am @@ -81,7 +81,7 @@ EXTRA_libffi_la_SOURCES = src/aarch64/ffi.c src/aarch64/sysv.S \ src/mips/n32.S src/moxie/ffi.c src/moxie/eabi.S \ src/nios2/ffi.c src/nios2/sysv.S src/or1k/ffi.c \ src/or1k/sysv.S src/pa/ffi.c src/pa/linux.S src/pa/hpux32.S \ - src/powerpc/ffi.c src/powerpc/ffi_sysv.c \ + src/pa/hpux64.S src/powerpc/ffi.c src/powerpc/ffi_sysv.c \ src/powerpc/ffi_linux64.c src/powerpc/sysv.S \ src/powerpc/linux64.S src/powerpc/linux64_closure.S \ src/powerpc/ppc_closure.S src/powerpc/aix.S \ diff --git a/README.md b/README.md index a5b60e8..ddad877 100644 --- a/README.md +++ b/README.md @@ -59,6 +59,7 @@ tested: | Blackfin | uClinux | GCC | | CSKY | Linux | GCC | | HPPA | HPUX | GCC | +| HPPA64 | HPUX | GCC | | KVX | Linux | GCC | | IA-64 | Linux | GCC | | LoongArch64 | Linux | GCC | @@ -197,6 +198,10 @@ History See the git log for details at http://github.com/libffi/libffi. + TBD - TBD + Add support for HPPA64. + Many x86 Darwin fixes. + 3.4.4 Oct-23-2022 Important aarch64 fixes, including support for linux builds with Link Time Optimization (-flto). 
@@ -474,6 +479,7 @@ developers: nios ii Sandra Loosemore openrisc Sebastian Macke pa Randolph Chung, Dave Anglin, Andreas Tobler + pa64 Dave Anglin powerpc Geoffrey Keating, Andreas Tobler, David Edelsohn, John Hornkvist powerpc64 Jakub Jelinek diff --git a/configure.host b/configure.host index f23716f..a4ca511 100644 --- a/configure.host +++ b/configure.host @@ -73,6 +73,7 @@ case "${host}" in ;; hppa*64-*-hpux*) TARGET=PA64_HPUX; TARGETDIR=pa + SOURCES="ffi64.c hpux64.S" ;; hppa*-*-hpux*) TARGET=PA_HPUX; TARGETDIR=pa diff --git a/src/pa/ffi64.c b/src/pa/ffi64.c new file mode 100644 index 0000000..08807c8 --- /dev/null +++ b/src/pa/ffi64.c @@ -0,0 +1,614 @@ +/* ----------------------------------------------------------------------- + ffi64.c - (c) 2022 John David Anglin + + HPPA Foreign Function Interface + PA 64-Bit ABI support + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + ``Software''), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS IN THE SOFTWARE. 
+ ----------------------------------------------------------------------- */ + +#include +#include + +#include +#include + +#define ROUND_UP(v, a) (((size_t)(v) + (a) - 1) & ~((a) - 1)) + +#define FIRST_ARG_SLOT 0 +#define DEBUG_LEVEL 0 + +#define fldw(addr, fpreg) \ + __asm__ volatile ("fldw 4(%0), %%" #fpreg "R" : : "r"(addr) : #fpreg) +#define fstw(fpreg, addr) \ + __asm__ volatile ("fstw %%" #fpreg "R, 4(%0)" : : "r"(addr)) +#define fldd(addr, fpreg) \ + __asm__ volatile ("fldd 0(%0), %%" #fpreg "L" : : "r"(addr) : #fpreg) +#define fstd(fpreg, addr) \ + __asm__ volatile ("fstd %%" #fpreg "L, 0(%0)" : : "r"(addr)) + +#define debug(lvl, x...) do { if (lvl <= DEBUG_LEVEL) { printf(x); } } while (0) + +static inline int ffi_struct_type(ffi_type *t) +{ + int sz = t->size; + + /* Small structure results are returned in registers 28 and 29, + larger ones are in a buffer allocated by the caller. The + address of the buffer is passed in r28. The buffer is supposed + to be aligned on a 16-byte boundary. Register return values are + padded on the right. The pad bits on the right are undefined. + Returns -size for structures of at most 16 bytes, else FFI_TYPE_STRUCT. */ + + if (sz <= 16) + return -sz; + else + return FFI_TYPE_STRUCT; +} + +/* PA has a downward growing stack, which looks like this. Stack + arguments are offset from the argument pointer (AP) in r29. + + Offset + [ Fixed args ] + AP-64 arg word 0 (r26, fr4) + AP-56 arg word 1 (r25, fr5) + AP-48 arg word 2 (r24, fr6) + AP-40 arg word 3 (r23, fr7) + AP-32 arg word 4 (r22, fr8) + AP-24 arg word 5 (r21, fr9) + AP-16 arg word 6 (r20, fr10) + AP-8 arg word 7 (r19, fr11) + [ Variable args; AP = SP-16 if there are no variable args ] + AP stack arg 0 + AP+8 stack arg 1 + ... + [ Frame marker ] + SP-16 RP + SP-8 previous SP + + The first eight argument words on the stack are reserved for use by + the callee. Instead, the general and floating registers replace + the first eight argument slots. Non FP arguments are passed solely + in the general registers. 
Single and double FP arguments are passed + in both general and floating registers when using libffi. + + The registers are allocated in the same manner as stack slots. + This allows the callee to save its arguments on the stack if + necessary: + + arg word 0 -> gr26 or fr4L or fr4R + arg word 1 -> gr25 or fr5L or fr5R + arg word 2 -> gr24 or fr6L or fr6R + arg word 3 -> gr23 or fr7L or fr7R + ... + + Single-precision floating-point parameters, when passed in + floating-point registers, are passed in the right halves of the + floating point registers; the left halves are unused. + + Quad-precision floating-point parameters within the first 64 bytes of + the parameter list are always passed in general registers. + + The rest of the arguments are passed on the stack starting at AP. + + This means we can have holes either in the register allocation, + or in the stack. */ + +/* ffi_prep_args_pa64 is called by the assembly routine once stack space + has been allocated for the function's arguments. + + The following code will put everything into the stack frame + (which was allocated by the asm routine), and on return + the asm routine will load the arguments that should be + passed by register into the appropriate registers. + + NOTE: We load floating point args in this function... that means we + assume gcc will not mess with fp regs in here. 
*/ + +void ffi_prep_args_pa64(UINT64 *stack, extended_cif *ecif, unsigned bytes) +{ + register unsigned int i; + register ffi_type **p_arg; + register void **p_argv; + unsigned int slot = FIRST_ARG_SLOT; + size_t len; + + debug(1, "%s: stack = %p, ecif = %p, bytes = %u\n", __FUNCTION__, stack, + ecif, bytes); + + p_arg = ecif->cif->arg_types; + p_argv = ecif->avalue; + + for (i = 0; i < ecif->cif->nargs; i++) + { + int type = (*p_arg)->type; + + len = (*p_arg)->size; + + switch (type) + { + case FFI_TYPE_SINT8: + *(SINT64 *)(stack + slot) = *(SINT8 *)(*p_argv); + break; + + case FFI_TYPE_UINT8: + *(UINT64 *)(stack + slot) = *(UINT8 *)(*p_argv); + break; + + case FFI_TYPE_SINT16: + *(SINT64 *)(stack + slot) = *(SINT16 *)(*p_argv); + break; + + case FFI_TYPE_UINT16: + *(UINT64 *)(stack + slot) = *(UINT16 *)(*p_argv); + break; + + case FFI_TYPE_SINT32: + *(SINT64 *)(stack + slot) = *(SINT32 *)(*p_argv); + break; + + case FFI_TYPE_UINT32: + *(UINT64 *)(stack + slot) = *(UINT32 *)(*p_argv); + break; + + case FFI_TYPE_UINT64: + case FFI_TYPE_SINT64: + case FFI_TYPE_POINTER: + debug(3, "Storing UINT64 %lu in slot %u\n", *(UINT64 *)(*p_argv), + slot); + *(UINT64 *)(stack + slot) = *(UINT64 *)(*p_argv); + break; + + case FFI_TYPE_FLOAT: + /* First 8 args go in the right halves of fr4 - fr11 (fldw loads the R half). */ + debug(3, "Storing UINT32(float) in slot %u\n", slot); + *(UINT64 *)(stack + slot) = *(UINT32 *)(*p_argv); + switch (slot - FIRST_ARG_SLOT) + { + /* Slots 0 - 7 load fr4R - fr11R; later slots stay on the stack only. 
*/ + case 0: fldw(stack + slot, fr4); break; + case 1: fldw(stack + slot, fr5); break; + case 2: fldw(stack + slot, fr6); break; + case 3: fldw(stack + slot, fr7); break; + case 4: fldw(stack + slot, fr8); break; + case 5: fldw(stack + slot, fr9); break; + case 6: fldw(stack + slot, fr10); break; + case 7: fldw(stack + slot, fr11); break; + } + break; + + case FFI_TYPE_DOUBLE: + debug(3, "Storing UINT64(double) at slot %u\n", slot); + *(UINT64 *)(stack + slot) = *(UINT64 *)(*p_argv); + switch (slot - FIRST_ARG_SLOT) + { + /* First 8 args go in fr4 to fr11. */ + case 0: fldd(stack + slot, fr4); break; + case 1: fldd(stack + slot, fr5); break; + case 2: fldd(stack + slot, fr6); break; + case 3: fldd(stack + slot, fr7); break; + case 4: fldd(stack + slot, fr8); break; + case 5: fldd(stack + slot, fr9); break; + case 6: fldd(stack + slot, fr10); break; + case 7: fldd(stack + slot, fr11); break; + } + break; + +#ifdef PA64_HPUX + case FFI_TYPE_LONGDOUBLE: + /* Align slot to a 16-byte boundary. */ + slot += (slot & 1); + *(UINT64 *)(stack + slot) = *(UINT64 *)(*p_argv); + *(UINT64 *)(stack + slot + 1) = *(UINT64 *)(*p_argv + 8); + break; +#endif + + case FFI_TYPE_STRUCT: + /* Structs larger than 8 bytes are aligned on a 16-byte boundary. 
*/ + if (len > 8) + slot += (slot & 1); + memcpy((char *)(stack + slot), (char *)*p_argv, len); + break; + + default: + FFI_ASSERT(0); + } + + slot += ROUND_UP (len, 8) >> 3; + p_arg++; + p_argv++; + } + + FFI_ASSERT(slot * 8 <= bytes); + + return; +} + +static void ffi_size_stack_pa64(ffi_cif *cif) +{ + ffi_type **ptr; + int i; + int z = 0; /* # stack slots */ + + for (ptr = cif->arg_types, i = 0; i < cif->nargs; ptr++, i++) + { + int type = (*ptr)->type; + int size = (*ptr)->size; + + switch (type) + { +#ifdef PA64_HPUX + case FFI_TYPE_LONGDOUBLE: + z += 2 + (z & 1); + break; +#endif + + case FFI_TYPE_STRUCT: + if (size > 8) + z += (z & 1); + z += ROUND_UP (size, 8) >> 3; + break; + + default: /* 64-bit values */ + z++; + } + } + + /* We need a minimum of 8 argument slots. Stack must be 16-byte + aligned. */ + if (z <= 8) + z = 8; + else + z += (z & 1); + + /* Add 64 bytes beyond the argument slots. NOTE(review): the frame marker itself is 16 bytes; confirm what the remaining 48 bytes are reserved for. */ + cif->bytes = z * 8 + 64; + debug(3, "Calculated stack size is %u bytes\n", cif->bytes); +} + +/* Perform machine dependent cif processing. */ +ffi_status ffi_prep_cif_machdep(ffi_cif *cif) +{ + /* Set the return type flag for jump table. */ + switch (cif->rtype->type) + { + case FFI_TYPE_COMPLEX: + case FFI_TYPE_STRUCT: + /* For the return type we have to check the size of the structures. + If the size is smaller or equal 8 bytes, the result is given back + in one register. If the size is smaller or equal 16 bytes then we + return the result in two registers. If the size is bigger than + 16 bytes, the return is in a buffer allocated by the caller. */ + cif->flags = ffi_struct_type(cif->rtype); + break; + + default: + cif->flags = (unsigned) cif->rtype->type; + break; + } + + /* Lucky us, because of the unique PA ABI we get to do our + own stack sizing. 
*/ + switch (cif->abi) + { + case FFI_PA64: + ffi_size_stack_pa64(cif); + break; + + default: + FFI_ASSERT(0); + break; + } + + return FFI_OK; +} + +extern void ffi_call_pa64(void (*)(UINT64 *, extended_cif *, unsigned), + extended_cif *, unsigned, unsigned, unsigned *, + void (*fn)(void)); + +void ffi_call(ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue) +{ + extended_cif ecif; + + ecif.cif = cif; + ecif.avalue = avalue; + + /* If the return value is a struct or complex larger than 16 bytes and + we don't have a return value address then we need to make one. */ + + if (rvalue == NULL + && (cif->rtype->type == FFI_TYPE_STRUCT + || cif->rtype->type == FFI_TYPE_COMPLEX) + && cif->rtype->size > 16) + ecif.rvalue = alloca(ROUND_UP (cif->rtype->size, 16)); + else + ecif.rvalue = rvalue; + + + switch (cif->abi) + { + case FFI_PA64: + debug(3, "Calling ffi_call_pa64: ecif=%p, bytes=%u, flags=%u, rvalue=%p, fn=%p\n", &ecif, cif->bytes, cif->flags, ecif.rvalue, (void *)fn); + ffi_call_pa64(ffi_prep_args_pa64, &ecif, cif->bytes, + cif->flags, ecif.rvalue, fn); + break; + + default: + FFI_ASSERT(0); + break; + } +} + +#if FFI_CLOSURES +/* This is more-or-less an inverse of ffi_call -- we have arguments on + the stack, and we need to fill them into a cif structure and invoke + the user function. This really ought to be in asm to make sure + the compiler doesn't do things we don't expect. */ +ffi_status ffi_closure_inner_pa64(ffi_closure *closure, UINT64 *stack) +{ + ffi_cif *cif; + void **avalue; + void *rvalue; + /* Functions can return up to 128-bits in registers. Return address + must be double word aligned. */ + union { long double rld; UINT64 ret[2]; } u; + ffi_type **p_arg; + char *tmp; + int i, avn; + unsigned int slot = FIRST_ARG_SLOT; + register UINT64 r28 asm("r28"); + + cif = closure->cif; + + /* If returning via structure, callee will write to our pointer. 
*/ + if (cif->flags == FFI_TYPE_STRUCT) + rvalue = (void *)r28; + else + rvalue = &u; + + avalue = (void **)alloca(cif->nargs * FFI_SIZEOF_ARG); + avn = cif->nargs; + p_arg = cif->arg_types; + + for (i = 0; i < avn; i++) + { + int type = (*p_arg)->type; + + switch (type) + { + case FFI_TYPE_SINT8: + case FFI_TYPE_UINT8: + avalue[i] = (void *)(stack + slot) + 7; + break; + + case FFI_TYPE_SINT16: + case FFI_TYPE_UINT16: + avalue[i] = (void *)(stack + slot) + 6; + break; + + case FFI_TYPE_SINT32: + case FFI_TYPE_UINT32: + avalue[i] = (void *)(stack + slot) + 4; + break; + + case FFI_TYPE_POINTER: + case FFI_TYPE_SINT64: + case FFI_TYPE_UINT64: + avalue[i] = (void *)(stack + slot); + break; + + case FFI_TYPE_FLOAT: + /* The closure call is indirect. In Linux, floating point + arguments in indirect calls with a prototype are passed + in the floating point registers instead of the general + registers. So, we need to replace what was previously + stored in the current slot with the value in the + corresponding floating point register. */ + switch (slot - FIRST_ARG_SLOT) /* register index, as in ffi_prep_args_pa64 */ + { + case 0: fstw(fr4, (void *)(stack + slot)); break; + case 1: fstw(fr5, (void *)(stack + slot)); break; + case 2: fstw(fr6, (void *)(stack + slot)); break; + case 3: fstw(fr7, (void *)(stack + slot)); break; + case 4: fstw(fr8, (void *)(stack + slot)); break; + case 5: fstw(fr9, (void *)(stack + slot)); break; + case 6: fstw(fr10, (void *)(stack + slot)); break; + case 7: fstw(fr11, (void *)(stack + slot)); break; + } + avalue[i] = (void *)(stack + slot) + 4; + break; + + case FFI_TYPE_DOUBLE: + /* See previous comment for FFI_TYPE_FLOAT. 
*/ + switch (slot - FIRST_ARG_SLOT) /* register index, as in ffi_prep_args_pa64 */ + { + case 0: fstd(fr4, (void *)(stack + slot)); break; + case 1: fstd(fr5, (void *)(stack + slot)); break; + case 2: fstd(fr6, (void *)(stack + slot)); break; + case 3: fstd(fr7, (void *)(stack + slot)); break; + case 4: fstd(fr8, (void *)(stack + slot)); break; + case 5: fstd(fr9, (void *)(stack + slot)); break; + case 6: fstd(fr10, (void *)(stack + slot)); break; + case 7: fstd(fr11, (void *)(stack + slot)); break; + } + avalue[i] = (void *)(stack + slot); + break; + +#ifdef PA64_HPUX + case FFI_TYPE_LONGDOUBLE: + /* Long doubles are treated like a big structure. */ + slot += (slot & 1); + avalue[i] = (void *)(stack + slot); + break; +#endif + + case FFI_TYPE_STRUCT: + /* All structs are passed in registers. Structs larger + than 8 bytes are aligned on a 16-byte boundary. */ + if((*p_arg)->size > 8) + slot += (slot & 1); + avalue[i] = (void *) (stack + slot); + break; + + default: + FFI_ASSERT(0); + } + + slot += (ROUND_UP ((*p_arg)->size, 8) >> 3); + p_arg++; + } + + /* Invoke the closure. */ + (closure->fun) (cif, rvalue, avalue, closure->user_data); + + debug(3, "after calling function, ret[0] = %16lx, ret[1] = %16lx\n", u.ret[0], + u.ret[1]); + + /* Store the result according to the return type flag in cif->flags. 
*/ + switch (cif->flags) + { + case FFI_TYPE_UINT8: + *(stack + FIRST_ARG_SLOT) = (UINT8)u.ret[0]; + break; + case FFI_TYPE_SINT8: + *(stack + FIRST_ARG_SLOT) = (SINT8)u.ret[0]; + break; + case FFI_TYPE_UINT16: + *(stack + FIRST_ARG_SLOT) = (UINT16)u.ret[0]; + break; + case FFI_TYPE_SINT16: + *(stack + FIRST_ARG_SLOT) = (SINT16)u.ret[0]; + break; + case FFI_TYPE_INT: + case FFI_TYPE_SINT32: + *(stack + FIRST_ARG_SLOT) = (SINT32)u.ret[0]; + break; + case FFI_TYPE_UINT32: + *(stack + FIRST_ARG_SLOT) = (UINT32)u.ret[0]; + break; + case FFI_TYPE_SINT64: + case FFI_TYPE_UINT64: + case FFI_TYPE_POINTER: + *(stack + FIRST_ARG_SLOT) = u.ret[0]; + break; + + case FFI_TYPE_LONGDOUBLE: + *(stack + FIRST_ARG_SLOT) = u.ret[0]; + *(stack + FIRST_ARG_SLOT + 1) = u.ret[1]; + break; + + case FFI_TYPE_DOUBLE: + fldd(rvalue, fr4); + break; + + case FFI_TYPE_FLOAT: + /* Adjust for address adjustment in fldw macro. */ + fldw(rvalue - 4, fr4); + break; + + case FFI_TYPE_STRUCT: + /* Don't need a return value, done by caller. */ + break; + + case -1: + case -2: + case -3: + case -4: + case -5: + case -6: + case -7: + case -8: + case -9: + case -10: + case -11: + case -12: + case -13: + case -14: + case -15: + case -16: + tmp = (void*)(stack + FIRST_ARG_SLOT); + memcpy((void*)tmp, &u, cif->rtype->size); + break; + + case FFI_TYPE_VOID: + break; + + default: + debug(0, "assert with cif->flags: %d\n",cif->flags); + FFI_ASSERT(0); + break; + } + return FFI_OK; +} + +/* Fill in a closure to refer to the specified fun and user_data. + cif specifies the argument and result types for fun. + The cif must already be prep'ed. */ + +extern void ffi_closure_pa64(void); + +ffi_status +ffi_prep_closure_loc (ffi_closure* closure, + ffi_cif* cif, + void (*fun)(ffi_cif*,void*,void**,void*), + void *user_data, + void *codeloc) +{ + /* The layout of a function descriptor. 
*/ + struct pa64_fd + { + UINT64 tmp1; + UINT64 tmp2; + UINT64 code_pointer; + UINT64 gp; + }; + + struct ffi_pa64_trampoline_struct + { + UINT64 real_gp; /* Real gp value. */ + UINT64 tmp2; + UINT64 code_pointer; /* Code address of ffi_closure_pa64. */ + UINT64 fake_gp; /* Pointer to closure, installed as gp. */ + }; + + struct ffi_pa64_trampoline_struct *tramp; + struct pa64_fd *fd; + + if (cif->abi != FFI_PA64) + return FFI_BAD_ABI; + + /* Get function descriptor address for ffi_closure_pa64. */ + fd = (struct pa64_fd *)((UINT64)ffi_closure_pa64); + + /* Setup trampoline. */ + tramp = (struct ffi_pa64_trampoline_struct *)closure->tramp; + tramp->code_pointer = fd->code_pointer; + tramp->fake_gp = (UINT64)codeloc; + tramp->real_gp = fd->gp; + + closure->cif = cif; + closure->user_data = user_data; + closure->fun = fun; + + return FFI_OK; +} +#endif diff --git a/src/pa/ffitarget.h b/src/pa/ffitarget.h index df1209e..6a2c5dc 100644 --- a/src/pa/ffitarget.h +++ b/src/pa/ffitarget.h @@ -54,7 +54,6 @@ typedef enum ffi_abi { #endif #ifdef PA64_HPUX -#error "PA64_HPUX FFI is not yet implemented" FFI_PA64, FFI_LAST_ABI, FFI_DEFAULT_ABI = FFI_PA64 @@ -68,7 +67,11 @@ typedef enum ffi_abi { #define FFI_CLOSURES 1 #define FFI_NATIVE_RAW_API 0 +#if defined(PA64_HPUX) +#define FFI_TRAMPOLINE_SIZE 32 +#else #define FFI_TRAMPOLINE_SIZE 12 +#endif #define FFI_TYPE_SMALL_STRUCT2 -1 #define FFI_TYPE_SMALL_STRUCT3 -2 diff --git a/src/pa/hpux64.S b/src/pa/hpux64.S new file mode 100644 index 0000000..6a82b57 --- /dev/null +++ b/src/pa/hpux64.S @@ -0,0 +1,681 @@ +/* ----------------------------------------------------------------------- + hpux64.S - (c) 2005-2022 John David Anglin + + HPUX PA 64-Bit Foreign Function Interface + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + ``Software''), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, 
merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR + OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + OTHER DEALINGS IN THE SOFTWARE. + ----------------------------------------------------------------------- */ + +#define LIBFFI_ASM +#include +#include + + .LEVEL 2.0w + .text + .align 4 + + /* void ffi_call_pa64(void (*)(char *, extended_cif *), + extended_cif *ecif, + unsigned bytes, + unsigned flags, + unsigned *rvalue, + void (*fn)()); + */ + + .export ffi_call_pa64,code + .import ffi_prep_args_pa64,code + + .align 4 + +L$FB1 +ffi_call_pa64 + .proc + .callinfo FRAME=48,CALLS,SAVE_RP,ENTRY_GR=4 + .entry + std %rp, -16(%sp) + copy %r3, %r1 +L$CFI11 + copy %sp, %r3 +L$CFI12 + std,ma %r1, 48(%sp) + + /* Setup the stack for calling prep_args... 
+ We want the stack to look like this: + + [ Previous stack ] <- %r3 + + [ 48-byte register save area ] + + [ Stack space for call arguments ] <- %r4 + + [ 16-byte frame marker ] + + [ 128-byte stack for calling prep_args ] <- %sp + */ + + std %r4, 8(%r3) ; save r4 +L$CFI13 + std %r23, 16(%r3) ; save flags we need it later + std %r22, 24(%r3) ; save rvalue + std %r21, 32(%r3) ; save fn pointer + + copy %sp, %r4 + copy %r4, %r26 ; argument stack pointer + addl %r24, %sp, %sp ; allocate argument space + + ldo 112(%sp), %r29 ; arg pointer for prep args + + /* Call prep_args: + %arg0(stack) -- set up above to point to call arguments + %arg1(ecif) -- same as incoming param + %arg2(bytes) -- same as incoming param */ + bl ffi_prep_args_pa64,%r2 + ldo 128(%sp), %sp + ldo -128(%sp), %sp + + /* Load the arguments that should be passed in registers + The fp args were loaded by the prep_args function. */ + ldd 0(%r4), %r26 + ldd 8(%r4), %r25 + ldd 16(%r4), %r24 + ldd 24(%r4), %r23 + ldd 32(%r4), %r22 + ldd 40(%r4), %r21 + ldd 48(%r4), %r20 + ldd 56(%r4), %r19 + + ldd 24(%r3), %ret0 ; %ret0 <- rvalue + + ldd 32(%r3), %r1 ; %r1 <- function pointer + ldd 16(%r1), %rp ; fn address + ldd 24(%r1), %dp ; New gp + bve,l (%rp), %r2 ; Call the user function + ldo 64(%r4), %r29 ; Argument pointer + + /* Prepare to store the result; recover flags and rvalue. */ + ldd 16(%r3), %r21 ; r21 <- flags + extrd,s %r21, 63, 32, %r21 ; sign extend flags for blr + + /* Adjust flags range from [-16, 15] to [0, 31]. 
*/ + addi 16, %r21, %r21 + + blr %r21, %r0 + ldd 24(%r3), %r20 ; r20 <- rvalue + + /* Giant jump table */ + /* 16-byte small struct */ + b,n L$smst16 + nop + /* 15-byte small struct */ + b,n L$smst15 + nop + /* 14-byte small struct */ + b,n L$smst14 + nop + /* 13-byte small struct */ + b,n L$smst13 + nop + /* 12-byte small struct */ + b,n L$smst12 + nop + /* 11-byte small struct */ + b,n L$smst11 + nop + /* 10-byte small struct */ + b,n L$smst10 + nop + /* 9-byte small struct */ + b,n L$smst9 + nop + /* 8-byte small struct */ + b,n L$smst8 + nop + /* 7-byte small struct */ + b,n L$smst7 + nop + /* 6-byte small struct */ + b,n L$smst6 + nop + /* 5-byte small struct */ + b,n L$smst5 + nop + /* 4-byte small struct */ + b,n L$smst4 + nop + /* 3-byte small struct */ + b,n L$smst3 + nop + /* 2-byte small struct */ + b,n L$smst2 + nop + /* 1-byte small struct */ + b,n L$smst1 + nop + /* void */ + b,n L$done + nop + /* int */ + b L$done + std %ret0, 0(%r20) + /* float */ + b L$done + fstw %fr4R, 0(%r20) + /* double */ + b L$done + fstd %fr4, 0(%r20) + /* long double */ + b,n L$longdouble + nop + /* unsigned int8 */ + b L$done + std %ret0, 0(%r20) + /* signed int8 */ + b L$done + std %ret0, 0(%r20) + /* unsigned int16 */ + b L$done + std %ret0, 0(%r20) + /* signed int16 */ + b L$done + std %ret0, 0(%r20) + /* unsigned int32 */ + b L$done + std %ret0, 0(%r20) + /* signed int32 */ + b L$done + std %ret0, 0(%r20) + /* unsigned int64 */ + b L$done + std %ret0, 0(%r20) + /* signed int64 */ + b L$done + std %ret0, 0(%r20) + /* large struct */ + b,n L$done + nop + /* pointer */ + b L$done + std %ret0, 0(%r20) + /* complex */ + b,n L$done + nop + +L$longdouble + std %ret0, 0(%r20) + b L$done + std %ret1, 8(%r20) + + /* We need to copy byte-by-byte the exact number bytes + in the struct to avoid clobbering other data. 
*/ +L$smst1 + extrd,u %ret0, 7, 8, %r22 + b L$done + stb %r22, 0(%r20) + +L$smst2 + extrd,u %ret0, 7, 8, %r22 + stb,ma %r22, 1(%r20) + extrd,u %ret0, 15, 8, %r22 + b L$done + stb %r22, 0(%r20) + +L$smst3 + extrd,u %ret0, 7, 8, %r22 + stb,ma %r22, 1(%r20) + extrd,u %ret0, 15, 8, %r22 + stb,ma %r22, 1(%r20) + extrd,u %ret0, 23, 8, %r22 + b L$done + stb %r22, 0(%r20) + +L$smst4 + extrd,u %ret0, 7, 8, %r22 + stb,ma %r22, 1(%r20) + extrd,u %ret0, 15, 8, %r22 + stb,ma %r22, 1(%r20) + extrd,u %ret0, 23, 8, %r22 + stb,ma %r22, 1(%r20) + extrd,u %ret0, 31, 8, %r22 + b L$done + stb %r22, 0(%r20) + +L$smst5 + extrd,u %ret0, 7, 8, %r22 + stb,ma %r22, 1(%r20) + extrd,u %ret0, 15, 8, %r22 + stb,ma %r22, 1(%r20) + extrd,u %ret0, 23, 8, %r22 + stb,ma %r22, 1(%r20) + extrd,u %ret0, 31, 8, %r22 + stb,ma %r22, 1(%r20) + extrd,u %ret0, 39, 8, %r22 + b L$done + stb %r22, 0(%r20) + +L$smst6 + extrd,u %ret0, 7, 8, %r22 + stb,ma %r22, 1(%r20) + extrd,u %ret0, 15, 8, %r22 + stb,ma %r22, 1(%r20) + extrd,u %ret0, 23, 8, %r22 + stb,ma %r22, 1(%r20) + extrd,u %ret0, 31, 8, %r22 + stb,ma %r22, 1(%r20) + extrd,u %ret0, 39, 8, %r22 + stb,ma %r22, 1(%r20) + extrd,u %ret0, 47, 8, %r22 + b L$done + stb %r22, 0(%r20) + +L$smst7 + extrd,u %ret0, 7, 8, %r22 + stb,ma %r22, 1(%r20) + extrd,u %ret0, 15, 8, %r22 + stb,ma %r22, 1(%r20) + extrd,u %ret0, 23, 8, %r22 + stb,ma %r22, 1(%r20) + extrd,u %ret0, 31, 8, %r22 + stb,ma %r22, 1(%r20) + extrd,u %ret0, 39, 8, %r22 + stb,ma %r22, 1(%r20) + extrd,u %ret0, 47, 8, %r22 + stb,ma %r22, 1(%r20) + extrd,u %ret0, 55, 8, %r22 + b L$done + stb %r22, 0(%r20) + +L$smst8 + extrd,u %ret0, 7, 8, %r22 + stb,ma %r22, 1(%r20) + extrd,u %ret0, 15, 8, %r22 + stb,ma %r22, 1(%r20) + extrd,u %ret0, 23, 8, %r22 + stb,ma %r22, 1(%r20) + extrd,u %ret0, 31, 8, %r22 + stb,ma %r22, 1(%r20) + extrd,u %ret0, 39, 8, %r22 + stb,ma %r22, 1(%r20) + extrd,u %ret0, 47, 8, %r22 + stb,ma %r22, 1(%r20) + extrd,u %ret0, 55, 8, %r22 + stb,ma %r22, 1(%r20) + b L$done + stb %ret0, 0(%r20) + +L$smst9 
+ extrd,u %ret0, 7, 8, %r22 + stb,ma %r22, 1(%r20) + extrd,u %ret0, 15, 8, %r22 + stb,ma %r22, 1(%r20) + extrd,u %ret0, 23, 8, %r22 + stb,ma %r22, 1(%r20) + extrd,u %ret0, 31, 8, %r22 + stb,ma %r22, 1(%r20) + extrd,u %ret0, 39, 8, %r22 + stb,ma %r22, 1(%r20) + extrd,u %ret0, 47, 8, %r22 + stb,ma %r22, 1(%r20) + extrd,u %ret0, 55, 8, %r22 + stb,ma %r22, 1(%r20) + stb,ma %ret0, 1(%r20) + extrd,u %ret1, 7, 8, %r22 + b L$done + stb %r22, 0(%r20) + +L$smst10 + extrd,u %ret0, 7, 8, %r22 + stb,ma %r22, 1(%r20) + extrd,u %ret0, 15, 8, %r22 + stb,ma %r22, 1(%r20) + extrd,u %ret0, 23, 8, %r22 + stb,ma %r22, 1(%r20) + extrd,u %ret0, 31, 8, %r22 + stb,ma %r22, 1(%r20) + extrd,u %ret0, 39, 8, %r22 + stb,ma %r22, 1(%r20) + extrd,u %ret0, 47, 8, %r22 + stb,ma %r22, 1(%r20) + extrd,u %ret0, 55, 8, %r22 + stb,ma %r22, 1(%r20) + stb,ma %ret0, 1(%r20) + extrd,u %ret1, 7, 8, %r22 + stb,ma %r22, 1(%r20) + extrd,u %ret1, 15, 8, %r22 + b L$done + stb %r22, 0(%r20) + +L$smst11 + extrd,u %ret0, 7, 8, %r22 + stb,ma %r22, 1(%r20) + extrd,u %ret0, 15, 8, %r22 + stb,ma %r22, 1(%r20) + extrd,u %ret0, 23, 8, %r22 + stb,ma %r22, 1(%r20) + extrd,u %ret0, 31, 8, %r22 + stb,ma %r22, 1(%r20) + extrd,u %ret0, 39, 8, %r22 + stb,ma %r22, 1(%r20) + extrd,u %ret0, 47, 8, %r22 + stb,ma %r22, 1(%r20) + extrd,u %ret0, 55, 8, %r22 + stb,ma %r22, 1(%r20) + stb,ma %ret0, 1(%r20) + extrd,u %ret1, 7, 8, %r22 + stb,ma %r22, 1(%r20) + extrd,u %ret1, 15, 8, %r22 + stb,ma %r22, 1(%r20) + extrd,u %ret1, 23, 8, %r22 + b L$done + stb %r22, 0(%r20) + +L$smst12 + extrd,u %ret0, 7, 8, %r22 + stb,ma %r22, 1(%r20) + extrd,u %ret0, 15, 8, %r22 + stb,ma %r22, 1(%r20) + extrd,u %ret0, 23, 8, %r22 + stb,ma %r22, 1(%r20) + extrd,u %ret0, 31, 8, %r22 + stb,ma %r22, 1(%r20) + extrd,u %ret0, 39, 8, %r22 + stb,ma %r22, 1(%r20) + extrd,u %ret0, 47, 8, %r22 + stb,ma %r22, 1(%r20) + extrd,u %ret0, 55, 8, %r22 + stb,ma %r22, 1(%r20) + stb,ma %ret0, 1(%r20) + extrd,u %ret1, 7, 8, %r22 + stb,ma %r22, 1(%r20) + extrd,u %ret1, 15, 8, %r22 + 
stb,ma %r22, 1(%r20) + extrd,u %ret1, 23, 8, %r22 + stb,ma %r22, 1(%r20) + extrd,u %ret1, 31, 8, %r22 + b L$done + stb %r22, 0(%r20) + +L$smst13 + extrd,u %ret0, 7, 8, %r22 + stb,ma %r22, 1(%r20) + extrd,u %ret0, 15, 8, %r22 + stb,ma %r22, 1(%r20) + extrd,u %ret0, 23, 8, %r22 + stb,ma %r22, 1(%r20) + extrd,u %ret0, 31, 8, %r22 + stb,ma %r22, 1(%r20) + extrd,u %ret0, 39, 8, %r22 + stb,ma %r22, 1(%r20) + extrd,u %ret0, 47, 8, %r22 + stb,ma %r22, 1(%r20) + extrd,u %ret0, 55, 8, %r22 + stb,ma %r22, 1(%r20) + stb,ma %ret0, 1(%r20) + extrd,u %ret1, 7, 8, %r22 + stb,ma %r22, 1(%r20) + extrd,u %ret1, 15, 8, %r22 + stb,ma %r22, 1(%r20) + extrd,u %ret1, 23, 8, %r22 + stb,ma %r22, 1(%r20) + extrd,u %ret1, 31, 8, %r22 + stb,ma %r22, 1(%r20) + extrd,u %ret1, 39, 8, %r22 + b L$done + stb %r22, 0(%r20) + +L$smst14 + extrd,u %ret0, 7, 8, %r22 + stb,ma %r22, 1(%r20) + extrd,u %ret0, 15, 8, %r22 + stb,ma %r22, 1(%r20) + extrd,u %ret0, 23, 8, %r22 + stb,ma %r22, 1(%r20) + extrd,u %ret0, 31, 8, %r22 + stb,ma %r22, 1(%r20) + extrd,u %ret0, 39, 8, %r22 + stb,ma %r22, 1(%r20) + extrd,u %ret0, 47, 8, %r22 + stb,ma %r22, 1(%r20) + extrd,u %ret0, 55, 8, %r22 + stb,ma %r22, 1(%r20) + stb,ma %ret0, 1(%r20) + extrd,u %ret1, 7, 8, %r22 + stb,ma %r22, 1(%r20) + extrd,u %ret1, 15, 8, %r22 + stb,ma %r22, 1(%r20) + extrd,u %ret1, 23, 8, %r22 + stb,ma %r22, 1(%r20) + extrd,u %ret1, 31, 8, %r22 + stb,ma %r22, 1(%r20) + extrd,u %ret1, 39, 8, %r22 + stb,ma %r22, 1(%r20) + extrd,u %ret1, 47, 8, %r22 + b L$done + stb %r22, 0(%r20) + +L$smst15 + extrd,u %ret0, 7, 8, %r22 + stb,ma %r22, 1(%r20) + extrd,u %ret0, 15, 8, %r22 + stb,ma %r22, 1(%r20) + extrd,u %ret0, 23, 8, %r22 + stb,ma %r22, 1(%r20) + extrd,u %ret0, 31, 8, %r22 + stb,ma %r22, 1(%r20) + extrd,u %ret0, 39, 8, %r22 + stb,ma %r22, 1(%r20) + extrd,u %ret0, 47, 8, %r22 + stb,ma %r22, 1(%r20) + extrd,u %ret0, 55, 8, %r22 + stb,ma %r22, 1(%r20) + stb,ma %ret0, 1(%r20) + extrd,u %ret1, 7, 8, %r22 + stb,ma %r22, 1(%r20) + extrd,u %ret1, 15, 8, %r22 + 
stb,ma %r22, 1(%r20) + extrd,u %ret1, 23, 8, %r22 + stb,ma %r22, 1(%r20) + extrd,u %ret1, 31, 8, %r22 + stb,ma %r22, 1(%r20) + extrd,u %ret1, 39, 8, %r22 + stb,ma %r22, 1(%r20) + extrd,u %ret1, 47, 8, %r22 + stb,ma %r22, 1(%r20) + extrd,u %ret1, 55, 8, %r22 + b L$done + stb %r22, 0(%r20) + +L$smst16 + extrd,u %ret0, 7, 8, %r22 + stb,ma %r22, 1(%r20) + extrd,u %ret0, 15, 8, %r22 + stb,ma %r22, 1(%r20) + extrd,u %ret0, 23, 8, %r22 + stb,ma %r22, 1(%r20) + extrd,u %ret0, 31, 8, %r22 + stb,ma %r22, 1(%r20) + extrd,u %ret0, 39, 8, %r22 + stb,ma %r22, 1(%r20) + extrd,u %ret0, 47, 8, %r22 + stb,ma %r22, 1(%r20) + extrd,u %ret0, 55, 8, %r22 + stb,ma %r22, 1(%r20) + stb,ma %ret0, 1(%r20) + extrd,u %ret1, 7, 8, %r22 + stb,ma %r22, 1(%r20) + extrd,u %ret1, 15, 8, %r22 + stb,ma %r22, 1(%r20) + extrd,u %ret1, 23, 8, %r22 + stb,ma %r22, 1(%r20) + extrd,u %ret1, 31, 8, %r22 + stb,ma %r22, 1(%r20) + extrd,u %ret1, 39, 8, %r22 + stb,ma %r22, 1(%r20) + extrd,u %ret1, 47, 8, %r22 + stb,ma %r22, 1(%r20) + extrd,u %ret1, 55, 8, %r22 + stb,ma %r22, 1(%r20) + stb %ret1, 0(%r20) + +L$done + /* all done, restore registers and return */ + copy %r4, %sp + ldd 8(%r3), %r4 + ldd -16(%r3), %rp + bve (%rp) + ldd,mb -48(%sp), %r3 + .exit + .procend +L$FE1 + .size ffi_call_pa64, .-ffi_call_pa64 + + /* void ffi_closure_pa64(void); + Called with closure argument in %r21 */ + + .export ffi_closure_pa64,code + .import ffi_closure_inner_pa64,code + .align 4 +L$FB2 +ffi_closure_pa64 + .proc + .callinfo FRAME=128,CALLS,SAVE_RP,ENTRY_GR=3 + .entry + + std %rp, -16(%sp) + copy %r3, %r1 +L$CFI21 + copy %sp, %r3 +L$CFI22 + std,ma %r1, 128(%sp) +L$CFI23 + + /* Put arguments onto the stack and call ffi_closure_inner. */ + std %r26, -64(%r29) + std %r25, -56(%r29) + std %r24, -48(%r29) + std %r23, -40(%r29) + std %r22, -32(%r29) + std %r21, -24(%r29) + std %r20, -16(%r29) + std %r19, -8(%r29) + + /* Load and save start of argument stack. */ + ldo -64(%r29), %r25 + std %r25, 8(%r3) + + /* Setup arg pointer. 
*/ + ldo -16(%sp), %ret1 + + /* Retrieve closure pointer and real gp. */ + copy %dp, %r26 + bl ffi_closure_inner_pa64, %r2 + ldd 0(%dp), %dp + + /* Retrieve start of argument stack. */ + ldd 8(%r3), %r1 + + /* Restore r3 and op stack. */ + ldd,mb -128(%sp), %r3 + + /* Load return address. */ + ldd -16(%sp), %rp + + /* Load return values from first and second stack slots. */ + ldd 0(%r1), %ret0 + bve (%rp) + ldd 8(%r1), %ret1 + + .exit + .procend + .end +L$FE2: + .size ffi_closure_pa64, .-ffi_closure_pa64 + + .section .eh_frame,"aw",@progbits +L$frame1: + .word L$ECIE1-L$SCIE1 ;# Length of Common Information Entry +L$SCIE1: + .word 0x0 ;# CIE Identifier Tag + .byte 0x3 ;# CIE Version + .stringz "" ;# CIE Augmentation + .uleb128 0x1 ;# CIE Code Alignment Factor + .sleb128 8 ;# CIE Data Alignment Factor + .byte 0x2 ;# CIE RA Column + .byte 0xc ;# DW_CFA_def_cfa + .uleb128 0x1e + .uleb128 0x0 + .align 8 +L$ECIE1: +L$SFDE1: + .word L$EFDE1-L$ASFDE1 ;# FDE Length +L$ASFDE1: + .word L$ASFDE1-L$frame1 ;# FDE CIE offset + .dword L$FB1 ;# FDE initial location + .dword L$FE1-L$FB1 ;# FDE address range + + .byte 0x4 ;# DW_CFA_advance_loc4 + .word L$CFI11-L$FB1 + .byte 0x9 ;# DW_CFA_register: r3 in r1 + .uleb128 0x3 + .uleb128 0x1 + .byte 0x11 ;# DW_CFA_offset_extended_sf: r2 at cfa-16 + .uleb128 0x2 + .sleb128 -2 + .byte 0x4 ;# DW_CFA_advance_loc4 + .word L$CFI12-L$CFI11 + .byte 0xd ;# DW_CFA_def_cfa_register: r3 + .uleb128 0x3 + + .byte 0x4 ;# DW_CFA_advance_loc4 + .word L$CFI13-L$CFI12 + .byte 0x83 ;# DW_CFA_offset: r3 at cfa+0 + .uleb128 0 + .byte 0x84 ;# DW_CFA_offset: r4 at cfa+8 + .uleb128 1 + + .align 8 +L$EFDE1: + +L$SFDE2: + .word L$EFDE2-L$ASFDE2 ;# FDE Length +L$ASFDE2: + .word L$ASFDE2-L$frame1 ;# FDE CIE offset + .dword L$FB2 ;# FDE initial location + .dword L$FE2-L$FB2 ;# FDE address range + .byte 0x4 ;# DW_CFA_advance_loc4 + .word L$CFI21-L$FB2 + .byte 0x9 ;# DW_CFA_register: r3 in r1 + .uleb128 0x3 + .uleb128 0x1 + .byte 0x11 ;# DW_CFA_offset_extended_sf: r2 at 
cfa-16 + .uleb128 0x2 + .sleb128 -2 + + .byte 0x4 ;# DW_CFA_advance_loc4 + .word L$CFI22-L$CFI21 + .byte 0xd ;# DW_CFA_def_cfa_register: r3 + .uleb128 0x3 + + .byte 0x4 ;# DW_CFA_advance_loc4 + .word L$CFI23-L$CFI22 + .byte 0x83 ;# DW_CFA_offset: r3 at cfa+0 + .uleb128 0 + + .align 8 +L$EFDE2: