x86_64: Decouple return types from FFI_TYPE constants

author Richard Henderson <rth@twiddle.net>
Tue, 28 Oct 2014 18:17:35 +0000 (11:17 -0700)

committer Richard Henderson <rth@twiddle.net>
Wed, 12 Nov 2014 08:15:35 +0000 (09:15 +0100)
diff --git a/src/x86/ffi64.c b/src/x86/ffi64.c

index 65fb595a6cef208f875147bb0229aeca715fcca9..a03061b5a7fb595e1d6af64969b2016a9107551d 100644 (file)
--- a/src/x86/ffi64.c
+++ b/src/x86/ffi64.c
@@ -33,6 +33,7 @@
  #include <stdlib.h>
  #include <stdarg.h>
  #include <stdint.h>
+#include "internal64.h"
  
  #ifdef __x86_64__
  
@@ -191,7 +192,7 @@ classify_argument (ffi_type *type, enum x86_64_reg_class classes[],
           }
         else if (size <= 16)
           {
-           classes[0] = classes[1] = X86_64_INTEGERSI_CLASS;
+           classes[0] = classes[1] = X86_64_INTEGER_CLASS;
             return 2;
           }
         else
@@ -360,15 +361,55 @@ ffi_prep_cif_machdep (ffi_cif *cif)
    int gprcount, ssecount, i, avn, ngpr, nsse, flags;
    enum x86_64_reg_class classes[MAX_CLASSES];
    size_t bytes, n;
+  ffi_type *rtype;
  
    if (cif->abi != FFI_UNIX64)
      return FFI_BAD_ABI;
  
    gprcount = ssecount = 0;
  
-  flags = cif->rtype->type;
-  if (flags != FFI_TYPE_VOID)
+  rtype = cif->rtype;
+  switch (rtype->type)
      {
+    case FFI_TYPE_VOID:
+      flags = UNIX64_RET_VOID;
+      break;
+    case FFI_TYPE_UINT8:
+      flags = UNIX64_RET_UINT8;
+      break;
+    case FFI_TYPE_SINT8:
+      flags = UNIX64_RET_SINT8;
+      break;
+    case FFI_TYPE_UINT16:
+      flags = UNIX64_RET_UINT16;
+      break;
+    case FFI_TYPE_SINT16:
+      flags = UNIX64_RET_SINT16;
+      break;
+    case FFI_TYPE_UINT32:
+      flags = UNIX64_RET_UINT32;
+      break;
+    case FFI_TYPE_INT:
+    case FFI_TYPE_SINT32:
+      flags = UNIX64_RET_SINT32;
+      break;
+    case FFI_TYPE_UINT64:
+    case FFI_TYPE_SINT64:
+      flags = UNIX64_RET_INT64;
+      break;
+    case FFI_TYPE_POINTER:
+      flags = (sizeof(void *) == 4 ? UNIX64_RET_UINT32 : UNIX64_RET_INT64);
+      break;
+    case FFI_TYPE_FLOAT:
+      flags = UNIX64_RET_XMM32;
+      break;
+    case FFI_TYPE_DOUBLE:
+      flags = UNIX64_RET_XMM64;
+      break;
+    case FFI_TYPE_LONGDOUBLE:
+      flags = UNIX64_RET_X87;
+      break;
+    case FFI_TYPE_STRUCT:
        n = examine_argument (cif->rtype, classes, 1, &ngpr, &nsse);
        if (n == 0)
         {
@@ -376,22 +417,24 @@ ffi_prep_cif_machdep (ffi_cif *cif)
              memory is the first argument.  Allocate a register for it.  */
           gprcount++;
           /* We don't have to do anything in asm for the return.  */
-         flags = FFI_TYPE_VOID;
+         flags = UNIX64_RET_VOID | UNIX64_FLAG_RET_IN_MEM;
         }
-      else if (flags == FFI_TYPE_STRUCT)
+      else
         {
           /* Mark which registers the result appears in.  */
           _Bool sse0 = SSE_CLASS_P (classes[0]);
           _Bool sse1 = n == 2 && SSE_CLASS_P (classes[1]);
-         if (sse0 && !sse1)
-           flags |= 1 << 8;
-         else if (!sse0 && sse1)
-           flags |= 1 << 9;
-         else if (sse0 && sse1)
-           flags |= 1 << 10;
+         if (sse0)
+           flags = (sse1 ? UNIX64_RET_ST_XMM0_XMM1 : UNIX64_RET_ST_XMM0_RAX);
+         else
+           flags = (sse1 ? UNIX64_RET_ST_RAX_XMM0 : UNIX64_RET_ST_RAX_RDX);
+
           /* Mark the true size of the structure.  */
-         flags |= cif->rtype->size << 12;
+         flags |= rtype->size << UNIX64_SIZE_SHIFT;
         }
+      break;
+    default:
+      return FFI_BAD_TYPEDEF;
      }
  
    /* Go over all arguments and determine the way they should be passed.
@@ -418,9 +461,10 @@ ffi_prep_cif_machdep (ffi_cif *cif)
         }
      }
    if (ssecount)
-    flags |= 1 << 11;
+    flags |= UNIX64_FLAG_XMM_ARGS;
+
    cif->flags = flags;
-  cif->bytes = (unsigned)ALIGN (bytes, 8);
+  cif->bytes = ALIGN (bytes, 8);
  
    return FFI_OK;
  }
@@ -432,20 +476,22 @@ ffi_call_int (ffi_cif *cif, void (*fn)(void), void *rvalue,
    enum x86_64_reg_class classes[MAX_CLASSES];
    char *stack, *argp;
    ffi_type **arg_types;
-  int gprcount, ssecount, ngpr, nsse, i, avn;
-  _Bool ret_in_memory;
+  int gprcount, ssecount, ngpr, nsse, i, avn, flags;
    struct register_args *reg_args;
  
    /* Can't call 32-bit mode from 64-bit mode.  */
    FFI_ASSERT (cif->abi == FFI_UNIX64);
  
    /* If the return value is a struct and we don't have a return value
-     address then we need to make one.  Note the setting of flags to
-     VOID above in ffi_prep_cif_machdep.  */
-  ret_in_memory = (cif->rtype->type == FFI_TYPE_STRUCT
-                  && (cif->flags & 0xff) == FFI_TYPE_VOID);
-  if (rvalue == NULL && ret_in_memory)
-    rvalue = alloca (cif->rtype->size);
+     address then we need to make one.  Otherwise we can ignore it.  */
+  flags = cif->flags;
+  if (rvalue == NULL)
+    {
+      if (flags & UNIX64_FLAG_RET_IN_MEM)
+       rvalue = alloca (cif->rtype->size);
+      else
+       flags = UNIX64_RET_VOID;
+    }
  
    /* Allocate the space for the arguments, plus 4 words of temp space.  */
    stack = alloca (sizeof (struct register_args) + cif->bytes + 4*8);
@@ -458,7 +504,7 @@ ffi_call_int (ffi_cif *cif, void (*fn)(void), void *rvalue,
  
    /* If the return value is passed in memory, add the pointer as the
       first integer argument.  */
-  if (ret_in_memory)
+  if (flags & UNIX64_FLAG_RET_IN_MEM)
      reg_args->gpr[gprcount++] = (unsigned long) rvalue;
  
    avn = cif->nargs;
@@ -503,17 +549,17 @@ ffi_call_int (ffi_cif *cif, void (*fn)(void), void *rvalue,
                   switch (arg_types[i]->type)
                     {
                     case FFI_TYPE_SINT8:
-                     *(SINT64 *)&reg_args->gpr[gprcount] = (SINT64) *((SINT8 *) a);
+                     reg_args->gpr[gprcount] = (SINT64) *((SINT8 *) a);
                       break;
                     case FFI_TYPE_SINT16:
-                     *(SINT64 *)&reg_args->gpr[gprcount] = (SINT64) *((SINT16 *) a);
+                     reg_args->gpr[gprcount] = (SINT64) *((SINT16 *) a);
                       break;
                     case FFI_TYPE_SINT32:
-                     *(SINT64 *)&reg_args->gpr[gprcount] = (SINT64) *((SINT32 *) a);
+                     reg_args->gpr[gprcount] = (SINT64) *((SINT32 *) a);
                       break;
                     default:
                       reg_args->gpr[gprcount] = 0;
-                     memcpy (&reg_args->gpr[gprcount], a, size < 8 ? size : 8);
+                     memcpy (&reg_args->gpr[gprcount], a, size);
                     }
                   gprcount++;
                   break;
@@ -533,7 +579,7 @@ ffi_call_int (ffi_cif *cif, void (*fn)(void), void *rvalue,
    reg_args->rax = ssecount;
  
    ffi_call_unix64 (stack, cif->bytes + sizeof (struct register_args),
-                  cif->flags, rvalue, fn);
+                  flags, rvalue, fn);
  }
  
  void
@@ -573,7 +619,7 @@ ffi_prep_closure_loc (ffi_closure* closure,
    if (cif->abi != FFI_UNIX64)
      return FFI_BAD_ABI;
  
-  if (cif->flags & (1 << 11))
+  if (cif->flags & UNIX64_FLAG_XMM_ARGS)
      dest = ffi_closure_unix64_sse;
    else
      dest = ffi_closure_unix64;
@@ -600,39 +646,17 @@ ffi_closure_unix64_inner(ffi_cif *cif,
    ffi_type **arg_types;
    long i, avn;
    int gprcount, ssecount, ngpr, nsse;
-  int ret;
+  int flags;
  
-  avalue = alloca(cif->nargs * sizeof(void *));
+  avn = cif->nargs;
+  flags = cif->flags;
+  avalue = alloca(avn * sizeof(void *));
    gprcount = ssecount = 0;
  
-  ret = cif->rtype->type;
-  if (ret != FFI_TYPE_VOID)
-    {
-      enum x86_64_reg_class classes[MAX_CLASSES];
-      size_t n = examine_argument (cif->rtype, classes, 1, &ngpr, &nsse);
-      if (n == 0)
-       {
-         /* The return value goes in memory.  Arrange for the closure
-            return value to go directly back to the original caller.  */
-         rvalue = (void *) (unsigned long) reg_args->gpr[gprcount++];
-         /* We don't have to do anything in asm for the return.  */
-         ret = FFI_TYPE_VOID;
-       }
-      else if (ret == FFI_TYPE_STRUCT && n == 2)
-       {
-         /* Mark which register the second word of the structure goes in.  */
-         _Bool sse0 = SSE_CLASS_P (classes[0]);
-         _Bool sse1 = SSE_CLASS_P (classes[1]);
-         if (!sse0 && sse1)
-           ret |= 1 << 8;
-         else if (sse0 && !sse1)
-           ret |= 1 << 9;
-       }
-    }
+  if (flags & UNIX64_FLAG_RET_IN_MEM)
+    rvalue = (void *)(uintptr_t)reg_args->gpr[gprcount++];
  
-  avn = cif->nargs;
    arg_types = cif->arg_types;
-
    for (i = 0; i < avn; ++i)
      {
        enum x86_64_reg_class classes[MAX_CLASSES];
@@ -693,7 +717,7 @@ ffi_closure_unix64_inner(ffi_cif *cif,
    fun (cif, rvalue, avalue, user_data);
  
    /* Tell assembly how to perform return type promotions.  */
-  return ret;
+  return flags;
  }
  
  extern void ffi_go_closure_unix64(void) FFI_HIDDEN;
@@ -706,7 +730,7 @@ ffi_prep_go_closure (ffi_go_closure* closure, ffi_cif* cif,
    if (cif->abi != FFI_UNIX64)
      return FFI_BAD_ABI;
  
-  closure->tramp = (cif->flags & (1 << 11)
+  closure->tramp = (cif->flags & UNIX64_FLAG_XMM_ARGS
                     ? ffi_go_closure_unix64_sse
                     : ffi_go_closure_unix64);
    closure->cif = cif;
diff --git a/src/x86/internal64.h b/src/x86/internal64.h

new file mode 100644 (file)

index 0000000..07b1b10
--- /dev/null
+++ b/src/x86/internal64.h
@@ -0,0 +1,20 @@
+#define UNIX64_RET_VOID                0
+#define UNIX64_RET_UINT8       1
+#define UNIX64_RET_UINT16      2
+#define UNIX64_RET_UINT32      3
+#define UNIX64_RET_SINT8       4
+#define UNIX64_RET_SINT16      5
+#define UNIX64_RET_SINT32      6
+#define UNIX64_RET_INT64       7
+#define UNIX64_RET_XMM32       8
+#define UNIX64_RET_XMM64       9
+#define UNIX64_RET_X87         10
+#define UNIX64_RET_ST_RAX_RDX  11
+#define UNIX64_RET_ST_XMM0_RAX 12
+#define UNIX64_RET_ST_RAX_XMM0 13
+#define UNIX64_RET_ST_XMM0_XMM1        14
+#define UNIX64_RET_LAST                14
+
+#define UNIX64_FLAG_RET_IN_MEM (1 << 10)
+#define UNIX64_FLAG_XMM_ARGS   (1 << 11)
+#define UNIX64_SIZE_SHIFT      12
diff --git a/src/x86/unix64.S b/src/x86/unix64.S

index 797b9d9fbbd936bf9e789b39ac2a474947975e16..0151229c0629e9ee4c0c559851f7f0c17cf36a52 100644 (file)
--- a/src/x86/unix64.S
+++ b/src/x86/unix64.S
@@ -31,9 +31,15 @@
  #include <fficonfig.h>
  #include <ffi.h>
  #include <ffi_cfi.h>
+#include "internal64.h"
  
         .text
  
+.macro E index
+       .align  8
+       .org    0b + \index * 8, 0x90
+.endm
+
  /* ffi_call_unix64 (void *args, unsigned long bytes, unsigned flags,
                     void *raddr, void (*fnaddr)(void));
  
@@ -41,7 +47,7 @@
     for this function.  This has been allocated by ffi_call.  We also
     deallocate some of the stack that has been alloca'd.  */
  
-       .align  2
+       .align  8
         .globl  ffi_call_unix64
         .type   ffi_call_unix64,@function
         FFI_HIDDEN(ffi_call_unix64)
@@ -100,109 +106,81 @@ ffi_call_unix64:
         cfi_restore(%rbp)
  
         /* The first byte of the flags contains the FFI_TYPE.  */
+       cmpb    $UNIX64_RET_LAST, %cl
         movzbl  %cl, %r10d
-       leaq    .Lstore_table(%rip), %r11
-       movslq  (%r11, %r10, 4), %r10
-       addq    %r11, %r10
-       jmp     *%r10
+       leaq    0f(%rip), %r11
+       ja      9f
+       leaq    (%r11, %r10, 8), %r10
  
-       .section .rodata
-       .align  2
-.Lstore_table:
-       .long   .Lst_void-.Lstore_table         /* FFI_TYPE_VOID */
-       .long   .Lst_sint32-.Lstore_table       /* FFI_TYPE_INT */
-       .long   .Lst_float-.Lstore_table        /* FFI_TYPE_FLOAT */
-       .long   .Lst_double-.Lstore_table       /* FFI_TYPE_DOUBLE */
-       .long   .Lst_ldouble-.Lstore_table      /* FFI_TYPE_LONGDOUBLE */
-       .long   .Lst_uint8-.Lstore_table        /* FFI_TYPE_UINT8 */
-       .long   .Lst_sint8-.Lstore_table        /* FFI_TYPE_SINT8 */
-       .long   .Lst_uint16-.Lstore_table       /* FFI_TYPE_UINT16 */
-       .long   .Lst_sint16-.Lstore_table       /* FFI_TYPE_SINT16 */
-       .long   .Lst_uint32-.Lstore_table       /* FFI_TYPE_UINT32 */
-       .long   .Lst_sint32-.Lstore_table       /* FFI_TYPE_SINT32 */
-       .long   .Lst_int64-.Lstore_table        /* FFI_TYPE_UINT64 */
-       .long   .Lst_int64-.Lstore_table        /* FFI_TYPE_SINT64 */
-       .long   .Lst_struct-.Lstore_table       /* FFI_TYPE_STRUCT */
-       .long   .Lst_int64-.Lstore_table        /* FFI_TYPE_POINTER */
-       .previous
+       /* Prep for the structure cases: scratch area in redzone.  */
+       leaq    -20(%rsp), %rsi
+       jmp     *%r10
  
-       .align 2
-.Lst_void:
+       .align  8
+0:
+E UNIX64_RET_VOID
         ret
-       .align 2
-
-.Lst_uint8:
-       movzbq  %al, %rax
+E UNIX64_RET_UINT8
+       movzbl  %al, %eax
         movq    %rax, (%rdi)
         ret
-       .align 2
-.Lst_sint8:
-       movsbq  %al, %rax
+E UNIX64_RET_UINT16
+       movzwl  %ax, %eax
         movq    %rax, (%rdi)
         ret
-       .align 2
-.Lst_uint16:
-       movzwq  %ax, %rax
+E UNIX64_RET_UINT32
+       movl    %eax, %eax
         movq    %rax, (%rdi)
-       .align 2
-.Lst_sint16:
-       movswq  %ax, %rax
+       ret
+E UNIX64_RET_SINT8
+       movsbq  %al, %rax
         movq    %rax, (%rdi)
         ret
-       .align 2
-.Lst_uint32:
-       movl    %eax, %eax
+E UNIX64_RET_SINT16
+       movswq  %ax, %rax
         movq    %rax, (%rdi)
-       .align 2
-.Lst_sint32:
+       ret
+E UNIX64_RET_SINT32
         cltq
         movq    %rax, (%rdi)
         ret
-       .align 2
-.Lst_int64:
+E UNIX64_RET_INT64
         movq    %rax, (%rdi)
         ret
-
-       .align 2
-.Lst_float:
-       movss   %xmm0, (%rdi)
+E UNIX64_RET_XMM32
+       movd    %xmm0, (%rdi)
         ret
-       .align 2
-.Lst_double:
-       movsd   %xmm0, (%rdi)
+E UNIX64_RET_XMM64
+       movq    %xmm0, (%rdi)
         ret
-.Lst_ldouble:
+E UNIX64_RET_X87
         fstpt   (%rdi)
         ret
-
-       .align 2
-.Lst_struct:
-       leaq    -20(%rsp), %rsi         /* Scratch area in redzone.  */
-
-       /* We have to locate the values now, and since we don't want to
-          write too much data into the user's return value, we spill the
-          value to a 16 byte scratch area first.  Bits 8, 9, and 10
-          control where the values are located.  Only one of the three
-          bits will be set; see ffi_prep_cif_machdep for the pattern.  */
-       movd    %xmm0, %r10
-       movd    %xmm1, %r11
-       testl   $0x100, %ecx
-       cmovnz  %rax, %rdx
-       cmovnz  %r10, %rax
-       testl   $0x200, %ecx
-       cmovnz  %r10, %rdx
-       testl   $0x400, %ecx
-       cmovnz  %r10, %rax
-       cmovnz  %r11, %rdx
-       movq    %rax, (%rsi)
+E UNIX64_RET_ST_RAX_RDX
         movq    %rdx, 8(%rsi)
-
-       /* Bits 12-31 contain the true size of the structure.  Copy from
-          the scratch area to the true destination.  */
-       shrl    $12, %ecx
+       jmp     2f
+E UNIX64_RET_ST_XMM0_RAX
+       movq    %rax, 8(%rsi)
+       jmp     3f
+E UNIX64_RET_ST_RAX_XMM0
+       movq    %xmm0, 8(%rsi)
+       jmp     2f
+E UNIX64_RET_ST_XMM0_XMM1
+       movq    %xmm1, 8(%rsi)
+
+       .align 8
+3:     movq    %xmm0, (%rsi)
+       shrl    $UNIX64_SIZE_SHIFT, %ecx
+       rep movsb
+       ret
+       .align 8
+2:     movq    %rax, (%rsi)
+       shrl    $UNIX64_SIZE_SHIFT, %ecx
         rep movsb
         ret
  
+9:     call    abort@PLT
+
         /* Many times we can avoid loading any SSE registers at all.
            It's not worth an indirect jump to load the exact set of
            SSE registers needed; zero or all is a good compromise.  */
@@ -292,84 +270,68 @@ ffi_closure_unix64:
         cfi_adjust_cfa_offset(-ffi_closure_FS)
  
         /* The first byte of the return value contains the FFI_TYPE.  */
+       cmpb    $UNIX64_RET_LAST, %al
         movzbl  %al, %r10d
-       leaq    .Lload_table(%rip), %r11
-       movslq  (%r11, %r10, 4), %r10
-       addq    %r11, %r10
+       leaq    0f(%rip), %r11
+       ja      9f
+       leaq    (%r11, %r10, 8), %r10
         jmp     *%r10
  
-       .section .rodata
-       .align  2
-.Lload_table:
-       .long   .Lld_void-.Lload_table          /* FFI_TYPE_VOID */
-       .long   .Lld_int32-.Lload_table         /* FFI_TYPE_INT */
-       .long   .Lld_float-.Lload_table         /* FFI_TYPE_FLOAT */
-       .long   .Lld_double-.Lload_table        /* FFI_TYPE_DOUBLE */
-       .long   .Lld_ldouble-.Lload_table       /* FFI_TYPE_LONGDOUBLE */
-       .long   .Lld_int8-.Lload_table          /* FFI_TYPE_UINT8 */
-       .long   .Lld_int8-.Lload_table          /* FFI_TYPE_SINT8 */
-       .long   .Lld_int16-.Lload_table         /* FFI_TYPE_UINT16 */
-       .long   .Lld_int16-.Lload_table         /* FFI_TYPE_SINT16 */
-       .long   .Lld_int32-.Lload_table         /* FFI_TYPE_UINT32 */
-       .long   .Lld_int32-.Lload_table         /* FFI_TYPE_SINT32 */
-       .long   .Lld_int64-.Lload_table         /* FFI_TYPE_UINT64 */
-       .long   .Lld_int64-.Lload_table         /* FFI_TYPE_SINT64 */
-       .long   .Lld_struct-.Lload_table        /* FFI_TYPE_STRUCT */
-       .long   .Lld_int64-.Lload_table         /* FFI_TYPE_POINTER */
-       .previous
-
-       .align 2
-.Lld_void:
+       .align  8
+0:
+E UNIX64_RET_VOID
         ret
-
-       .align 2
-.Lld_int8:
+E UNIX64_RET_UINT8
         movzbl  ffi_closure_RED_RVALUE(%rsp), %eax
         ret
-       .align 2
-.Lld_int16:
+E UNIX64_RET_UINT16
         movzwl  ffi_closure_RED_RVALUE(%rsp), %eax
         ret
-       .align 2
-.Lld_int32:
+E UNIX64_RET_UINT32
         movl    ffi_closure_RED_RVALUE(%rsp), %eax
         ret
-       .align 2
-.Lld_int64:
+E UNIX64_RET_SINT8
+       movsbl  ffi_closure_RED_RVALUE(%rsp), %eax
+       ret
+E UNIX64_RET_SINT16
+       movswl  ffi_closure_RED_RVALUE(%rsp), %eax
+       ret
+E UNIX64_RET_SINT32
+       movl    ffi_closure_RED_RVALUE(%rsp), %eax
+       ret
+E UNIX64_RET_INT64
         movq    ffi_closure_RED_RVALUE(%rsp), %rax
         ret
-
-       .align 2
-.Lld_float:
-       movss   ffi_closure_RED_RVALUE(%rsp), %xmm0
+E UNIX64_RET_XMM32
+       movd    ffi_closure_RED_RVALUE(%rsp), %xmm0
         ret
-       .align 2
-.Lld_double:
-       movsd   ffi_closure_RED_RVALUE(%rsp), %xmm0
+E UNIX64_RET_XMM64
+       movq    ffi_closure_RED_RVALUE(%rsp), %xmm0
         ret
-       .align 2
-.Lld_ldouble:
+E UNIX64_RET_X87
         fldt    ffi_closure_RED_RVALUE(%rsp)
         ret
-
-       .align 2
-.Lld_struct:
-       /* There are four possibilities here, %rax/%rdx, %xmm0/%rax,
-          %rax/%xmm0, %xmm0/%xmm1.  We collapse two by always loading
-          both rdx and xmm1 with the second word.  For the remaining,
-          bit 8 set means xmm0 gets the second word, and bit 9 means
-          that rax gets the second word.  */
-       movq    ffi_closure_RED_RVALUE(%rsp), %rcx
+E UNIX64_RET_ST_RAX_RDX
         movq    ffi_closure_RED_RVALUE+8(%rsp), %rdx
+       jmp     2f
+E UNIX64_RET_ST_XMM0_RAX
+       movq    ffi_closure_RED_RVALUE+8(%rsp), %rax
+       jmp     3f
+E UNIX64_RET_ST_RAX_XMM0
+       movq    ffi_closure_RED_RVALUE+8(%rsp), %xmm0
+       jmp     2f
+E UNIX64_RET_ST_XMM0_XMM1
         movq    ffi_closure_RED_RVALUE+8(%rsp), %xmm1
-       testl   $0x100, %eax
-       cmovnz  %rdx, %rcx
-       movd    %rcx, %xmm0
-       testl   $0x200, %eax
-       movq    ffi_closure_RED_RVALUE(%rsp), %rax
-       cmovnz  %rdx, %rax
+
+       .align  8
+3:     movq    ffi_closure_RED_RVALUE(%rsp), %xmm0
+       ret
+       .align  8
+2:     movq    ffi_closure_RED_RVALUE(%rsp), %rax
         ret
  
+9:     call    abort@PLT
+
         cfi_endproc
         .size   ffi_closure_unix64,.-ffi_closure_unix64
author	Richard Henderson <rth@twiddle.net>
	Tue, 28 Oct 2014 18:17:35 +0000 (11:17 -0700)
committer	Richard Henderson <rth@twiddle.net>
	Wed, 12 Nov 2014 08:15:35 +0000 (09:15 +0100)
src/x86/ffi64.c		patch \| blob \| history
src/x86/internal64.h	[new file with mode: 0644]	patch \| blob
src/x86/unix64.S		patch \| blob \| history