case FFI_TYPE_UINT64:
case FFI_TYPE_SINT64:
case FFI_TYPE_POINTER:
+ do_integer:
{
size_t size = byte_offset + type->size;
}
return words;
}
-
- default:
- FFI_ASSERT(0);
+ case FFI_TYPE_COMPLEX:
+ {
+ ffi_type *inner = type->elements[0];
+ switch (inner->type)
+ {
+ case FFI_TYPE_INT:
+ case FFI_TYPE_UINT8:
+ case FFI_TYPE_SINT8:
+ case FFI_TYPE_UINT16:
+ case FFI_TYPE_SINT16:
+ case FFI_TYPE_UINT32:
+ case FFI_TYPE_SINT32:
+ case FFI_TYPE_UINT64:
+ case FFI_TYPE_SINT64:
+ goto do_integer;
+
+ case FFI_TYPE_FLOAT:
+ classes[0] = X86_64_SSE_CLASS;
+ if (byte_offset % 8)
+ {
+ classes[1] = X86_64_SSESF_CLASS;
+ return 2;
+ }
+ return 1;
+ case FFI_TYPE_DOUBLE:
+ classes[0] = classes[1] = X86_64_SSEDF_CLASS;
+ return 2;
+#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
+ case FFI_TYPE_LONGDOUBLE:
+ classes[0] = X86_64_COMPLEX_X87_CLASS;
+ return 1;
+#endif
+ }
+ }
}
- return 0; /* Never reached. */
+ abort();
}
/* Examine the argument and return set number of register required in each
{
int gprcount, ssecount, i, avn, ngpr, nsse, flags;
enum x86_64_reg_class classes[MAX_CLASSES];
- size_t bytes, n;
+ size_t bytes, n, rtype_size;
ffi_type *rtype;
if (cif->abi != FFI_UNIX64)
gprcount = ssecount = 0;
rtype = cif->rtype;
+ rtype_size = rtype->size;
switch (rtype->type)
{
case FFI_TYPE_VOID:
}
else
{
- /* Mark which registers the result appears in. */
_Bool sse0 = SSE_CLASS_P (classes[0]);
- _Bool sse1 = n == 2 && SSE_CLASS_P (classes[1]);
- if (sse0)
- flags = (sse1 ? UNIX64_RET_ST_XMM0_XMM1 : UNIX64_RET_ST_XMM0_RAX);
- else
- flags = (sse1 ? UNIX64_RET_ST_RAX_XMM0 : UNIX64_RET_ST_RAX_RDX);
- /* Mark the true size of the structure. */
- flags |= rtype->size << UNIX64_SIZE_SHIFT;
+ if (rtype_size == 4 && sse0)
+ flags = UNIX64_RET_XMM32;
+ else if (rtype_size == 8)
+ flags = sse0 ? UNIX64_RET_XMM64 : UNIX64_RET_INT64;
+ else
+ {
+ _Bool sse1 = n == 2 && SSE_CLASS_P (classes[1]);
+ if (sse0 && sse1)
+ flags = UNIX64_RET_ST_XMM0_XMM1;
+ else if (sse0)
+ flags = UNIX64_RET_ST_XMM0_RAX;
+ else if (sse1)
+ flags = UNIX64_RET_ST_RAX_XMM0;
+ else
+ flags = UNIX64_RET_ST_RAX_RDX;
+ flags |= rtype_size << UNIX64_SIZE_SHIFT;
+ }
+ }
+ break;
+ case FFI_TYPE_COMPLEX:
+ switch (rtype->elements[0]->type)
+ {
+ case FFI_TYPE_UINT8:
+ case FFI_TYPE_SINT8:
+ case FFI_TYPE_UINT16:
+ case FFI_TYPE_SINT16:
+ case FFI_TYPE_INT:
+ case FFI_TYPE_UINT32:
+ case FFI_TYPE_SINT32:
+ case FFI_TYPE_UINT64:
+ case FFI_TYPE_SINT64:
+ flags = UNIX64_RET_ST_RAX_RDX | (rtype_size << UNIX64_SIZE_SHIFT);
+ break;
+ case FFI_TYPE_FLOAT:
+ flags = UNIX64_RET_XMM64;
+ break;
+ case FFI_TYPE_DOUBLE:
+ flags = UNIX64_RET_ST_XMM0_XMM1 | (16 << UNIX64_SIZE_SHIFT);
+ break;
+#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
+ case FFI_TYPE_LONGDOUBLE:
+ flags = UNIX64_RET_X87_2;
+ break;
+#endif
+ default:
+ return FFI_BAD_TYPEDEF;
}
break;
default:
#define UNIX64_RET_XMM32 8
#define UNIX64_RET_XMM64 9
#define UNIX64_RET_X87 10
-#define UNIX64_RET_ST_RAX_RDX 11
+#define UNIX64_RET_X87_2 11 /* Two x87 values; selected for complex long double returns. */
#define UNIX64_RET_ST_XMM0_RAX 12
#define UNIX64_RET_ST_RAX_XMM0 13
#define UNIX64_RET_ST_XMM0_XMM1 14
-#define UNIX64_RET_LAST 14
+#define UNIX64_RET_ST_RAX_RDX 15 /* Its dispatch entry falls through into the shared "2:" tail that handles %rax. */
+
+#define UNIX64_RET_LAST 15 /* Highest return-kind index. NOTE(review): presumably the bound for the dispatch range check -- confirm. */
#define UNIX64_FLAG_RET_IN_MEM (1 << 10)
#define UNIX64_FLAG_XMM_ARGS (1 << 11)
E UNIX64_RET_X87
fstpt (%rdi)
ret
-E UNIX64_RET_ST_RAX_RDX
- movq %rdx, 8(%rsi)
- jmp 2f
+E UNIX64_RET_X87_2
+ fstpt (%rdi)
+ fstpt 16(%rdi)
+ ret
E UNIX64_RET_ST_XMM0_RAX
movq %rax, 8(%rsi)
jmp 3f
jmp 2f
E UNIX64_RET_ST_XMM0_XMM1
movq %xmm1, 8(%rsi)
-
- .align 8
-3: movq %xmm0, (%rsi)
+ jmp 3f
+E UNIX64_RET_ST_RAX_RDX
+ movq %rdx, 8(%rsi)
+2: movq %rax, (%rsi)
shrl $UNIX64_SIZE_SHIFT, %ecx
rep movsb
ret
.align 8
-2: movq %rax, (%rsi)
+3: movq %xmm0, (%rsi)
shrl $UNIX64_SIZE_SHIFT, %ecx
rep movsb
ret
.size ffi_call_unix64,.-ffi_call_unix64
/* 6 general registers, 8 vector registers,
- 16 bytes of rvalue, 8 bytes of alignment. */
+ 32 bytes of rvalue, 8 bytes of alignment. */
#define ffi_closure_OFS_G 0
#define ffi_closure_OFS_V (6*8)
#define ffi_closure_OFS_RVALUE (ffi_closure_OFS_V + 8*16)
-#define ffi_closure_FS (ffi_closure_OFS_RVALUE + 16 + 8)
+#define ffi_closure_FS (ffi_closure_OFS_RVALUE + 32 + 8) /* rvalue area is 32 bytes: UNIX64_RET_X87_2 reloads a second long double from offset 16. */
/* The location of rvalue within the red zone after deallocating the frame. */
#define ffi_closure_RED_RVALUE (ffi_closure_OFS_RVALUE - ffi_closure_FS)
leaq 0f(%rip), %r11
ja 9f
leaq (%r11, %r10, 8), %r10
+ leaq ffi_closure_RED_RVALUE(%rsp), %rsi
jmp *%r10
.align 8
E UNIX64_RET_VOID
ret
E UNIX64_RET_UINT8
- movzbl ffi_closure_RED_RVALUE(%rsp), %eax
+ movzbl (%rsi), %eax
ret
E UNIX64_RET_UINT16
- movzwl ffi_closure_RED_RVALUE(%rsp), %eax
+ movzwl (%rsi), %eax
ret
E UNIX64_RET_UINT32
- movl ffi_closure_RED_RVALUE(%rsp), %eax
+ movl (%rsi), %eax
ret
E UNIX64_RET_SINT8
- movsbl ffi_closure_RED_RVALUE(%rsp), %eax
+ movsbl (%rsi), %eax
ret
E UNIX64_RET_SINT16
- movswl ffi_closure_RED_RVALUE(%rsp), %eax
+ movswl (%rsi), %eax
ret
E UNIX64_RET_SINT32
- movl ffi_closure_RED_RVALUE(%rsp), %eax
+ movl (%rsi), %eax
ret
E UNIX64_RET_INT64
- movq ffi_closure_RED_RVALUE(%rsp), %rax
+ movq (%rsi), %rax
ret
E UNIX64_RET_XMM32
- movd ffi_closure_RED_RVALUE(%rsp), %xmm0
+ movd (%rsi), %xmm0
ret
E UNIX64_RET_XMM64
- movq ffi_closure_RED_RVALUE(%rsp), %xmm0
+ movq (%rsi), %xmm0
ret
E UNIX64_RET_X87
- fldt ffi_closure_RED_RVALUE(%rsp)
+ fldt (%rsi)
+ ret
+E UNIX64_RET_X87_2
+ fldt 16(%rsi)
+ fldt (%rsi)
ret
-E UNIX64_RET_ST_RAX_RDX
- movq ffi_closure_RED_RVALUE+8(%rsp), %rdx
- jmp 2f
E UNIX64_RET_ST_XMM0_RAX
- movq ffi_closure_RED_RVALUE+8(%rsp), %rax
+ movq 8(%rsi), %rax
jmp 3f
E UNIX64_RET_ST_RAX_XMM0
- movq ffi_closure_RED_RVALUE+8(%rsp), %xmm0
+ movq 8(%rsi), %xmm0
jmp 2f
E UNIX64_RET_ST_XMM0_XMM1
- movq ffi_closure_RED_RVALUE+8(%rsp), %xmm1
-
- .align 8
-3: movq ffi_closure_RED_RVALUE(%rsp), %xmm0
+ movq 8(%rsi), %xmm1
+ jmp 3f
+E UNIX64_RET_ST_RAX_RDX
+ movq 8(%rsi), %rdx
+2: movq (%rsi), %rax
ret
.align 8
-2: movq ffi_closure_RED_RVALUE(%rsp), %rax
+3: movq (%rsi), %xmm0
ret
9: call abort@PLT