1 /* -----------------------------------------------------------------------
2 ffi64.c - Copyright (c) 2013 The Written Word, Inc.
3 Copyright (c) 2011 Anthony Green
4 Copyright (c) 2008, 2010 Red Hat, Inc.
5 Copyright (c) 2002, 2007 Bo Thorsen <bo@suse.de>
7 x86-64 Foreign Function Interface
9 Permission is hereby granted, free of charge, to any person obtaining
10 a copy of this software and associated documentation files (the
11 ``Software''), to deal in the Software without restriction, including
12 without limitation the rights to use, copy, modify, merge, publish,
13 distribute, sublicense, and/or sell copies of the Software, and to
14 permit persons to whom the Software is furnished to do so, subject to
15 the following conditions:
17 The above copyright notice and this permission notice shall be included
18 in all copies or substantial portions of the Software.
20 THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
21 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
22 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
23 NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
24 HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
25 WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
26 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
27 DEALINGS IN THE SOFTWARE.
28 ----------------------------------------------------------------------- */
31 #include <ffi_common.h>
38 #define MAX_GPR_REGS 6
39 #define MAX_SSE_REGS 8
41 #if defined(__INTEL_COMPILER)
42 #include "xmmintrin.h"
43 #define UINT128 __m128
45 #if defined(__SUNPRO_C)
46 #include <sunmedia_types.h>
47 #define UINT128 __m128i
49 #define UINT128 __int128_t
62 /* Registers for argument passing. */
63 UINT64 gpr[MAX_GPR_REGS];
64 union big_int_union sse[MAX_SSE_REGS];
67 extern void ffi_call_unix64 (void *args, unsigned long bytes, unsigned flags,
68 void *raddr, void (*fnaddr)(void), unsigned ssecount);
70 /* All references to register classes here are identical to the code in
71 gcc/config/i386/i386.c. Do *not* change one without the other. */
73 /* Register class used for passing given 64bit part of the argument.
74 These represent classes as documented by the PS ABI, with the
75 exception of SSESF, SSEDF classes, that are basically SSE class,
76 just gcc will use SF or DFmode move instead of DImode to avoid
77 reformatting penalties.
79 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
80 whenever possible (upper half does contain padding). */
85 X86_64_INTEGERSI_CLASS,
92 X86_64_COMPLEX_X87_CLASS,
/* Nonzero when X lies in the enumerator range X86_64_SSE_CLASS ..
   X86_64_SSEUP_CLASS (inclusive).  Every use of X is parenthesized so
   that an expression argument (e.g. `a | b') is evaluated as a unit
   instead of re-associating with the comparison operators.  X is still
   expanded twice, so do not pass an argument with side effects.  */
#define SSE_CLASS_P(X) ((X) >= X86_64_SSE_CLASS && (X) <= X86_64_SSEUP_CLASS)
100 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
101 of this code is to classify each 8bytes of incoming argument by the register
102 class and assign registers accordingly. */
104 /* Return the union class of CLASS1 and CLASS2.
105 See the x86-64 PS ABI for details. */
107 static enum x86_64_reg_class
108 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
110 /* Rule #1: If both classes are equal, this is the resulting class. */
111 if (class1 == class2)
114 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
116 if (class1 == X86_64_NO_CLASS)
118 if (class2 == X86_64_NO_CLASS)
121 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
122 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
123 return X86_64_MEMORY_CLASS;
125 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
126 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
127 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
128 return X86_64_INTEGERSI_CLASS;
129 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
130 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
131 return X86_64_INTEGER_CLASS;
133 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
135 if (class1 == X86_64_X87_CLASS
136 || class1 == X86_64_X87UP_CLASS
137 || class1 == X86_64_COMPLEX_X87_CLASS
138 || class2 == X86_64_X87_CLASS
139 || class2 == X86_64_X87UP_CLASS
140 || class2 == X86_64_COMPLEX_X87_CLASS)
141 return X86_64_MEMORY_CLASS;
143 /* Rule #6: Otherwise class SSE is used. */
144 return X86_64_SSE_CLASS;
147 /* Classify the argument of type TYPE and mode MODE.
148 CLASSES will be filled by the register class used to pass each word
149 of the operand. The number of words is returned. In case the parameter
150 should be passed in memory, 0 is returned. As a special case for zero
151 sized containers, classes[0] will be NO_CLASS and 1 is returned.
153 See the x86-64 PS ABI for details.
156 classify_argument (ffi_type *type, enum x86_64_reg_class classes[],
163 case FFI_TYPE_UINT16:
164 case FFI_TYPE_SINT16:
165 case FFI_TYPE_UINT32:
166 case FFI_TYPE_SINT32:
167 case FFI_TYPE_UINT64:
168 case FFI_TYPE_SINT64:
169 case FFI_TYPE_POINTER:
171 size_t size = byte_offset + type->size;
175 classes[0] = X86_64_INTEGERSI_CLASS;
180 classes[0] = X86_64_INTEGER_CLASS;
185 classes[0] = X86_64_INTEGER_CLASS;
186 classes[1] = X86_64_INTEGERSI_CLASS;
191 classes[0] = classes[1] = X86_64_INTEGERSI_CLASS;
198 if (!(byte_offset % 8))
199 classes[0] = X86_64_SSESF_CLASS;
201 classes[0] = X86_64_SSE_CLASS;
203 case FFI_TYPE_DOUBLE:
204 classes[0] = X86_64_SSEDF_CLASS;
206 #if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
207 case FFI_TYPE_LONGDOUBLE:
208 classes[0] = X86_64_X87_CLASS;
209 classes[1] = X86_64_X87UP_CLASS;
212 case FFI_TYPE_STRUCT:
214 const size_t UNITS_PER_WORD = 8;
215 size_t words = (type->size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
218 enum x86_64_reg_class subclasses[MAX_CLASSES];
220 /* If the struct is larger than 32 bytes, pass it on the stack. */
224 for (i = 0; i < words; i++)
225 classes[i] = X86_64_NO_CLASS;
227 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
228 signal the memory class, so handle it as a special case. */
231 classes[0] = X86_64_NO_CLASS;
235 /* Merge the fields of structure. */
236 for (ptr = type->elements; *ptr != NULL; ptr++)
240 byte_offset = ALIGN (byte_offset, (*ptr)->alignment);
242 num = classify_argument (*ptr, subclasses, byte_offset % 8);
245 for (i = 0; i < num; i++)
247 size_t pos = byte_offset / 8;
249 merge_classes (subclasses[i], classes[i + pos]);
252 byte_offset += (*ptr)->size;
257 /* When size > 16 bytes, if the first one isn't
258 X86_64_SSE_CLASS or any other ones aren't
259 X86_64_SSEUP_CLASS, everything should be passed in
261 if (classes[0] != X86_64_SSE_CLASS)
264 for (i = 1; i < words; i++)
265 if (classes[i] != X86_64_SSEUP_CLASS)
269 /* Final merger cleanup. */
270 for (i = 0; i < words; i++)
272 /* If one class is MEMORY, everything should be passed in
274 if (classes[i] == X86_64_MEMORY_CLASS)
277 /* The X86_64_SSEUP_CLASS should be always preceded by
278 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
279 if (classes[i] == X86_64_SSEUP_CLASS
280 && classes[i - 1] != X86_64_SSE_CLASS
281 && classes[i - 1] != X86_64_SSEUP_CLASS)
283 /* The first one should never be X86_64_SSEUP_CLASS. */
285 classes[i] = X86_64_SSE_CLASS;
288 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
289 everything should be passed in memory. */
290 if (classes[i] == X86_64_X87UP_CLASS
291 && (classes[i - 1] != X86_64_X87_CLASS))
293 /* The first one should never be X86_64_X87UP_CLASS. */
304 return 0; /* Never reached. */
307 /* Examine the argument and set the number of registers required in each
308 class. Return zero iff parameter should be passed in memory, otherwise
309 the number of registers. */
312 examine_argument (ffi_type *type, enum x86_64_reg_class classes[MAX_CLASSES],
313 _Bool in_return, int *pngpr, int *pnsse)
318 n = classify_argument (type, classes, 0);
323 for (i = 0; i < n; ++i)
326 case X86_64_INTEGER_CLASS:
327 case X86_64_INTEGERSI_CLASS:
330 case X86_64_SSE_CLASS:
331 case X86_64_SSESF_CLASS:
332 case X86_64_SSEDF_CLASS:
335 case X86_64_NO_CLASS:
336 case X86_64_SSEUP_CLASS:
338 case X86_64_X87_CLASS:
339 case X86_64_X87UP_CLASS:
340 case X86_64_COMPLEX_X87_CLASS:
341 return in_return != 0;
352 /* Perform machine dependent cif processing. */
355 ffi_prep_cif_machdep (ffi_cif *cif)
357 int gprcount, ssecount, i, avn, ngpr, nsse, flags;
358 enum x86_64_reg_class classes[MAX_CLASSES];
361 gprcount = ssecount = 0;
363 flags = cif->rtype->type;
364 if (flags != FFI_TYPE_VOID)
366 n = examine_argument (cif->rtype, classes, 1, &ngpr, &nsse);
369 /* The return value is passed in memory. A pointer to that
370 memory is the first argument. Allocate a register for it. */
372 /* We don't have to do anything in asm for the return. */
373 flags = FFI_TYPE_VOID;
375 else if (flags == FFI_TYPE_STRUCT)
377 /* Mark which registers the result appears in. */
378 _Bool sse0 = SSE_CLASS_P (classes[0]);
379 _Bool sse1 = n == 2 && SSE_CLASS_P (classes[1]);
382 else if (!sse0 && sse1)
384 else if (sse0 && sse1)
386 /* Mark the true size of the structure. */
387 flags |= cif->rtype->size << 12;
391 /* Go over all arguments and determine the way they should be passed.
392 If it's in a register and there is space for it, let that be so. If
393 not, add its size to the stack byte count. */
394 for (bytes = 0, i = 0, avn = cif->nargs; i < avn; i++)
396 if (examine_argument (cif->arg_types[i], classes, 0, &ngpr, &nsse) == 0
397 || gprcount + ngpr > MAX_GPR_REGS
398 || ssecount + nsse > MAX_SSE_REGS)
400 long align = cif->arg_types[i]->alignment;
405 bytes = ALIGN (bytes, align);
406 bytes += cif->arg_types[i]->size;
417 cif->bytes = (unsigned)ALIGN (bytes, 8);
423 ffi_call (ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
425 enum x86_64_reg_class classes[MAX_CLASSES];
427 ffi_type **arg_types;
428 int gprcount, ssecount, ngpr, nsse, i, avn;
430 struct register_args *reg_args;
432 /* Can't call 32-bit mode from 64-bit mode. */
433 FFI_ASSERT (cif->abi == FFI_UNIX64);
435 /* If the return value is a struct and we don't have a return value
436 address then we need to make one. Note the setting of flags to
437 VOID above in ffi_prep_cif_machdep. */
438 ret_in_memory = (cif->rtype->type == FFI_TYPE_STRUCT
439 && (cif->flags & 0xff) == FFI_TYPE_VOID);
440 if (rvalue == NULL && ret_in_memory)
441 rvalue = alloca (cif->rtype->size);
443 /* Allocate the space for the arguments, plus 4 words of temp space. */
444 stack = alloca (sizeof (struct register_args) + cif->bytes + 4*8);
445 reg_args = (struct register_args *) stack;
446 argp = stack + sizeof (struct register_args);
448 gprcount = ssecount = 0;
450 /* If the return value is passed in memory, add the pointer as the
451 first integer argument. */
453 reg_args->gpr[gprcount++] = (unsigned long) rvalue;
456 arg_types = cif->arg_types;
458 for (i = 0; i < avn; ++i)
460 size_t n, size = arg_types[i]->size;
462 n = examine_argument (arg_types[i], classes, 0, &ngpr, &nsse);
464 || gprcount + ngpr > MAX_GPR_REGS
465 || ssecount + nsse > MAX_SSE_REGS)
467 long align = arg_types[i]->alignment;
469 /* Stack arguments are *always* at least 8 byte aligned. */
473 /* Pass this argument in memory. */
474 argp = (void *) ALIGN (argp, align);
475 memcpy (argp, avalue[i], size);
480 /* The argument is passed entirely in registers. */
481 char *a = (char *) avalue[i];
484 for (j = 0; j < n; j++, a += 8, size -= 8)
488 case X86_64_INTEGER_CLASS:
489 case X86_64_INTEGERSI_CLASS:
490 /* Sign-extend integer arguments passed in general
491 purpose registers, to cope with the fact that
492 LLVM incorrectly assumes that this will be done
493 (the x86-64 PS ABI does not specify this). */
494 switch (arg_types[i]->type)
497 *(SINT64 *)&reg_args->gpr[gprcount] = (SINT64) *((SINT8 *) a);
499 case FFI_TYPE_SINT16:
500 *(SINT64 *)&reg_args->gpr[gprcount] = (SINT64) *((SINT16 *) a);
502 case FFI_TYPE_SINT32:
503 *(SINT64 *)&reg_args->gpr[gprcount] = (SINT64) *((SINT32 *) a);
506 reg_args->gpr[gprcount] = 0;
507 memcpy (&reg_args->gpr[gprcount], a, size < 8 ? size : 8);
511 case X86_64_SSE_CLASS:
512 case X86_64_SSEDF_CLASS:
513 reg_args->sse[ssecount++].i64 = *(UINT64 *) a;
515 case X86_64_SSESF_CLASS:
516 reg_args->sse[ssecount++].i32 = *(UINT32 *) a;
525 ffi_call_unix64 (stack, cif->bytes + sizeof (struct register_args),
526 cif->flags, rvalue, fn, ssecount);
530 extern void ffi_closure_unix64(void);
533 ffi_prep_closure_loc (ffi_closure* closure,
535 void (*fun)(ffi_cif*, void*, void**, void*),
539 volatile unsigned short *tramp;
541 /* Sanity check on the cif ABI. */
544 if (UNLIKELY (! (abi > FFI_FIRST_ABI && abi < FFI_LAST_ABI)))
548 tramp = (volatile unsigned short *) &closure->tramp[0];
550 tramp[0] = 0xbb49; /* mov <code>, %r11 */
551 *((unsigned long long * volatile) &tramp[1])
552 = (unsigned long) ffi_closure_unix64;
553 tramp[5] = 0xba49; /* mov <data>, %r10 */
554 *((unsigned long long * volatile) &tramp[6])
555 = (unsigned long) codeloc;
557 /* Set the carry bit iff the function uses any sse registers.
558 This is clc or stc, together with the first byte of the jmp. */
559 tramp[10] = cif->flags & (1 << 11) ? 0x49f9 : 0x49f8;
561 tramp[11] = 0xe3ff; /* jmp *%r11 */
565 closure->user_data = user_data;
571 ffi_closure_unix64_inner(ffi_closure *closure, void *rvalue,
572 struct register_args *reg_args, char *argp)
576 ffi_type **arg_types;
578 int gprcount, ssecount, ngpr, nsse;
582 avalue = alloca(cif->nargs * sizeof(void *));
583 gprcount = ssecount = 0;
585 ret = cif->rtype->type;
586 if (ret != FFI_TYPE_VOID)
588 enum x86_64_reg_class classes[MAX_CLASSES];
589 size_t n = examine_argument (cif->rtype, classes, 1, &ngpr, &nsse);
592 /* The return value goes in memory. Arrange for the closure
593 return value to go directly back to the original caller. */
594 rvalue = (void *) (unsigned long) reg_args->gpr[gprcount++];
595 /* We don't have to do anything in asm for the return. */
598 else if (ret == FFI_TYPE_STRUCT && n == 2)
600 /* Mark which register the second word of the structure goes in. */
601 _Bool sse0 = SSE_CLASS_P (classes[0]);
602 _Bool sse1 = SSE_CLASS_P (classes[1]);
605 else if (sse0 && !sse1)
611 arg_types = cif->arg_types;
613 for (i = 0; i < avn; ++i)
615 enum x86_64_reg_class classes[MAX_CLASSES];
618 n = examine_argument (arg_types[i], classes, 0, &ngpr, &nsse);
620 || gprcount + ngpr > MAX_GPR_REGS
621 || ssecount + nsse > MAX_SSE_REGS)
623 long align = arg_types[i]->alignment;
625 /* Stack arguments are *always* at least 8 byte aligned. */
629 /* Pass this argument in memory. */
630 argp = (void *) ALIGN (argp, align);
632 argp += arg_types[i]->size;
634 /* If the argument is in a single register, or two consecutive
635 integer registers, then we can use that address directly. */
637 || (n == 2 && !(SSE_CLASS_P (classes[0])
638 || SSE_CLASS_P (classes[1]))))
640 /* The argument is in a single register. */
641 if (SSE_CLASS_P (classes[0]))
643 avalue[i] = &reg_args->sse[ssecount];
648 avalue[i] = &reg_args->gpr[gprcount];
652 /* Otherwise, allocate space to make them consecutive. */
655 char *a = alloca (16);
659 for (j = 0; j < n; j++, a += 8)
661 if (SSE_CLASS_P (classes[j]))
662 memcpy (a, &reg_args->sse[ssecount++], 8);
664 memcpy (a, &reg_args->gpr[gprcount++], 8);
669 /* Invoke the closure. */
670 closure->fun (cif, rvalue, avalue, closure->user_data);
672 /* Tell assembly how to perform return type promotions. */
676 #endif /* __x86_64__ */