1 /* -----------------------------------------------------------------------
2 ffi64.c - Copyright (c) 2002, 2007 Bo Thorsen <bo@suse.de>
3 Copyright (c) 2008 Red Hat, Inc.
5 x86-64 Foreign Function Interface
7 Permission is hereby granted, free of charge, to any person obtaining
8 a copy of this software and associated documentation files (the
9 ``Software''), to deal in the Software without restriction, including
10 without limitation the rights to use, copy, modify, merge, publish,
11 distribute, sublicense, and/or sell copies of the Software, and to
12 permit persons to whom the Software is furnished to do so, subject to
13 the following conditions:
15 The above copyright notice and this permission notice shall be included
16 in all copies or substantial portions of the Software.
18 THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
21 NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
22 HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
23 WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
24 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
25 DEALINGS IN THE SOFTWARE.
26 ----------------------------------------------------------------------- */
29 #include <ffi_common.h>
36 #define MAX_GPR_REGS 6
37 #define MAX_SSE_REGS 8
41 /* Registers for argument passing. */
42 UINT64 gpr[MAX_GPR_REGS];
43 __int128_t sse[MAX_SSE_REGS];
46 extern void ffi_call_unix64 (void *args, unsigned long bytes, unsigned flags,
47 void *raddr, void (*fnaddr)(void), unsigned ssecount);
49 /* All references to register classes here are identical to the code in
50 gcc/config/i386/i386.c. Do *not* change one without the other. */
52 /* Register class used for passing given 64bit part of the argument.
53 These represent classes as documented by the PS ABI, with the exception
54 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
55 use SF or DFmode move instead of DImode to avoid reformatting penalties.
57 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
58 whenever possible (upper half does contain padding). */
63 X86_64_INTEGERSI_CLASS,
70 X86_64_COMPLEX_X87_CLASS,
76 #define SSE_CLASS_P(X) ((X) >= X86_64_SSE_CLASS && X <= X86_64_SSEUP_CLASS)
78 /* x86-64 register passing implementation. See x86-64 ABI for details. The goal
79 of this code is to classify each 8 bytes of the incoming argument by the register
80 class and assign registers accordingly. */
82 /* Return the union class of CLASS1 and CLASS2.
83 See the x86-64 PS ABI for details. */
85 static enum x86_64_reg_class
86 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
88 /* Rule #1: If both classes are equal, this is the resulting class. */
92 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
94 if (class1 == X86_64_NO_CLASS)
96 if (class2 == X86_64_NO_CLASS)
99 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
100 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
101 return X86_64_MEMORY_CLASS;
103 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
104 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
105 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
106 return X86_64_INTEGERSI_CLASS;
107 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
108 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
109 return X86_64_INTEGER_CLASS;
111 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
113 if (class1 == X86_64_X87_CLASS
114 || class1 == X86_64_X87UP_CLASS
115 || class1 == X86_64_COMPLEX_X87_CLASS
116 || class2 == X86_64_X87_CLASS
117 || class2 == X86_64_X87UP_CLASS
118 || class2 == X86_64_COMPLEX_X87_CLASS)
119 return X86_64_MEMORY_CLASS;
121 /* Rule #6: Otherwise class SSE is used. */
122 return X86_64_SSE_CLASS;
125 /* Classify the argument of type TYPE and mode MODE.
126 CLASSES will be filled by the register class used to pass each word
127 of the operand. The number of words is returned. In case the parameter
128 should be passed in memory, 0 is returned. As a special case for zero
129 sized containers, classes[0] will be NO_CLASS and 1 is returned.
131 See the x86-64 PS ABI for details.
134 classify_argument (ffi_type *type, enum x86_64_reg_class classes[],
141 case FFI_TYPE_UINT16:
142 case FFI_TYPE_SINT16:
143 case FFI_TYPE_UINT32:
144 case FFI_TYPE_SINT32:
145 case FFI_TYPE_UINT64:
146 case FFI_TYPE_SINT64:
147 case FFI_TYPE_POINTER:
149 int size = byte_offset + type->size;
153 classes[0] = X86_64_INTEGERSI_CLASS;
158 classes[0] = X86_64_INTEGER_CLASS;
163 classes[0] = X86_64_INTEGER_CLASS;
164 classes[1] = X86_64_INTEGERSI_CLASS;
169 classes[0] = classes[1] = X86_64_INTEGERSI_CLASS;
176 if (!(byte_offset % 8))
177 classes[0] = X86_64_SSESF_CLASS;
179 classes[0] = X86_64_SSE_CLASS;
181 case FFI_TYPE_DOUBLE:
182 classes[0] = X86_64_SSEDF_CLASS;
184 case FFI_TYPE_LONGDOUBLE:
185 classes[0] = X86_64_X87_CLASS;
186 classes[1] = X86_64_X87UP_CLASS;
188 case FFI_TYPE_STRUCT:
190 const int UNITS_PER_WORD = 8;
191 int words = (type->size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
194 enum x86_64_reg_class subclasses[MAX_CLASSES];
196 /* If the struct is larger than 32 bytes, pass it on the stack. */
200 for (i = 0; i < words; i++)
201 classes[i] = X86_64_NO_CLASS;
203 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
204 signal memory class, so handle it as a special case. */
207 classes[0] = X86_64_NO_CLASS;
211 /* Merge the fields of structure. */
212 for (ptr = type->elements; *ptr != NULL; ptr++)
216 byte_offset = ALIGN (byte_offset, (*ptr)->alignment);
218 num = classify_argument (*ptr, subclasses, byte_offset % 8);
221 for (i = 0; i < num; i++)
223 int pos = byte_offset / 8;
225 merge_classes (subclasses[i], classes[i + pos]);
228 byte_offset += (*ptr)->size;
233 /* When size > 16 bytes, if the first one isn't
234 X86_64_SSE_CLASS or any other ones aren't
235 X86_64_SSEUP_CLASS, everything should be passed in
237 if (classes[0] != X86_64_SSE_CLASS)
240 for (i = 1; i < words; i++)
241 if (classes[i] != X86_64_SSEUP_CLASS)
245 /* Final merger cleanup. */
246 for (i = 0; i < words; i++)
248 /* If one class is MEMORY, everything should be passed in
250 if (classes[i] == X86_64_MEMORY_CLASS)
253 /* The X86_64_SSEUP_CLASS should be always preceded by
254 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
255 if (classes[i] == X86_64_SSEUP_CLASS
256 && classes[i - 1] != X86_64_SSE_CLASS
257 && classes[i - 1] != X86_64_SSEUP_CLASS)
259 /* The first one should never be X86_64_SSEUP_CLASS. */
261 classes[i] = X86_64_SSE_CLASS;
264 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
265 everything should be passed in memory. */
266 if (classes[i] == X86_64_X87UP_CLASS
267 && (classes[i - 1] != X86_64_X87_CLASS))
269 /* The first one should never be X86_64_X87UP_CLASS. */
280 return 0; /* Never reached. */
283 /* Examine the argument and return the number of registers required in each
284 class. Return zero iff the parameter should be passed in memory, otherwise
285 the number of registers. */
288 examine_argument (ffi_type *type, enum x86_64_reg_class classes[MAX_CLASSES],
289 _Bool in_return, int *pngpr, int *pnsse)
291 int i, n, ngpr, nsse;
293 n = classify_argument (type, classes, 0);
298 for (i = 0; i < n; ++i)
301 case X86_64_INTEGER_CLASS:
302 case X86_64_INTEGERSI_CLASS:
305 case X86_64_SSE_CLASS:
306 case X86_64_SSESF_CLASS:
307 case X86_64_SSEDF_CLASS:
310 case X86_64_NO_CLASS:
311 case X86_64_SSEUP_CLASS:
313 case X86_64_X87_CLASS:
314 case X86_64_X87UP_CLASS:
315 case X86_64_COMPLEX_X87_CLASS:
316 return in_return != 0;
327 /* Perform machine dependent cif processing. */
330 ffi_prep_cif_machdep (ffi_cif *cif)
332 int gprcount, ssecount, i, avn, n, ngpr, nsse, flags;
333 enum x86_64_reg_class classes[MAX_CLASSES];
336 gprcount = ssecount = 0;
338 flags = cif->rtype->type;
339 if (flags != FFI_TYPE_VOID)
341 n = examine_argument (cif->rtype, classes, 1, &ngpr, &nsse);
344 /* The return value is passed in memory. A pointer to that
345 memory is the first argument. Allocate a register for it. */
347 /* We don't have to do anything in asm for the return. */
348 flags = FFI_TYPE_VOID;
350 else if (flags == FFI_TYPE_STRUCT)
352 /* Mark which registers the result appears in. */
353 _Bool sse0 = SSE_CLASS_P (classes[0]);
354 _Bool sse1 = n == 2 && SSE_CLASS_P (classes[1]);
357 else if (!sse0 && sse1)
359 else if (sse0 && sse1)
361 /* Mark the true size of the structure. */
362 flags |= cif->rtype->size << 12;
366 /* Go over all arguments and determine the way they should be passed.
367 If it's in a register and there is space for it, let that be so. If
368 not, add its size to the stack byte count. */
369 for (bytes = 0, i = 0, avn = cif->nargs; i < avn; i++)
371 if (examine_argument (cif->arg_types[i], classes, 0, &ngpr, &nsse) == 0
372 || gprcount + ngpr > MAX_GPR_REGS
373 || ssecount + nsse > MAX_SSE_REGS)
375 long align = cif->arg_types[i]->alignment;
380 bytes = ALIGN(bytes, align);
381 bytes += cif->arg_types[i]->size;
398 ffi_call (ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
400 enum x86_64_reg_class classes[MAX_CLASSES];
402 ffi_type **arg_types;
403 int gprcount, ssecount, ngpr, nsse, i, avn;
405 struct register_args *reg_args;
407 /* Can't call 32-bit mode from 64-bit mode. */
408 FFI_ASSERT (cif->abi == FFI_UNIX64);
410 /* If the return value is a struct and we don't have a return value
411 address then we need to make one. Note the setting of flags to
412 VOID above in ffi_prep_cif_machdep. */
413 ret_in_memory = (cif->rtype->type == FFI_TYPE_STRUCT
414 && (cif->flags & 0xff) == FFI_TYPE_VOID);
415 if (rvalue == NULL && ret_in_memory)
416 rvalue = alloca (cif->rtype->size);
418 /* Allocate the space for the arguments, plus 4 words of temp space. */
419 stack = alloca (sizeof (struct register_args) + cif->bytes + 4*8);
420 reg_args = (struct register_args *) stack;
421 argp = stack + sizeof (struct register_args);
423 gprcount = ssecount = 0;
425 /* If the return value is passed in memory, add the pointer as the
426 first integer argument. */
428 reg_args->gpr[gprcount++] = (long) rvalue;
431 arg_types = cif->arg_types;
433 for (i = 0; i < avn; ++i)
435 size_t size = arg_types[i]->size;
438 n = examine_argument (arg_types[i], classes, 0, &ngpr, &nsse);
440 || gprcount + ngpr > MAX_GPR_REGS
441 || ssecount + nsse > MAX_SSE_REGS)
443 long align = arg_types[i]->alignment;
445 /* Stack arguments are *always* at least 8 byte aligned. */
449 /* Pass this argument in memory. */
450 argp = (void *) ALIGN (argp, align);
451 memcpy (argp, avalue[i], size);
456 /* The argument is passed entirely in registers. */
457 char *a = (char *) avalue[i];
460 for (j = 0; j < n; j++, a += 8, size -= 8)
464 case X86_64_INTEGER_CLASS:
465 case X86_64_INTEGERSI_CLASS:
466 reg_args->gpr[gprcount] = 0;
467 memcpy (®_args->gpr[gprcount], a, size < 8 ? size : 8);
470 case X86_64_SSE_CLASS:
471 case X86_64_SSEDF_CLASS:
472 reg_args->sse[ssecount++] = *(UINT64 *) a;
474 case X86_64_SSESF_CLASS:
475 reg_args->sse[ssecount++] = *(UINT32 *) a;
484 ffi_call_unix64 (stack, cif->bytes + sizeof (struct register_args),
485 cif->flags, rvalue, fn, ssecount);
489 extern void ffi_closure_unix64(void);
492 ffi_prep_closure_loc (ffi_closure* closure,
494 void (*fun)(ffi_cif*, void*, void**, void*),
498 volatile unsigned short *tramp;
500 tramp = (volatile unsigned short *) &closure->tramp[0];
502 tramp[0] = 0xbb49; /* mov <code>, %r11 */
503 *(void * volatile *) &tramp[1] = ffi_closure_unix64;
504 tramp[5] = 0xba49; /* mov <data>, %r10 */
505 *(void * volatile *) &tramp[6] = codeloc;
507 /* Set the carry bit iff the function uses any sse registers.
508 This is clc or stc, together with the first byte of the jmp. */
509 tramp[10] = cif->flags & (1 << 11) ? 0x49f9 : 0x49f8;
511 tramp[11] = 0xe3ff; /* jmp *%r11 */
515 closure->user_data = user_data;
521 ffi_closure_unix64_inner(ffi_closure *closure, void *rvalue,
522 struct register_args *reg_args, char *argp)
526 ffi_type **arg_types;
528 int gprcount, ssecount, ngpr, nsse;
532 avalue = alloca(cif->nargs * sizeof(void *));
533 gprcount = ssecount = 0;
535 ret = cif->rtype->type;
536 if (ret != FFI_TYPE_VOID)
538 enum x86_64_reg_class classes[MAX_CLASSES];
539 int n = examine_argument (cif->rtype, classes, 1, &ngpr, &nsse);
542 /* The return value goes in memory. Arrange for the closure
543 return value to go directly back to the original caller. */
544 rvalue = (void *) reg_args->gpr[gprcount++];
545 /* We don't have to do anything in asm for the return. */
548 else if (ret == FFI_TYPE_STRUCT && n == 2)
550 /* Mark which register the second word of the structure goes in. */
551 _Bool sse0 = SSE_CLASS_P (classes[0]);
552 _Bool sse1 = SSE_CLASS_P (classes[1]);
555 else if (sse0 && !sse1)
561 arg_types = cif->arg_types;
563 for (i = 0; i < avn; ++i)
565 enum x86_64_reg_class classes[MAX_CLASSES];
568 n = examine_argument (arg_types[i], classes, 0, &ngpr, &nsse);
570 || gprcount + ngpr > MAX_GPR_REGS
571 || ssecount + nsse > MAX_SSE_REGS)
573 long align = arg_types[i]->alignment;
575 /* Stack arguments are *always* at least 8 byte aligned. */
579 /* Pass this argument in memory. */
580 argp = (void *) ALIGN (argp, align);
582 argp += arg_types[i]->size;
584 /* If the argument is in a single register, or two consecutive
585 integer registers, then we can use that address directly. */
587 || (n == 2 && !(SSE_CLASS_P (classes[0])
588 || SSE_CLASS_P (classes[1]))))
590 /* The argument is in a single register. */
591 if (SSE_CLASS_P (classes[0]))
593 avalue[i] = ®_args->sse[ssecount];
598 avalue[i] = ®_args->gpr[gprcount];
602 /* Otherwise, allocate space to make them consecutive. */
605 char *a = alloca (16);
609 for (j = 0; j < n; j++, a += 8)
611 if (SSE_CLASS_P (classes[j]))
612 memcpy (a, ®_args->sse[ssecount++], 8);
614 memcpy (a, ®_args->gpr[gprcount++], 8);
619 /* Invoke the closure. */
620 closure->fun (cif, rvalue, avalue, closure->user_data);
622 /* Tell assembly how to perform return type promotions. */
626 #endif /* __x86_64__ */