libffi_version_info = -version-info `grep -v '^\#' $(srcdir)/libtool-version`
libffi.map: $(top_srcdir)/libffi.map.in
- $(COMPILE) -D$(TARGET) -E -x assembler-with-cpp -o $@ $<
+ $(COMPILE) -D$(TARGET) -DGENERATE_LIBFFI_MAP \
+ -E -x assembler-with-cpp -o $@ $<
libffi_la_LDFLAGS = -no-undefined $(libffi_version_info) $(libffi_version_script) $(LTLDFLAGS) $(AM_LTLDFLAGS)
libffi_la_DEPENDENCIES = $(libffi_la_LIBADD) $(libffi_version_dep)
# release, then set age to 0.
#
# CURRENT:REVISION:AGE
-8:0:1
+9:0:1
return FFI_BAD_ABI;
}
+ /* endbr32. */
+ *(UINT32 *) tramp = 0xfb1e0ff3;
+
/* movl or pushl immediate. */
- tramp[0] = op;
- *(void **)(tramp + 1) = codeloc;
+ tramp[4] = op;
+ *(void **)(tramp + 5) = codeloc;
/* jmp dest */
- tramp[5] = 0xe9;
- *(unsigned *)(tramp + 6) = (unsigned)dest - ((unsigned)codeloc + 10);
+ tramp[9] = 0xe9;
+ /* rel32 is relative to the end of the 5-byte jmp, which now starts
+ at offset 9 (after the 4-byte endbr32), i.e. codeloc + 14. */
+ *(unsigned *)(tramp + 10) = (unsigned)dest - ((unsigned)codeloc + 14);
closure->cif = cif;
closure->fun = fun;
void *user_data,
void *codeloc)
{
- static const unsigned char trampoline[16] = {
- /* leaq -0x7(%rip),%r10 # 0x0 */
- 0x4c, 0x8d, 0x15, 0xf9, 0xff, 0xff, 0xff,
- /* jmpq *0x3(%rip) # 0x10 */
- 0xff, 0x25, 0x03, 0x00, 0x00, 0x00,
- /* nopl (%rax) */
- 0x0f, 0x1f, 0x00
+ static const unsigned char trampoline[24] = {
+ /* endbr64 */
+ 0xf3, 0x0f, 0x1e, 0xfa,
+ /* leaq -0xb(%rip),%r10 # 0x0 */
+ 0x4c, 0x8d, 0x15, 0xf5, 0xff, 0xff, 0xff,
+ /* jmpq *0x7(%rip) # 0x18 */
+ 0xff, 0x25, 0x07, 0x00, 0x00, 0x00,
+ /* nopl 0(%rax) */
+ 0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00
};
void (*dest)(void);
char *tramp = closure->tramp;
dest = ffi_closure_unix64;
memcpy (tramp, trampoline, sizeof(trampoline));
- *(UINT64 *)(tramp + 16) = (uintptr_t)dest;
+ *(UINT64 *)(tramp + sizeof (trampoline)) = (uintptr_t)dest;
closure->cif = cif;
closure->fun = fun;
#if defined (X86_64) || defined(X86_WIN64) \
|| (defined (__x86_64__) && defined (X86_DARWIN))
-# define FFI_TRAMPOLINE_SIZE 24
+/* 4 bytes of ENDBR64 + 7 bytes of LEA + 6 bytes of JMP + 7 bytes of NOP
+ + 8 bytes of pointer. */
+# define FFI_TRAMPOLINE_SIZE 32
# define FFI_NATIVE_RAW_API 0
#else
-# define FFI_TRAMPOLINE_SIZE 12
+/* 4 bytes of ENDBR32 + 5 bytes of MOV + 5 bytes of JMP + 2 unused
+ bytes. */
+# define FFI_TRAMPOLINE_SIZE 16
# define FFI_NATIVE_RAW_API 1 /* x86 has native raw api support */
#endif
+#if !defined(GENERATE_LIBFFI_MAP) && defined(__ASSEMBLER__) \
+ && defined(__CET__)
+# include <cet.h>
+# define _CET_NOTRACK notrack
+#else
+# define _CET_ENDBR
+# define _CET_NOTRACK
+#endif
+
#endif
void *user_data,
void *codeloc)
{
- static const unsigned char trampoline[16] = {
- /* leaq -0x7(%rip),%r10 # 0x0 */
- 0x4c, 0x8d, 0x15, 0xf9, 0xff, 0xff, 0xff,
- /* jmpq *0x3(%rip) # 0x10 */
- 0xff, 0x25, 0x03, 0x00, 0x00, 0x00,
- /* nopl (%rax) */
- 0x0f, 0x1f, 0x00
+ static const unsigned char trampoline[FFI_TRAMPOLINE_SIZE - 8] = {
+ /* endbr64 */
+ 0xf3, 0x0f, 0x1e, 0xfa,
+ /* leaq -0xb(%rip),%r10 # 0x0 */
+ 0x4c, 0x8d, 0x15, 0xf5, 0xff, 0xff, 0xff,
+ /* jmpq *0x7(%rip) # 0x18 */
+ 0xff, 0x25, 0x07, 0x00, 0x00, 0x00,
+ /* nopl 0(%rax) */
+ 0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00
};
char *tramp = closure->tramp;
}
memcpy (tramp, trampoline, sizeof(trampoline));
- *(UINT64 *)(tramp + 16) = (uintptr_t)ffi_closure_win64;
+ *(UINT64 *)(tramp + sizeof (trampoline)) = (uintptr_t)ffi_closure_win64;
closure->cif = cif;
closure->fun = fun;
ffi_call_i386:
L(UW0):
# cfi_startproc
+ _CET_ENDBR
#if !HAVE_FASTCALL
movl 4(%esp), %ecx
movl 8(%esp), %edx
leal L(store_table)(,%ecx, 8), %ebx
#endif
movl 16(%ebp), %ecx /* load result address */
- jmp *%ebx
+ _CET_NOTRACK jmp *%ebx
.balign 8
L(store_table):
andl $X86_RET_TYPE_MASK, %eax; \
leal L(C1(load_table,N))(, %eax, 8), %edx; \
movl closure_CF(%esp), %eax; /* optimistic load */ \
- jmp *%edx
+ _CET_NOTRACK jmp *%edx
#ifdef __PIC__
# if defined X86_DARWIN || defined HAVE_HIDDEN_VISIBILITY_ATTRIBUTE
L(C1(pc,N)): \
leal L(C1(load_table,N))-L(C1(pc,N))(%edx, %eax, 8), %edx; \
movl closure_CF(%esp), %eax; /* optimistic load */ \
- jmp *%edx
+ _CET_NOTRACK jmp *%edx
# else
# define FFI_CLOSURE_CALL_INNER_SAVE_EBX
# undef FFI_CLOSURE_CALL_INNER
L(C1(UW,UWN)): \
/* cfi_restore(%ebx); */ \
movl closure_CF(%esp), %eax; /* optimistic load */ \
- jmp *%edx
+ _CET_NOTRACK jmp *%edx
# endif /* DARWIN || HIDDEN */
#endif /* __PIC__ */
C(ffi_go_closure_EAX):
L(UW6):
# cfi_startproc
+ _CET_ENDBR
subl $closure_FS, %esp
L(UW7):
# cfi_def_cfa_offset(closure_FS + 4)
C(ffi_go_closure_ECX):
L(UW9):
# cfi_startproc
+ _CET_ENDBR
subl $closure_FS, %esp
L(UW10):
# cfi_def_cfa_offset(closure_FS + 4)
C(ffi_closure_i386):
L(UW12):
# cfi_startproc
+ _CET_ENDBR
subl $closure_FS, %esp
L(UW13):
# cfi_def_cfa_offset(closure_FS + 4)
C(ffi_go_closure_STDCALL):
L(UW21):
# cfi_startproc
+ _CET_ENDBR
subl $closure_FS, %esp
L(UW22):
# cfi_def_cfa_offset(closure_FS + 4)
# cfi_startproc
# cfi_def_cfa(%esp, 8)
# cfi_offset(%eip, -8)
+ _CET_ENDBR
subl $closure_FS-4, %esp
L(UW25):
# cfi_def_cfa_offset(closure_FS + 4)
C(ffi_closure_STDCALL):
L(UW27):
# cfi_startproc
+ _CET_ENDBR
subl $closure_FS, %esp
L(UW28):
# cfi_def_cfa_offset(closure_FS + 4)
C(ffi_closure_raw_SYSV):
L(UW32):
# cfi_startproc
+ _CET_ENDBR
subl $raw_closure_S_FS, %esp
L(UW33):
# cfi_def_cfa_offset(raw_closure_S_FS + 4)
C(ffi_closure_raw_THISCALL):
L(UW41):
# cfi_startproc
+ _CET_ENDBR
/* Rearrange the stack such that %ecx is the first argument.
This means moving the return address. */
popl %edx
#if defined(__clang__) || defined(__APPLE__) || (defined (__sun__) && defined(__svr4__))
# define E(BASE, X) .balign 8
#else
-# define E(BASE, X) .balign 8; .org BASE + X * 8
+# ifdef __CET__
+# define E(BASE, X) .balign 8; .org BASE + X * 16
+# else
+# define E(BASE, X) .balign 8; .org BASE + X * 8
+# endif
#endif
/* ffi_call_unix64 (void *args, unsigned long bytes, unsigned flags,
C(ffi_call_unix64):
L(UW0):
+ _CET_ENDBR
movq (%rsp), %r10 /* Load return address. */
leaq (%rdi, %rsi), %rax /* Find local stack base. */
movq %rdx, (%rax) /* Save flags. */
movzbl %cl, %r10d
leaq L(store_table)(%rip), %r11
ja L(sa)
+#ifdef __CET__
+ /* NB: Originally, each slot is 8 bytes. 4 bytes of ENDBR64 +
+ 4 bytes NOP padding double slot size to 16 bytes. */
+ addl %r10d, %r10d
+#endif
leaq (%r11, %r10, 8), %r10
/* Prep for the structure cases: scratch area in redzone. */
.balign 8
L(store_table):
E(L(store_table), UNIX64_RET_VOID)
+ _CET_ENDBR
ret
E(L(store_table), UNIX64_RET_UINT8)
+ _CET_ENDBR
movzbl %al, %eax
movq %rax, (%rdi)
ret
E(L(store_table), UNIX64_RET_UINT16)
+ _CET_ENDBR
movzwl %ax, %eax
movq %rax, (%rdi)
ret
E(L(store_table), UNIX64_RET_UINT32)
+ _CET_ENDBR
movl %eax, %eax
movq %rax, (%rdi)
ret
E(L(store_table), UNIX64_RET_SINT8)
+ _CET_ENDBR
movsbq %al, %rax
movq %rax, (%rdi)
ret
E(L(store_table), UNIX64_RET_SINT16)
+ _CET_ENDBR
movswq %ax, %rax
movq %rax, (%rdi)
ret
E(L(store_table), UNIX64_RET_SINT32)
+ _CET_ENDBR
cltq
movq %rax, (%rdi)
ret
E(L(store_table), UNIX64_RET_INT64)
+ _CET_ENDBR
movq %rax, (%rdi)
ret
E(L(store_table), UNIX64_RET_XMM32)
+ _CET_ENDBR
movd %xmm0, (%rdi)
ret
E(L(store_table), UNIX64_RET_XMM64)
+ _CET_ENDBR
movq %xmm0, (%rdi)
ret
E(L(store_table), UNIX64_RET_X87)
+ _CET_ENDBR
fstpt (%rdi)
ret
E(L(store_table), UNIX64_RET_X87_2)
+ _CET_ENDBR
fstpt (%rdi)
fstpt 16(%rdi)
ret
E(L(store_table), UNIX64_RET_ST_XMM0_RAX)
+ _CET_ENDBR
movq %rax, 8(%rsi)
jmp L(s3)
E(L(store_table), UNIX64_RET_ST_RAX_XMM0)
+ _CET_ENDBR
movq %xmm0, 8(%rsi)
jmp L(s2)
E(L(store_table), UNIX64_RET_ST_XMM0_XMM1)
+ _CET_ENDBR
movq %xmm1, 8(%rsi)
jmp L(s3)
E(L(store_table), UNIX64_RET_ST_RAX_RDX)
+ _CET_ENDBR
movq %rdx, 8(%rsi)
L(s2):
movq %rax, (%rsi)
C(ffi_closure_unix64_sse):
L(UW5):
+ _CET_ENDBR
subq $ffi_closure_FS, %rsp
L(UW6):
/* cfi_adjust_cfa_offset(ffi_closure_FS) */
C(ffi_closure_unix64):
L(UW8):
+ _CET_ENDBR
subq $ffi_closure_FS, %rsp
L(UW9):
/* cfi_adjust_cfa_offset(ffi_closure_FS) */
movzbl %al, %r10d
leaq L(load_table)(%rip), %r11
ja L(la)
+#ifdef __CET__
+ /* NB: Originally, each slot is 8 bytes. 4 bytes of ENDBR64 +
+ 4 bytes NOP padding double slot size to 16 bytes. */
+ addl %r10d, %r10d
+#endif
leaq (%r11, %r10, 8), %r10
leaq ffi_closure_RED_RVALUE(%rsp), %rsi
jmp *%r10
.balign 8
L(load_table):
E(L(load_table), UNIX64_RET_VOID)
+ _CET_ENDBR
ret
E(L(load_table), UNIX64_RET_UINT8)
+ _CET_ENDBR
movzbl (%rsi), %eax
ret
E(L(load_table), UNIX64_RET_UINT16)
+ _CET_ENDBR
movzwl (%rsi), %eax
ret
E(L(load_table), UNIX64_RET_UINT32)
+ _CET_ENDBR
movl (%rsi), %eax
ret
E(L(load_table), UNIX64_RET_SINT8)
+ _CET_ENDBR
movsbl (%rsi), %eax
ret
E(L(load_table), UNIX64_RET_SINT16)
+ _CET_ENDBR
movswl (%rsi), %eax
ret
E(L(load_table), UNIX64_RET_SINT32)
+ _CET_ENDBR
movl (%rsi), %eax
ret
E(L(load_table), UNIX64_RET_INT64)
+ _CET_ENDBR
movq (%rsi), %rax
ret
E(L(load_table), UNIX64_RET_XMM32)
+ _CET_ENDBR
movd (%rsi), %xmm0
ret
E(L(load_table), UNIX64_RET_XMM64)
+ _CET_ENDBR
movq (%rsi), %xmm0
ret
E(L(load_table), UNIX64_RET_X87)
+ _CET_ENDBR
fldt (%rsi)
ret
E(L(load_table), UNIX64_RET_X87_2)
+ _CET_ENDBR
fldt 16(%rsi)
fldt (%rsi)
ret
E(L(load_table), UNIX64_RET_ST_XMM0_RAX)
+ _CET_ENDBR
movq 8(%rsi), %rax
jmp L(l3)
E(L(load_table), UNIX64_RET_ST_RAX_XMM0)
+ _CET_ENDBR
movq 8(%rsi), %xmm0
jmp L(l2)
E(L(load_table), UNIX64_RET_ST_XMM0_XMM1)
+ _CET_ENDBR
movq 8(%rsi), %xmm1
jmp L(l3)
E(L(load_table), UNIX64_RET_ST_RAX_RDX)
+ _CET_ENDBR
movq 8(%rsi), %rdx
L(l2):
movq (%rsi), %rax
C(ffi_go_closure_unix64_sse):
L(UW12):
+ _CET_ENDBR
subq $ffi_closure_FS, %rsp
L(UW13):
/* cfi_adjust_cfa_offset(ffi_closure_FS) */
C(ffi_go_closure_unix64):
L(UW15):
+ _CET_ENDBR
subq $ffi_closure_FS, %rsp
L(UW16):
/* cfi_adjust_cfa_offset(ffi_closure_FS) */
#endif
/* Simplify advancing between labels. Assume DW_CFA_advance_loc1 fits. */
-#define ADV(N, P) .byte 2, L(N)-L(P)
+#ifdef __CET__
+/* Use DW_CFA_advance_loc2 when IBT is enabled. */
+# define ADV(N, P) .byte 3; .2byte L(N)-L(P)
+#else
+# define ADV(N, P) .byte 2, L(N)-L(P)
+#endif
.balign 8
L(CIE):
SEH(.seh_proc ffi_call_win64)
C(ffi_call_win64):
cfi_startproc
+ _CET_ENDBR
/* Set up the local stack frame and install it in rbp/rsp. */
movq (%rsp), %rax
movq %rbp, (arg1)
cmpl $FFI_TYPE_SMALL_STRUCT_4B, %ecx
leaq (%r10, %rcx, 8), %r10
ja 99f
- jmp *%r10
+ _CET_NOTRACK jmp *%r10
/* Below, we're space constrained most of the time. Thus we eschew the
modern "mov, pop, ret" sequence (5 bytes) for "leave, ret" (2 bytes). */
SEH(.seh_proc ffi_go_closure_win64)
C(ffi_go_closure_win64):
cfi_startproc
+ _CET_ENDBR
/* Save all integer arguments into the incoming reg stack space. */
movq %rcx, 8(%rsp)
movq %rdx, 16(%rsp)
SEH(.seh_proc ffi_closure_win64)
C(ffi_closure_win64):
cfi_startproc
+ _CET_ENDBR
/* Save all integer arguments into the incoming reg stack space. */
movq %rcx, 8(%rsp)
movq %rdx, 16(%rsp)