Save/restore bound registers in _dl_runtime_resolve
author Igor Zamyatin <igor.zamyatin@intel.com>
Tue, 1 Apr 2014 17:16:04 +0000 (10:16 -0700)
committer H.J. Lu <hjl.tools@gmail.com>
Wed, 9 Apr 2014 22:38:09 +0000 (15:38 -0700)
This patch saves and restores bound registers in symbol lookup for x86-64:

1. Branches without BND prefix clear bound registers.
2. x86-64 passes bounds in bound registers as specified in MPX psABI
extension on hjl/mpx/master branch at

https://github.com/hjl-tools/x86-64-psABI
https://groups.google.com/forum/#!topic/x86-64-abi/KFsB0XTgWYc

Binutils has been updated to create an alternate PLT to add BND prefix
when branching to ld.so.

* config.h.in (HAVE_MPX_SUPPORT): New #undef.
* sysdeps/x86_64/configure.ac: Set HAVE_MPX_SUPPORT.
* sysdeps/x86_64/configure: Regenerated.
* sysdeps/x86_64/dl-trampoline.S (REGISTER_SAVE_AREA): New
macro.
(REGISTER_SAVE_RAX): Likewise.
(REGISTER_SAVE_RCX): Likewise.
(REGISTER_SAVE_RDX): Likewise.
(REGISTER_SAVE_RSI): Likewise.
(REGISTER_SAVE_RDI): Likewise.
(REGISTER_SAVE_R8): Likewise.
(REGISTER_SAVE_R9): Likewise.
(REGISTER_SAVE_BND0): Likewise.
(REGISTER_SAVE_BND1): Likewise.
(REGISTER_SAVE_BND2, REGISTER_SAVE_BND3): Likewise.
(_dl_runtime_resolve): Use them.  Save and restore Intel MPX
bound registers when calling _dl_fixup.

ChangeLog
config.h.in
sysdeps/x86_64/configure
sysdeps/x86_64/configure.ac
sysdeps/x86_64/dl-trampoline.S

index 4598b60..7cc48ce 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,24 @@
+2014-04-09  Igor Zamyatin  <igor.zamyatin@intel.com>
+           H.J. Lu  <hongjiu.lu@intel.com>
+
+       * config.h.in (HAVE_MPX_SUPPORT): New #undef.
+       * sysdeps/x86_64/configure.ac: Set HAVE_MPX_SUPPORT.
+       * sysdeps/x86_64/configure: Regenerated.
+       * sysdeps/x86_64/dl-trampoline.S (REGISTER_SAVE_AREA): New
+       macro.
+       (REGISTER_SAVE_RAX): Likewise.
+       (REGISTER_SAVE_RCX): Likewise.
+       (REGISTER_SAVE_RDX): Likewise.
+       (REGISTER_SAVE_RSI): Likewise.
+       (REGISTER_SAVE_RDI): Likewise.
+       (REGISTER_SAVE_R8): Likewise.
+       (REGISTER_SAVE_R9): Likewise.
+       (REGISTER_SAVE_BND0): Likewise.
+       (REGISTER_SAVE_BND1): Likewise.
+       (REGISTER_SAVE_BND2, REGISTER_SAVE_BND3): Likewise.
+       (_dl_runtime_resolve): Use them.  Save and restore Intel MPX
+       bound registers when calling _dl_fixup.
+
 2014-04-09  Adhemerval Zanella  <azanella@linux.vnet.ibm.com>
 
        * bits/string.h (_STRING_ARCH_unaligned): Define it to 0.
index 3fc34bd..b6e3623 100644 (file)
 /* Define if assembler supports AVX512.  */
 #undef  HAVE_AVX512_ASM_SUPPORT
 
+/* Define if assembler supports Intel MPX.  */
+#undef  HAVE_MPX_SUPPORT
+
 /* Define if gcc supports FMA4.  */
 #undef HAVE_FMA4_SUPPORT
 
index c1c88c8..45d868d 100644 (file)
@@ -222,6 +222,33 @@ $as_echo "$libc_cv_cc_novzeroupper" >&6; }
 config_vars="$config_vars
 config-cflags-novzeroupper = $libc_cv_cc_novzeroupper"
 
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for Intel MPX support" >&5
+$as_echo_n "checking for Intel MPX support... " >&6; }
+if ${libc_cv_asm_mpx+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  cat > conftest.s <<\EOF
+        bndmov %bnd0,(%rsp)
+EOF
+if { ac_try='${CC-cc} -c $ASFLAGS conftest.s 1>&5'
+  { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+  (eval $ac_try) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; }; then
+  libc_cv_asm_mpx=yes
+else
+  libc_cv_asm_mpx=no
+fi
+rm -f conftest*
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libc_cv_asm_mpx" >&5
+$as_echo "$libc_cv_asm_mpx" >&6; }
+if test $libc_cv_asm_mpx == yes; then
+  $as_echo "#define HAVE_MPX_SUPPORT 1" >>confdefs.h
+
+fi
+
 $as_echo "#define PI_STATIC_AND_HIDDEN 1" >>confdefs.h
 
 # work around problem with autoconf and empty lines at the end of files
index d34f9a8..9138f63 100644 (file)
@@ -75,6 +75,21 @@ LIBC_TRY_CC_OPTION([-mno-vzeroupper],
 ])
 LIBC_CONFIG_VAR([config-cflags-novzeroupper], [$libc_cv_cc_novzeroupper])
 
+dnl Check whether the assembler accepts Intel MPX instructions (bndmov).
+AC_CACHE_CHECK(for Intel MPX support, libc_cv_asm_mpx, [dnl
+cat > conftest.s <<\EOF
+        bndmov %bnd0,(%rsp)
+EOF
+if AC_TRY_COMMAND(${CC-cc} -c $ASFLAGS conftest.s 1>&AS_MESSAGE_LOG_FD); then
+  libc_cv_asm_mpx=yes
+else
+  libc_cv_asm_mpx=no
+fi
+rm -f conftest*])
+if test $libc_cv_asm_mpx = yes; then
+  AC_DEFINE(HAVE_MPX_SUPPORT)
+fi
+
 dnl It is always possible to access static and hidden symbols in an
 dnl position independent way.
 AC_DEFINE(PI_STATIC_AND_HIDDEN)
index 77c4d0f..3c0f542 100644 (file)
 # error RTLD_SAVESPACE_SSE must be aligned to 32 bytes
 #endif
 
+/* Layout of the stack area used to save and restore the registers used
+   for parameter passing when calling _dl_fixup.  Offsets are from %rsp.  */
+#ifdef __ILP32__
+/* X32 saves RCX, RDX, RSI, RDI, R8 and R9 plus RAX (seven 8-byte slots).  */
+# define REGISTER_SAVE_AREA    (8 * 7)
+# define REGISTER_SAVE_RAX     0
+#else
+/* X86-64 saves RCX, RDX, RSI, RDI, R8 and R9 plus RAX as well as BND0,
+   BND1, BND2, BND3 (seven 8-byte slots plus four 16-byte slots).  */
+# define REGISTER_SAVE_AREA    (8 * 7 + 16 * 4)
+/* Put the bound register slots first to align them to 16 bytes.  */
+# define REGISTER_SAVE_BND0    0
+# define REGISTER_SAVE_BND1    (REGISTER_SAVE_BND0 + 16)
+# define REGISTER_SAVE_BND2    (REGISTER_SAVE_BND1 + 16)
+# define REGISTER_SAVE_BND3    (REGISTER_SAVE_BND2 + 16)
+# define REGISTER_SAVE_RAX     (REGISTER_SAVE_BND3 + 16)
+#endif
+#define REGISTER_SAVE_RCX      (REGISTER_SAVE_RAX + 8)
+#define REGISTER_SAVE_RDX      (REGISTER_SAVE_RCX + 8)
+#define REGISTER_SAVE_RSI      (REGISTER_SAVE_RDX + 8)
+#define REGISTER_SAVE_RDI      (REGISTER_SAVE_RSI + 8)
+#define REGISTER_SAVE_R8       (REGISTER_SAVE_RDI + 8)
+#define REGISTER_SAVE_R9       (REGISTER_SAVE_R8 + 8)
+
        .text
        .globl _dl_runtime_resolve
        .type _dl_runtime_resolve, @function
        cfi_startproc
 _dl_runtime_resolve:
        cfi_adjust_cfa_offset(16) # Incorporate PLT
-       subq $56,%rsp
-       cfi_adjust_cfa_offset(56)
-       movq %rax,(%rsp)        # Preserve registers otherwise clobbered.
-       movq %rcx, 8(%rsp)
-       movq %rdx, 16(%rsp)
-       movq %rsi, 24(%rsp)
-       movq %rdi, 32(%rsp)
-       movq %r8, 40(%rsp)
-       movq %r9, 48(%rsp)
-       movq 64(%rsp), %rsi     # Copy args pushed by PLT in register.
-       movq 56(%rsp), %rdi     # %rdi: link_map, %rsi: reloc_index
+       subq $REGISTER_SAVE_AREA,%rsp
+       cfi_adjust_cfa_offset(REGISTER_SAVE_AREA)
+       # Preserve %rax and the parameter registers across the _dl_fixup call.
+       movq %rax, REGISTER_SAVE_RAX(%rsp)
+       movq %rcx, REGISTER_SAVE_RCX(%rsp)
+       movq %rdx, REGISTER_SAVE_RDX(%rsp)
+       movq %rsi, REGISTER_SAVE_RSI(%rsp)
+       movq %rdi, REGISTER_SAVE_RDI(%rsp)
+       movq %r8, REGISTER_SAVE_R8(%rsp)
+       movq %r9, REGISTER_SAVE_R9(%rsp)
+#ifndef __ILP32__
+       # We also have to preserve bound registers.  These are nops if
+       # Intel MPX isn't available or is disabled.
+# ifdef HAVE_MPX_SUPPORT
+       bndmov %bnd0, REGISTER_SAVE_BND0(%rsp)
+       bndmov %bnd1, REGISTER_SAVE_BND1(%rsp)
+       bndmov %bnd2, REGISTER_SAVE_BND2(%rsp)
+       bndmov %bnd3, REGISTER_SAVE_BND3(%rsp)
+# else
+       .byte 0x66,0x0f,0x1b,0x44,0x24,REGISTER_SAVE_BND0  # bndmov %bnd0, disp8(%rsp)
+       .byte 0x66,0x0f,0x1b,0x4c,0x24,REGISTER_SAVE_BND1  # bndmov %bnd1, disp8(%rsp)
+       .byte 0x66,0x0f,0x1b,0x54,0x24,REGISTER_SAVE_BND2  # bndmov %bnd2, disp8(%rsp)
+       .byte 0x66,0x0f,0x1b,0x5c,0x24,REGISTER_SAVE_BND3  # bndmov %bnd3, disp8(%rsp)
+# endif
+#endif
+       # Copy the two args pushed by the PLT (just above the save area)
+       # into registers.  %rdi: link_map, %rsi: reloc_index
+       movq (REGISTER_SAVE_AREA + 8)(%rsp), %rsi
+       movq REGISTER_SAVE_AREA(%rsp), %rdi
        call _dl_fixup          # Call resolver.
        movq %rax, %r11         # Save return value
-       movq 48(%rsp), %r9      # Get register content back.
-       movq 40(%rsp), %r8
-       movq 32(%rsp), %rdi
-       movq 24(%rsp), %rsi
-       movq 16(%rsp), %rdx
-       movq 8(%rsp), %rcx
-       movq (%rsp), %rax
-       addq $72, %rsp          # Adjust stack(PLT did 2 pushes)
-       cfi_adjust_cfa_offset(-72)
+#ifndef __ILP32__
+       # Restore bound registers.  These are nops if Intel MPX isn't
+       # available or is disabled.
+# ifdef HAVE_MPX_SUPPORT
+       bndmov REGISTER_SAVE_BND3(%rsp), %bnd3
+       bndmov REGISTER_SAVE_BND2(%rsp), %bnd2
+       bndmov REGISTER_SAVE_BND1(%rsp), %bnd1
+       bndmov REGISTER_SAVE_BND0(%rsp), %bnd0
+# else
+       .byte 0x66,0x0f,0x1a,0x5c,0x24,REGISTER_SAVE_BND3  # bndmov disp8(%rsp), %bnd3
+       .byte 0x66,0x0f,0x1a,0x54,0x24,REGISTER_SAVE_BND2  # bndmov disp8(%rsp), %bnd2
+       .byte 0x66,0x0f,0x1a,0x4c,0x24,REGISTER_SAVE_BND1  # bndmov disp8(%rsp), %bnd1
+       .byte 0x66,0x0f,0x1a,0x44,0x24,REGISTER_SAVE_BND0  # bndmov disp8(%rsp), %bnd0
+# endif
+#endif
+       # Get register content back.
+       movq REGISTER_SAVE_R9(%rsp), %r9
+       movq REGISTER_SAVE_R8(%rsp), %r8
+       movq REGISTER_SAVE_RDI(%rsp), %rdi
+       movq REGISTER_SAVE_RSI(%rsp), %rsi
+       movq REGISTER_SAVE_RDX(%rsp), %rdx
+       movq REGISTER_SAVE_RCX(%rsp), %rcx
+       movq REGISTER_SAVE_RAX(%rsp), %rax
+       # Drop the save area plus the 16 bytes from the PLT's 2 pushes.
+       addq $(REGISTER_SAVE_AREA + 16), %rsp
+       cfi_adjust_cfa_offset(-(REGISTER_SAVE_AREA + 16))
        jmp *%r11               # Jump to function address.
        cfi_endproc
        .size _dl_runtime_resolve, .-_dl_runtime_resolve