Convert indirect calls to direct when possible.
author Sriraman Tallam <tmsriram@google.com>
Tue, 28 Jun 2016 22:42:33 +0000 (15:42 -0700)
committer Sriraman Tallam <tmsriram@google.com>
Tue, 28 Jun 2016 22:42:33 +0000 (15:42 -0700)
Please see patch discussion:
https://www.sourceware.org/ml/binutils/2016-05/msg00322.html

2016-06-28  Sriraman Tallam  <tmsriram@google.com>

* x86_64.cc (Lazy_view): New class.
(can_convert_mov_to_lea): Templatize function.  Make the function
check for appropriate relocation types and use the view parameter
to get section contents.
(can_convert_callq_to_direct): New function.
(Target_x86_64<size>::Scan::global): Refactor.
(Target_x86_64<size>::Relocate::relocate): Refactor. Change any indirect
call via GOT that can be converted.
* testsuite/Makefile.am (x86_64_indirect_call_to_direct.sh): New test.
* testsuite/Makefile.in: Regenerate.
* testsuite/x86_64_indirect_call_to_direct1.s: New file.
* testsuite/x86_64_indirect_jump_to_direct1.s: New file.

gold/ChangeLog
gold/testsuite/Makefile.am
gold/testsuite/Makefile.in
gold/testsuite/x86_64_indirect_call_to_direct.sh [new file with mode: 0755]
gold/testsuite/x86_64_indirect_call_to_direct1.s [new file with mode: 0644]
gold/testsuite/x86_64_indirect_jump_to_direct1.s [new file with mode: 0644]
gold/x86_64.cc

index d6bae2b..8b293cd 100644 (file)
@@ -1,3 +1,18 @@
+2016-06-28  Sriraman Tallam  <tmsriram@google.com>
+
+       * x86_64.cc (Lazy_view): New class.
+       (can_convert_mov_to_lea): Templatize function.  Make the function
+       check for appropriate relocation types and use the view parameter
+       to get section contents.
+       (can_convert_callq_to_direct): New function.
+       (Target_x86_64<size>::Scan::global): Refactor.
+       (Target_x86_64<size>::Relocate::relocate): Refactor. Change any indirect
+       call via GOT that can be converted.
+       * testsuite/Makefile.am (x86_64_indirect_call_to_direct.sh): New test.
+       * testsuite/Makefile.in: Regenerate.
+       * testsuite/x86_64_indirect_call_to_direct1.s: New file.
+       * testsuite/x86_64_indirect_jump_to_direct1.s: New file.
+
 2016-06-28  Igor Kudrin  <ikudrin@accesssoftek.com>
 
        * aarch64.cc (Target_aarch64::Scan::local): Move the call to got_section
index 9e232e9..9d4326b 100644 (file)
@@ -1096,6 +1096,25 @@ x86_64_mov_to_lea13.stdout: x86_64_mov_to_lea13
 x86_64_mov_to_lea14.stdout: x86_64_mov_to_lea14
        $(TEST_OBJDUMP) -dw $< > $@
 
+check_SCRIPTS += x86_64_indirect_call_to_direct.sh
+check_DATA += x86_64_indirect_call_to_direct1.stdout \
+       x86_64_indirect_jump_to_direct1.stdout
+MOSTLYCLEANFILES += x86_64_indirect_call_to_direct1 \
+       x86_64_indirect_jump_to_direct1
+
+x86_64_indirect_call_to_direct1.o: x86_64_indirect_call_to_direct1.s
+       $(TEST_AS) --64 -mrelax-relocations=yes -o $@ $<
+x86_64_indirect_call_to_direct1: x86_64_indirect_call_to_direct1.o gcctestdir/ld
+       gcctestdir/ld -o $@ $<
+x86_64_indirect_call_to_direct1.stdout: x86_64_indirect_call_to_direct1
+       $(TEST_OBJDUMP) -dw $< > $@
+x86_64_indirect_jump_to_direct1.o: x86_64_indirect_jump_to_direct1.s
+       $(TEST_AS) --64 -mrelax-relocations=yes -o $@ $<
+x86_64_indirect_jump_to_direct1: x86_64_indirect_jump_to_direct1.o gcctestdir/ld
+       gcctestdir/ld -o $@ $<
+x86_64_indirect_jump_to_direct1.stdout: x86_64_indirect_jump_to_direct1
+       $(TEST_OBJDUMP) -dw $< > $@
+
 check_SCRIPTS += x86_64_overflow_pc32.sh
 check_DATA += x86_64_overflow_pc32.err
 MOSTLYCLEANFILES += x86_64_overflow_pc32.err
index 5b66c9c..894e6dd 100644 (file)
@@ -223,6 +223,7 @@ check_PROGRAMS = $(am__EXEEXT_1) $(am__EXEEXT_2) $(am__EXEEXT_3) \
 @GCC_TRUE@@HAVE_STATIC_TRUE@@NATIVE_LINKER_TRUE@@STATIC_TLS_TRUE@@TLS_TRUE@    tls_static_pic_test
 @FN_PTRS_IN_SO_WITHOUT_PIC_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@@TLS_TRUE@am__append_26 = tls_shared_nonpic_test
 @DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@am__append_27 = x86_64_mov_to_lea.sh \
+@DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@     x86_64_indirect_call_to_direct.sh \
 @DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@     x86_64_overflow_pc32.sh \
 @DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@     x32_overflow_pc32.sh
 @DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@am__append_28 = x86_64_mov_to_lea1.stdout \
@@ -239,6 +240,8 @@ check_PROGRAMS = $(am__EXEEXT_1) $(am__EXEEXT_2) $(am__EXEEXT_3) \
 @DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@     x86_64_mov_to_lea12.stdout \
 @DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@     x86_64_mov_to_lea13.stdout \
 @DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@     x86_64_mov_to_lea14.stdout \
+@DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@     x86_64_indirect_call_to_direct1.stdout \
+@DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@     x86_64_indirect_jump_to_direct1.stdout \
 @DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@     x86_64_overflow_pc32.err \
 @DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@     x32_overflow_pc32.err
 @DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@am__append_29 = x86_64_mov_to_lea1 \
@@ -255,6 +258,8 @@ check_PROGRAMS = $(am__EXEEXT_1) $(am__EXEEXT_2) $(am__EXEEXT_3) \
 @DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@     x86_64_mov_to_lea12 \
 @DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@     x86_64_mov_to_lea13 \
 @DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@     x86_64_mov_to_lea14 \
+@DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@     x86_64_indirect_call_to_direct1 \
+@DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@     x86_64_indirect_jump_to_direct1 \
 @DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@     x86_64_overflow_pc32.err \
 @DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@     x32_overflow_pc32.err
 @DEFAULT_TARGET_I386_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@am__append_30 = i386_mov_to_lea.sh
@@ -4564,6 +4569,8 @@ tls_pie_test.sh.log: tls_pie_test.sh
        @p='tls_pie_test.sh'; $(am__check_pre) $(LOG_COMPILE) "$$tst" $(am__check_post)
 x86_64_mov_to_lea.sh.log: x86_64_mov_to_lea.sh
        @p='x86_64_mov_to_lea.sh'; $(am__check_pre) $(LOG_COMPILE) "$$tst" $(am__check_post)
+x86_64_indirect_call_to_direct.sh.log: x86_64_indirect_call_to_direct.sh
+       @p='x86_64_indirect_call_to_direct.sh'; $(am__check_pre) $(LOG_COMPILE) "$$tst" $(am__check_post)
 x86_64_overflow_pc32.sh.log: x86_64_overflow_pc32.sh
        @p='x86_64_overflow_pc32.sh'; $(am__check_pre) $(LOG_COMPILE) "$$tst" $(am__check_post)
 x32_overflow_pc32.sh.log: x32_overflow_pc32.sh
@@ -5641,6 +5648,19 @@ uninstall-am:
 @DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@     $(TEST_OBJDUMP) -dw $< > $@
 @DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@x86_64_mov_to_lea14.stdout: x86_64_mov_to_lea14
 @DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@     $(TEST_OBJDUMP) -dw $< > $@
+
+@DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@x86_64_indirect_call_to_direct1.o: x86_64_indirect_call_to_direct1.s
+@DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@     $(TEST_AS) --64 -mrelax-relocations=yes -o $@ $<
+@DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@x86_64_indirect_call_to_direct1: x86_64_indirect_call_to_direct1.o gcctestdir/ld
+@DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@     gcctestdir/ld -o $@ $<
+@DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@x86_64_indirect_call_to_direct1.stdout: x86_64_indirect_call_to_direct1
+@DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@     $(TEST_OBJDUMP) -dw $< > $@
+@DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@x86_64_indirect_jump_to_direct1.o: x86_64_indirect_jump_to_direct1.s
+@DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@     $(TEST_AS) --64 -mrelax-relocations=yes -o $@ $<
+@DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@x86_64_indirect_jump_to_direct1: x86_64_indirect_jump_to_direct1.o gcctestdir/ld
+@DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@     gcctestdir/ld -o $@ $<
+@DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@x86_64_indirect_jump_to_direct1.stdout: x86_64_indirect_jump_to_direct1
+@DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@     $(TEST_OBJDUMP) -dw $< > $@
 @DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@x86_64_overflow_pc32.o: x86_64_overflow_pc32.s
 @DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@     $(TEST_AS) -o $@ $<
 @DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@x86_64_overflow_pc32.err: x86_64_overflow_pc32.o gcctestdir/ld
diff --git a/gold/testsuite/x86_64_indirect_call_to_direct.sh b/gold/testsuite/x86_64_indirect_call_to_direct.sh
new file mode 100755 (executable)
index 0000000..916e1a3
--- /dev/null
@@ -0,0 +1,29 @@
+#!/bin/sh
+
+# x86_64_indirect_call_to_direct.sh -- a test for the conversion of indirect
+# calls and jumps through the GOT into direct calls and jumps.
+
+# Copyright (C) 2016 Free Software Foundation, Inc.
+# Written by Sriraman Tallam <tmsriram@google.com>
+
+# This file is part of gold.
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston,
+# MA 02110-1301, USA.
+
+set -e  # Fail the test as soon as one of the checks below fails.
+
+grep -q "callq[ ]\+[a-f0-9]\+ <foo>" x86_64_indirect_call_to_direct1.stdout  # Disassembly must show a direct callq to foo.
+grep -q "jmpq[ ]\+[a-f0-9]\+ <foo>" x86_64_indirect_jump_to_direct1.stdout  # Disassembly must show a direct jmpq to foo.
diff --git a/gold/testsuite/x86_64_indirect_call_to_direct1.s b/gold/testsuite/x86_64_indirect_call_to_direct1.s
new file mode 100644 (file)
index 0000000..5ca2e38
--- /dev/null
@@ -0,0 +1,12 @@
+       .text
+       .globl  foo
+       .type   foo, @function
+foo:
+       ret
+       .size   foo, .-foo
+       .globl  main
+       .type   main, @function
+main:
+       call    *foo@GOTPCREL(%rip)
+       ret
+       .size   main, .-main
diff --git a/gold/testsuite/x86_64_indirect_jump_to_direct1.s b/gold/testsuite/x86_64_indirect_jump_to_direct1.s
new file mode 100644 (file)
index 0000000..b817e34
--- /dev/null
@@ -0,0 +1,11 @@
+       .text
+       .globl  foo
+       .type   foo, @function
+foo:
+       ret
+       .size   foo, .-foo
+       .globl  main
+       .type   main, @function
+main:
+       jmp     *foo@GOTPCREL(%rip)
+       .size   main, .-main
index d069957..6c511e2 100644 (file)
@@ -403,6 +403,33 @@ class Output_data_plt_x86_64_standard : public Output_data_plt_x86_64<size>
   static const unsigned char plt_eh_frame_fde[plt_eh_frame_fde_size];
 };
 
+template<int size>
+class Lazy_view  // Byte view of an object's section, fetched on first access.
+{
+ public:
+  Lazy_view(Sized_relobj_file<size, false>* object, unsigned int data_shndx)
+    : object_(object), data_shndx_(data_shndx), view_(NULL), view_size_(0)
+  { }  // No section contents are read at construction time.
+
+  inline unsigned char
+  operator[](size_t offset)  // Return the byte at OFFSET, or 0 if out of range.
+  {
+    if (this->view_ == NULL)  // First access: fetch the section contents.
+      this->view_ = this->object_->section_contents(this->data_shndx_,
+                                                    &this->view_size_,
+                                                    true);
+    if (offset >= this->view_size_)  // OFFSET is past the end of the view.
+      return 0;
+    return this->view_[offset];
+  }
+
+ private:
+  Sized_relobj_file<size, false>* object_;  // Object given to the constructor.
+  unsigned int data_shndx_;  // Section index passed to section_contents.
+  const unsigned char* view_;  // Section contents; NULL until first access.
+  section_size_type view_size_;  // Size filled in by section_contents.
+};
+
 // The x86_64 target class.
 // See the ABI at
 //   http://www.x86-64.org/documentation/abi.pdf
@@ -876,19 +903,62 @@ class Target_x86_64 : public Sized_target<size, false>
   // conversion from
   // mov foo@GOTPCREL(%rip), %reg
   // to lea foo(%rip), %reg.
-  static bool
-  can_convert_mov_to_lea(const Symbol* gsym)
+  template<class View_type>
+  static inline bool
+  can_convert_mov_to_lea(const Symbol* gsym, unsigned int r_type,
+                         size_t r_offset, View_type* view)
   {
     gold_assert(gsym != NULL);
-    return (gsym->type() != elfcpp::STT_GNU_IFUNC
-           && !gsym->is_undefined ()
-           && !gsym->is_from_dynobj()
-           && !gsym->is_preemptible()
-           && (!parameters->options().shared()
-               || (gsym->visibility() != elfcpp::STV_DEFAULT
-                   && gsym->visibility() != elfcpp::STV_PROTECTED)
-               || parameters->options().Bsymbolic())
-           && strcmp(gsym->name(), "_DYNAMIC") != 0);
+    // We cannot do the conversion unless it's one of these relocations.
+    if (r_type != elfcpp::R_X86_64_GOTPCREL
+        && r_type != elfcpp::R_X86_64_GOTPCRELX
+        && r_type != elfcpp::R_X86_64_REX_GOTPCRELX)
+      return false;
+    // We cannot convert references to IFUNC symbols, or to symbols that
+    // are not local to the current module.
+    if (gsym->type() == elfcpp::STT_GNU_IFUNC
+        || gsym->is_undefined ()
+        || gsym->is_from_dynobj()
+        || gsym->is_preemptible())
+      return false;
+    // If we are building a shared object and the symbol is protected, we may
+    // need to go through the GOT.
+    if (parameters->options().shared()
+        && gsym->visibility() == elfcpp::STV_PROTECTED)
+      return false;
+    // We cannot convert references to the _DYNAMIC symbol.
+    if (strcmp(gsym->name(), "_DYNAMIC") == 0)
+      return false;
+    // Check for a MOV opcode.
+    return (*view)[r_offset - 2] == 0x8b;
+  }
+
+  // Return true if we can convert
+  // callq *foo@GOTPCRELX(%rip) to
+  // addr32 callq foo
+  // and jmpq *foo@GOTPCRELX(%rip) to
+  // jmpq foo
+  // nop
+  template<class View_type>
+  static inline bool
+  can_convert_callq_to_direct(const Symbol* gsym, unsigned int r_type,
+                             size_t r_offset, View_type* view)
+  {
+    gold_assert(gsym != NULL);
+    // We cannot do the conversion unless it's a GOTPCRELX relocation.
+    if (r_type != elfcpp::R_X86_64_GOTPCRELX)
+      return false;
+    // We cannot convert references to IFUNC symbols, or to symbols that
+    // are not local to the current module.
+    if (gsym->type() == elfcpp::STT_GNU_IFUNC
+        || gsym->is_undefined ()
+        || gsym->is_from_dynobj()
+        || gsym->is_preemptible())
+      return false;
+    // Check for a CALLQ or JMPQ opcode.
+    return ((*view)[r_offset - 2] == 0xff  // Two bytes before the relocation.
+            && ((*view)[r_offset - 1] == 0x15  // Indirect callq.
+                || (*view)[r_offset - 1] == 0x25));  // Indirect jmpq.
+  }
 
   // Adjust TLS relocation type based on the options and whether this
@@ -2935,19 +3005,24 @@ Target_x86_64<size>::Scan::global(Symbol_table* symtab,
        // If we convert this from
        // mov foo@GOTPCREL(%rip), %reg
        // to lea foo(%rip), %reg.
+       // OR
+       // if we convert
+       // (callq|jmpq) *foo@GOTPCRELX(%rip) to
+       // (callq|jmpq) foo
        // in Relocate::relocate, then there is nothing to do here.
-       if ((r_type == elfcpp::R_X86_64_GOTPCREL
-            || r_type == elfcpp::R_X86_64_GOTPCRELX
-            || r_type == elfcpp::R_X86_64_REX_GOTPCRELX)
-           && reloc.get_r_offset() >= 2
-           && Target_x86_64<size>::can_convert_mov_to_lea(gsym))
-         {
-           section_size_type stype;
-           const unsigned char* view = object->section_contents(data_shndx,
-                                                                &stype, true);
-           if (view[reloc.get_r_offset() - 2] == 0x8b)
-             break;
-         }
+
+        Lazy_view<size> view(object, data_shndx);
+        size_t r_offset = reloc.get_r_offset();
+        if (r_offset >= 2
+            && Target_x86_64<size>::can_convert_mov_to_lea(gsym, r_type,
+                                                           r_offset, &view))
+          break;
+
+       if (r_offset >= 2
+           && Target_x86_64<size>::can_convert_callq_to_direct(gsym, r_type,
+                                                               r_offset,
+                                                               &view))
+          break;
 
        if (gsym->final_value_is_known())
          {
@@ -3629,15 +3704,56 @@ Target_x86_64<size>::Relocate::relocate(
       // mov foo@GOTPCREL(%rip), %reg
       // to lea foo(%rip), %reg.
       // if possible.
-      if (rela.get_r_offset() >= 2
-         && view[-2] == 0x8b
-         && ((gsym == NULL && !psymval->is_ifunc_symbol())
-             || (gsym != NULL
-                 && Target_x86_64<size>::can_convert_mov_to_lea(gsym))))
+       if ((gsym == NULL
+             && rela.get_r_offset() >= 2
+             && view[-2] == 0x8b
+             && !psymval->is_ifunc_symbol())
+            || (gsym != NULL
+                && rela.get_r_offset() >= 2
+                && Target_x86_64<size>::can_convert_mov_to_lea(gsym, r_type,
+                                                               0, &view)))
        {
          view[-2] = 0x8d;
          Reloc_funcs::pcrela32(view, object, psymval, addend, address);
        }
+      // Convert
+      // callq *foo@GOTPCRELX(%rip) to
+      // addr32 callq foo
+      // and jmpq *foo@GOTPCRELX(%rip) to
+      // jmpq foo
+      // nop
+      else if (gsym != NULL
+              && rela.get_r_offset() >= 2
+              && Target_x86_64<size>::can_convert_callq_to_direct(gsym,
+                                                                  r_type,
+                                                                  0, &view))
+       {
+         if (view[-1] == 0x15)
+           {
+             // Convert callq *foo@GOTPCRELX(%rip) to addr32 callq.
+             // Opcode of addr32 is 0x67 and opcode of direct callq is 0xe8.
+             view[-2] = 0x67;
+             view[-1] = 0xe8;
+             // Convert GOTPCRELX to 32-bit pc relative reloc.
+             Reloc_funcs::pcrela32(view, object, psymval, addend, address);
+           }
+         else
+           {
+             // Convert jmpq *foo@GOTPCRELX(%rip) to
+             // jmpq foo
+             // nop
+             // The opcode of direct jmpq is 0xe9.
+             view[-2] = 0xe9;
+             // The opcode of nop is 0x90.
+             view[3] = 0x90;
+             // Convert GOTPCRELX to 32-bit pc relative reloc.  jmpq is rip
+             // relative and since the instruction following the jmpq is now
+             // the nop, offset the address by 1 byte.  The start of the
+              // relocation also moves ahead by 1 byte.
+             Reloc_funcs::pcrela32(&view[-1], object, psymval, addend,
+                                   address - 1);
+           }
+       }
       else
        {
          if (gsym != NULL)