From 3a4f096e5ff41e2a3b832cf6c815ad1f28437c63 Mon Sep 17 00:00:00 2001 From: Sriraman Tallam Date: Tue, 28 Jun 2016 15:42:33 -0700 Subject: [PATCH] Convert indirect calls to direct when possible. Please see patch discussion: https://www.sourceware.org/ml/binutils/2016-05/msg00322.html 2016-06-28 Sriraman Tallam * x86_64.cc (Lazy_view): New class. (can_convert_mov_to_lea): Templatize function. Make the function check for appropriate relocation types and use the view parameter to get section contents. (can_convert_callq_to_direct): New function. (Target_x86_64::Scan::global): Refactor. (Target_x86_64::Relocate::relocate): Refactor. Change any indirect call via GOT that can be converted. * testsuite/Makefile.am (x86_64_indirect_call_to_direct.sh): New test. * testsuite/Makefile.in: Regenerate. * testsuite/x86_64_indirect_call_to_direct1.s: New file. * testsuite/x86_64_indirect_jump_to_direct1.s: New file. --- gold/ChangeLog | 15 ++ gold/testsuite/Makefile.am | 19 +++ gold/testsuite/Makefile.in | 20 +++ gold/testsuite/x86_64_indirect_call_to_direct.sh | 29 ++++ gold/testsuite/x86_64_indirect_call_to_direct1.s | 12 ++ gold/testsuite/x86_64_indirect_jump_to_direct1.s | 11 ++ gold/x86_64.cc | 172 +++++++++++++++++++---- 7 files changed, 250 insertions(+), 28 deletions(-) create mode 100755 gold/testsuite/x86_64_indirect_call_to_direct.sh create mode 100644 gold/testsuite/x86_64_indirect_call_to_direct1.s create mode 100644 gold/testsuite/x86_64_indirect_jump_to_direct1.s diff --git a/gold/ChangeLog b/gold/ChangeLog index d6bae2b..8b293cd 100644 --- a/gold/ChangeLog +++ b/gold/ChangeLog @@ -1,3 +1,18 @@ +2016-06-28 Sriraman Tallam + + * x86_64.cc (Lazy_view): New class. + (can_convert_mov_to_lea): Templatize function. Make the function + check for appropriate relocation types and use the view parameter + to get section contents. + (can_convert_callq_to_direct): New function. + (Target_x86_64::Scan::global): Refactor. + (Target_x86_64::Relocate::relocate): Refactor. 
Change any indirect + call via GOT that can be converted. + * testsuite/Makefile.am (x86_64_indirect_call_to_direct.sh): New test. + * testsuite/Makefile.in: Regenerate. + * testsuite/x86_64_indirect_call_to_direct1.s: New file. + * testsuite/x86_64_indirect_jump_to_direct1.s: New file. + 2016-06-28 Igor Kudrin * aarch64.cc (Target_aarch64::Scan::local): Move the call to got_section diff --git a/gold/testsuite/Makefile.am b/gold/testsuite/Makefile.am index 9e232e9..9d4326b 100644 --- a/gold/testsuite/Makefile.am +++ b/gold/testsuite/Makefile.am @@ -1096,6 +1096,25 @@ x86_64_mov_to_lea13.stdout: x86_64_mov_to_lea13 x86_64_mov_to_lea14.stdout: x86_64_mov_to_lea14 $(TEST_OBJDUMP) -dw $< > $@ +check_SCRIPTS += x86_64_indirect_call_to_direct.sh +check_DATA += x86_64_indirect_call_to_direct1.stdout \ + x86_64_indirect_jump_to_direct1.stdout +MOSTLYCLEANFILES += x86_64_indirect_call_to_direct1 \ + x86_64_indirect_jump_to_direct1 + +x86_64_indirect_call_to_direct1.o: x86_64_indirect_call_to_direct1.s + $(TEST_AS) --64 -mrelax-relocations=yes -o $@ $< +x86_64_indirect_call_to_direct1: x86_64_indirect_call_to_direct1.o gcctestdir/ld + gcctestdir/ld -o $@ $< +x86_64_indirect_call_to_direct1.stdout: x86_64_indirect_call_to_direct1 + $(TEST_OBJDUMP) -dw $< > $@ +x86_64_indirect_jump_to_direct1.o: x86_64_indirect_jump_to_direct1.s + $(TEST_AS) --64 -mrelax-relocations=yes -o $@ $< +x86_64_indirect_jump_to_direct1: x86_64_indirect_jump_to_direct1.o gcctestdir/ld + gcctestdir/ld -o $@ $< +x86_64_indirect_jump_to_direct1.stdout: x86_64_indirect_jump_to_direct1 + $(TEST_OBJDUMP) -dw $< > $@ + check_SCRIPTS += x86_64_overflow_pc32.sh check_DATA += x86_64_overflow_pc32.err MOSTLYCLEANFILES += x86_64_overflow_pc32.err diff --git a/gold/testsuite/Makefile.in b/gold/testsuite/Makefile.in index 5b66c9c..894e6dd 100644 --- a/gold/testsuite/Makefile.in +++ b/gold/testsuite/Makefile.in @@ -223,6 +223,7 @@ check_PROGRAMS = $(am__EXEEXT_1) $(am__EXEEXT_2) $(am__EXEEXT_3) \ 
@GCC_TRUE@@HAVE_STATIC_TRUE@@NATIVE_LINKER_TRUE@@STATIC_TLS_TRUE@@TLS_TRUE@ tls_static_pic_test @FN_PTRS_IN_SO_WITHOUT_PIC_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@@TLS_TRUE@am__append_26 = tls_shared_nonpic_test @DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@am__append_27 = x86_64_mov_to_lea.sh \ +@DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@ x86_64_indirect_call_to_direct.sh \ @DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@ x86_64_overflow_pc32.sh \ @DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@ x32_overflow_pc32.sh @DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@am__append_28 = x86_64_mov_to_lea1.stdout \ @@ -239,6 +240,8 @@ check_PROGRAMS = $(am__EXEEXT_1) $(am__EXEEXT_2) $(am__EXEEXT_3) \ @DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@ x86_64_mov_to_lea12.stdout \ @DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@ x86_64_mov_to_lea13.stdout \ @DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@ x86_64_mov_to_lea14.stdout \ +@DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@ x86_64_indirect_call_to_direct1.stdout \ +@DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@ x86_64_indirect_jump_to_direct1.stdout \ @DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@ x86_64_overflow_pc32.err \ @DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@ x32_overflow_pc32.err @DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@am__append_29 = x86_64_mov_to_lea1 \ @@ -255,6 +258,8 @@ check_PROGRAMS = $(am__EXEEXT_1) $(am__EXEEXT_2) $(am__EXEEXT_3) \ @DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@ x86_64_mov_to_lea12 \ @DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@ x86_64_mov_to_lea13 \ @DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@ x86_64_mov_to_lea14 \ +@DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@ x86_64_indirect_call_to_direct1 \ +@DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@ x86_64_indirect_jump_to_direct1 \ 
@DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@ x86_64_overflow_pc32.err \ @DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@ x32_overflow_pc32.err @DEFAULT_TARGET_I386_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@am__append_30 = i386_mov_to_lea.sh @@ -4564,6 +4569,8 @@ tls_pie_test.sh.log: tls_pie_test.sh @p='tls_pie_test.sh'; $(am__check_pre) $(LOG_COMPILE) "$$tst" $(am__check_post) x86_64_mov_to_lea.sh.log: x86_64_mov_to_lea.sh @p='x86_64_mov_to_lea.sh'; $(am__check_pre) $(LOG_COMPILE) "$$tst" $(am__check_post) +x86_64_indirect_call_to_direct.sh.log: x86_64_indirect_call_to_direct.sh + @p='x86_64_indirect_call_to_direct.sh'; $(am__check_pre) $(LOG_COMPILE) "$$tst" $(am__check_post) x86_64_overflow_pc32.sh.log: x86_64_overflow_pc32.sh @p='x86_64_overflow_pc32.sh'; $(am__check_pre) $(LOG_COMPILE) "$$tst" $(am__check_post) x32_overflow_pc32.sh.log: x32_overflow_pc32.sh @@ -5641,6 +5648,19 @@ uninstall-am: @DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@ $(TEST_OBJDUMP) -dw $< > $@ @DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@x86_64_mov_to_lea14.stdout: x86_64_mov_to_lea14 @DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@ $(TEST_OBJDUMP) -dw $< > $@ + +@DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@x86_64_indirect_call_to_direct1.o: x86_64_indirect_call_to_direct1.s +@DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@ $(TEST_AS) --64 -mrelax-relocations=yes -o $@ $< +@DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@x86_64_indirect_call_to_direct1: x86_64_indirect_call_to_direct1.o gcctestdir/ld +@DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@ gcctestdir/ld -o $@ $< +@DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@x86_64_indirect_call_to_direct1.stdout: x86_64_indirect_call_to_direct1 +@DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@ $(TEST_OBJDUMP) -dw $< > $@ +@DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@x86_64_indirect_jump_to_direct1.o: 
x86_64_indirect_jump_to_direct1.s +@DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@ $(TEST_AS) --64 -mrelax-relocations=yes -o $@ $< +@DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@x86_64_indirect_jump_to_direct1: x86_64_indirect_jump_to_direct1.o gcctestdir/ld +@DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@ gcctestdir/ld -o $@ $< +@DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@x86_64_indirect_jump_to_direct1.stdout: x86_64_indirect_jump_to_direct1 +@DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@ $(TEST_OBJDUMP) -dw $< > $@ @DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@x86_64_overflow_pc32.o: x86_64_overflow_pc32.s @DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@ $(TEST_AS) -o $@ $< @DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@x86_64_overflow_pc32.err: x86_64_overflow_pc32.o gcctestdir/ld diff --git a/gold/testsuite/x86_64_indirect_call_to_direct.sh b/gold/testsuite/x86_64_indirect_call_to_direct.sh new file mode 100755 index 0000000..916e1a3 --- /dev/null +++ b/gold/testsuite/x86_64_indirect_call_to_direct.sh @@ -0,0 +1,29 @@ +#!/bin/sh + +# x86_64_indirect_call_to_direct.sh -- a test for indirect call(jump) to direct +# conversion. + +# Copyright (C) 2016 Free Software Foundation, Inc. +# Written by Sriraman Tallam + +# This file is part of gold. + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+ +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, +# MA 02110-1301, USA. + +set -e + +grep -q "callq[ ]\+[a-f0-9]\+ <foo>" x86_64_indirect_call_to_direct1.stdout +grep -q "jmpq[ ]\+[a-f0-9]\+ <foo>" x86_64_indirect_jump_to_direct1.stdout diff --git a/gold/testsuite/x86_64_indirect_call_to_direct1.s b/gold/testsuite/x86_64_indirect_call_to_direct1.s new file mode 100644 index 0000000..5ca2e38 --- /dev/null +++ b/gold/testsuite/x86_64_indirect_call_to_direct1.s @@ -0,0 +1,12 @@ + .text + .globl foo + .type foo, @function +foo: + ret + .size foo, .-foo + .globl main + .type main, @function +main: + call *foo@GOTPCREL(%rip) + ret + .size main, .-main diff --git a/gold/testsuite/x86_64_indirect_jump_to_direct1.s b/gold/testsuite/x86_64_indirect_jump_to_direct1.s new file mode 100644 index 0000000..b817e34 --- /dev/null +++ b/gold/testsuite/x86_64_indirect_jump_to_direct1.s @@ -0,0 +1,11 @@ + .text + .globl foo + .type foo, @function +foo: + ret + .size foo, .-foo + .globl main + .type main, @function +main: + jmp *foo@GOTPCREL(%rip) + .size main, .-main diff --git a/gold/x86_64.cc b/gold/x86_64.cc index d069957..6c511e2 100644 --- a/gold/x86_64.cc +++ b/gold/x86_64.cc @@ -403,6 +403,33 @@ class Output_data_plt_x86_64_standard : public Output_data_plt_x86_64<size> static const unsigned char plt_eh_frame_fde[plt_eh_frame_fde_size]; }; +template<int size> +class Lazy_view +{ + public: + Lazy_view(Sized_relobj_file<size, false>* object, unsigned int data_shndx) + : object_(object), data_shndx_(data_shndx), view_(NULL), view_size_(0) + { } + + inline unsigned char + operator[](size_t offset) + { + if (this->view_ == NULL) + this->view_ = this->object_->section_contents(this->data_shndx_, + &this->view_size_, + true); + if (offset >= this->view_size_) + return 0; + return this->view_[offset]; + } + + private: + Sized_relobj_file<size, false>* object_; + unsigned int data_shndx_; + 
const unsigned char* view_; + section_size_type view_size_; +}; + // The x86_64 target class. // See the ABI at // http://www.x86-64.org/documentation/abi.pdf @@ -876,19 +903,62 @@ class Target_x86_64 : public Sized_target<size, false> // conversion from // mov foo@GOTPCREL(%rip), %reg // to lea foo(%rip), %reg. - static bool - can_convert_mov_to_lea(const Symbol* gsym) + template<class View_type> + static inline bool + can_convert_mov_to_lea(const Symbol* gsym, unsigned int r_type, + size_t r_offset, View_type* view) { gold_assert(gsym != NULL); - return (gsym->type() != elfcpp::STT_GNU_IFUNC - && !gsym->is_undefined () - && !gsym->is_from_dynobj() - && !gsym->is_preemptible() - && (!parameters->options().shared() - || (gsym->visibility() != elfcpp::STV_DEFAULT - && gsym->visibility() != elfcpp::STV_PROTECTED) - || parameters->options().Bsymbolic()) - && strcmp(gsym->name(), "_DYNAMIC") != 0); + // We cannot do the conversion unless it's one of these relocations. + if (r_type != elfcpp::R_X86_64_GOTPCREL + && r_type != elfcpp::R_X86_64_GOTPCRELX + && r_type != elfcpp::R_X86_64_REX_GOTPCRELX) + return false; + // We cannot convert references to IFUNC symbols, or to symbols that + // are not local to the current module. + if (gsym->type() == elfcpp::STT_GNU_IFUNC + || gsym->is_undefined () + || gsym->is_from_dynobj() + || gsym->is_preemptible()) + return false; + // If we are building a shared object and the symbol is protected, we may + // need to go through the GOT. + if (parameters->options().shared() + && gsym->visibility() == elfcpp::STV_PROTECTED) + return false; + // We cannot convert references to the _DYNAMIC symbol. + if (strcmp(gsym->name(), "_DYNAMIC") == 0) + return false; + // Check for a MOV opcode. 
+ return (*view)[r_offset - 2] == 0x8b; + } + + // Convert + // callq *foo@GOTPCRELX(%rip) to + // addr32 callq foo + // and jmpq *foo@GOTPCRELX(%rip) to + // jmpq foo + // nop + template<class View_type> + static inline bool + can_convert_callq_to_direct(const Symbol* gsym, unsigned int r_type, + size_t r_offset, View_type* view) + { + gold_assert(gsym != NULL); + // We cannot do the conversion unless it's a GOTPCRELX relocation. + if (r_type != elfcpp::R_X86_64_GOTPCRELX) + return false; + // We cannot convert references to IFUNC symbols, or to symbols that + // are not local to the current module. + if (gsym->type() == elfcpp::STT_GNU_IFUNC + || gsym->is_undefined () + || gsym->is_from_dynobj() + || gsym->is_preemptible()) + return false; + // Check for a CALLQ or JMPQ opcode. + return ((*view)[r_offset - 2] == 0xff + && ((*view)[r_offset - 1] == 0x15 + || (*view)[r_offset - 1] == 0x25)); } // Adjust TLS relocation type based on the options and whether this @@ -2935,19 +3005,24 @@ Target_x86_64<size>::Scan::global(Symbol_table* symtab, // If we convert this from // mov foo@GOTPCREL(%rip), %reg // to lea foo(%rip), %reg. + // OR + // if we convert + // (callq|jmpq) *foo@GOTPCRELX(%rip) to + // (callq|jmpq) foo // in Relocate::relocate, then there is nothing to do here. 
- if ((r_type == elfcpp::R_X86_64_GOTPCREL - || r_type == elfcpp::R_X86_64_GOTPCRELX - || r_type == elfcpp::R_X86_64_REX_GOTPCRELX) - && reloc.get_r_offset() >= 2 - && Target_x86_64<size>::can_convert_mov_to_lea(gsym)) - { - section_size_type stype; - const unsigned char* view = object->section_contents(data_shndx, - &stype, true); - if (view[reloc.get_r_offset() - 2] == 0x8b) - break; - } + + Lazy_view<size> view(object, data_shndx); + size_t r_offset = reloc.get_r_offset(); + if (r_offset >= 2 + && Target_x86_64<size>::can_convert_mov_to_lea(gsym, r_type, + r_offset, &view)) + break; + + if (r_offset >= 2 + && Target_x86_64<size>::can_convert_callq_to_direct(gsym, r_type, + r_offset, + &view)) + break; if (gsym->final_value_is_known()) { @@ -3629,15 +3704,56 @@ Target_x86_64<size>::Relocate::relocate( // mov foo@GOTPCREL(%rip), %reg // to lea foo(%rip), %reg. // if possible. - if (rela.get_r_offset() >= 2 - && view[-2] == 0x8b - && ((gsym == NULL && !psymval->is_ifunc_symbol()) - || (gsym != NULL - && Target_x86_64<size>::can_convert_mov_to_lea(gsym)))) + if ((gsym == NULL + && rela.get_r_offset() >= 2 + && view[-2] == 0x8b + && !psymval->is_ifunc_symbol()) + || (gsym != NULL + && rela.get_r_offset() >= 2 + && Target_x86_64<size>::can_convert_mov_to_lea(gsym, r_type, + 0, &view))) { view[-2] = 0x8d; Reloc_funcs::pcrela32(view, object, psymval, addend, address); } + // Convert + // callq *foo@GOTPCRELX(%rip) to + // addr32 callq foo + // and jmpq *foo@GOTPCRELX(%rip) to + // jmpq foo + // nop + else if (gsym != NULL + && rela.get_r_offset() >= 2 + && Target_x86_64<size>::can_convert_callq_to_direct(gsym, + r_type, + 0, &view)) + { + if (view[-1] == 0x15) + { + // Convert callq *foo@GOTPCRELX(%rip) to addr32 callq. + // Opcode of addr32 is 0x67 and opcode of direct callq is 0xe8. + view[-2] = 0x67; + view[-1] = 0xe8; + // Convert GOTPCRELX to 32-bit pc relative reloc. 
+ Reloc_funcs::pcrela32(view, object, psymval, addend, address); + } + else + { + // Convert jmpq *foo@GOTPCRELX(%rip) to + // jmpq foo + // nop + // The opcode of direct jmpq is 0xe9. + view[-2] = 0xe9; + // The opcode of nop is 0x90. + view[3] = 0x90; + // Convert GOTPCRELX to 32-bit pc relative reloc. jmpq is rip + // relative and since the instruction following the jmpq is now + // the nop, offset the address by 1 byte. The start of the + // relocation also moves ahead by 1 byte. + Reloc_funcs::pcrela32(&view[-1], object, psymval, addend, + address - 1); + } + } else { if (gsym != NULL) -- 2.7.4