From c316a17c40e44e8798b34ff84130904f2e7a53de Mon Sep 17 00:00:00 2001 From: Alan Modra Date: Mon, 9 Nov 2015 15:03:29 +1030 Subject: [PATCH] Fix performance regression due to ld -r memmove The idea here is that, instead of using memmove to shuffle the relocs array every time a reloc is deleted, we add a "wrel" write pointer and copy from rel[0] to wrel[0] as we go. * elf64-ppc.c (ppc64_elf_relocate_section): Use read and write pointers to reloc array, rather than memmove when deleting a reloc. Don't use RELOC_AGAINST_DISCARDED_SECTION. Adjust reloc counts at end of loop. * elf32-ppc.c (ppc_elf_relocate_section): Likewise. --- bfd/ChangeLog | 8 ++++ bfd/elf32-ppc.c | 122 ++++++++++++++++++++++++++++++++++++++------------------ bfd/elf64-ppc.c | 96 ++++++++++++++++++++++++++++++-------------- 3 files changed, 157 insertions(+), 69 deletions(-) diff --git a/bfd/ChangeLog b/bfd/ChangeLog index 60d13b9..d46bbe0 100644 --- a/bfd/ChangeLog +++ b/bfd/ChangeLog @@ -1,3 +1,11 @@ +2015-11-10 Alan Modra + + * elf64-ppc.c (ppc64_elf_relocate_section): Use read and write + pointers to reloc array, rather than memmove when deleting a + reloc. Don't use RELOC_AGAINST_DISCARDED_SECTION. Adjust + reloc counts at end of loop. + * elf32-ppc.c (ppc_elf_relocate_section): Likewise. 
+ 2015-11-09 Dominik Vogt * elf64-ppc.c (ppc64_elf_size_stubs, ppc64_elf_build_stubs): Fix left diff --git a/bfd/elf32-ppc.c b/bfd/elf32-ppc.c index 708076d..5c26077 100644 --- a/bfd/elf32-ppc.c +++ b/bfd/elf32-ppc.c @@ -7650,6 +7650,7 @@ ppc_elf_relocate_section (bfd *output_bfd, struct elf_link_hash_entry **sym_hashes; struct ppc_elf_link_hash_table *htab; Elf_Internal_Rela *rel; + Elf_Internal_Rela *wrel; Elf_Internal_Rela *relend; Elf_Internal_Rela outrel; asection *got2; @@ -7685,9 +7686,9 @@ ppc_elf_relocate_section (bfd *output_bfd, ".tls_vars")); if (input_section->sec_info_type == SEC_INFO_TYPE_TARGET) relax_info = elf_section_data (input_section)->sec_info; - rel = relocs; + rel = wrel = relocs; relend = relocs + input_section->reloc_count; - for (; rel < relend; rel++) + for (; rel < relend; wrel++, rel++) { enum elf_ppc_reloc_type r_type; bfd_vma addend; @@ -7706,6 +7707,7 @@ ppc_elf_relocate_section (bfd *output_bfd, struct plt_entry **ifunc; struct reloc_howto_struct alt_howto; + again: r_type = ELF32_R_TYPE (rel->r_info); sym = NULL; sec = NULL; @@ -7742,8 +7744,22 @@ ppc_elf_relocate_section (bfd *output_bfd, howto = NULL; if (r_type < R_PPC_max) howto = ppc_elf_howto_table[r_type]; - RELOC_AGAINST_DISCARDED_SECTION (info, input_bfd, input_section, - rel, 1, relend, howto, 0, contents); + + _bfd_clear_contents (howto, input_bfd, input_section, + contents + rel->r_offset); + wrel->r_offset = rel->r_offset; + wrel->r_info = 0; + wrel->r_addend = 0; + + /* For ld -r, remove relocations in debug sections against + sections defined in discarded sections. Not done for + non-debug to preserve relocs in .eh_frame which the + eh_frame editing code expects to be present. 
*/ + if (bfd_link_relocatable (info) + && (input_section->flags & SEC_DEBUGGING)) + wrel--; + + continue; } if (bfd_link_relocatable (info)) @@ -7759,7 +7775,7 @@ ppc_elf_relocate_section (bfd *output_bfd, if (r_type != R_PPC_RELAX_PLT && r_type != R_PPC_RELAX_PLTREL24 && r_type != R_PPC_RELAX) - continue; + goto copy_reloc; } /* TLS optimizations. Replace instruction sequences and relocs @@ -7802,10 +7818,12 @@ ppc_elf_relocate_section (bfd *output_bfd, { bfd_vma insn; - insn = bfd_get_32 (output_bfd, contents + rel->r_offset - d_offset); + insn = bfd_get_32 (output_bfd, + contents + rel->r_offset - d_offset); insn &= 31 << 21; insn |= 0x3c020000; /* addis 0,2,0 */ - bfd_put_32 (output_bfd, insn, contents + rel->r_offset - d_offset); + bfd_put_32 (output_bfd, insn, + contents + rel->r_offset - d_offset); r_type = R_PPC_TPREL16_HA; rel->r_info = ELF32_R_INFO (r_symndx, r_type); } @@ -7941,8 +7959,7 @@ ppc_elf_relocate_section (bfd *output_bfd, { /* We changed the symbol on an LD reloc. Start over in order to get h, sym, sec etc. right. */ - rel--; - continue; + goto again; } } break; @@ -8000,8 +8017,7 @@ ppc_elf_relocate_section (bfd *output_bfd, /* Zap the reloc on the _tls_get_addr call too. 
*/ BFD_ASSERT (rel->r_offset - d_offset == rel[1].r_offset); rel[1].r_info = ELF32_R_INFO (STN_UNDEF, R_PPC_NONE); - rel--; - continue; + goto again; } break; } @@ -8080,9 +8096,9 @@ ppc_elf_relocate_section (bfd *output_bfd, got_addr = (htab->got->output_section->vma + htab->got->output_offset + (h->got.offset & ~1)); - rel->r_info = ELF32_R_INFO (0, R_PPC_ADDR16_HA); - rel->r_addend = got_addr; - rel->r_offset = (p - contents) + d_offset; + wrel->r_offset = (p - contents) + d_offset; + wrel->r_info = ELF32_R_INFO (0, R_PPC_ADDR16_HA); + wrel->r_addend = got_addr; insn &= ~0xffff; insn |= ((unsigned int )(got_addr + 0x8000) >> 16) & 0xffff; bfd_put_32 (output_bfd, insn, p); @@ -8100,9 +8116,10 @@ ppc_elf_relocate_section (bfd *output_bfd, /* Use one of the spare relocs, so --emit-relocs output is reasonable. */ memmove (rel + 1, rel, (relend - rel - 1) * sizeof (*rel)); - rel++; + wrel++, rel++; + rel->r_offset = wrel[-1].r_offset + 4; rel->r_info = ELF32_R_INFO (0, R_PPC_ADDR16_LO); - rel->r_offset += 4; + rel->r_addend = wrel[-1].r_addend; /* Continue on as if we had a got reloc, to output dynamic reloc. */ @@ -8236,7 +8253,7 @@ ppc_elf_relocate_section (bfd *output_bfd, bfd_set_error (bfd_error_bad_value); ret = FALSE; - continue; + goto copy_reloc; case R_PPC_NONE: case R_PPC_TLS: @@ -8245,7 +8262,7 @@ ppc_elf_relocate_section (bfd *output_bfd, case R_PPC_EMB_MRKREF: case R_PPC_GNU_VTINHERIT: case R_PPC_GNU_VTENTRY: - continue; + goto copy_reloc; /* GOT16 relocations. Like an ADDR16 using the symbol's address in the GOT as relocation value instead of the @@ -8496,7 +8513,7 @@ ppc_elf_relocate_section (bfd *output_bfd, /* If here for a picfixup, we're done. 
*/ if (r_type != ELF32_R_TYPE (rel->r_info)) - continue; + goto copy_reloc; relocation = (htab->got->output_section->vma + htab->got->output_offset @@ -8529,7 +8546,7 @@ ppc_elf_relocate_section (bfd *output_bfd, rel->r_offset, TRUE)) return FALSE; - continue; + goto copy_reloc; } break; @@ -8768,7 +8785,7 @@ ppc_elf_relocate_section (bfd *output_bfd, bfd_elf32_swap_reloca_out (output_bfd, &outrel, loc); if (skip == -1) - continue; + goto copy_reloc; /* This reloc will be computed at runtime. We clear the memory so that it contains predictable value. */ @@ -8861,12 +8878,13 @@ ppc_elf_relocate_section (bfd *output_bfd, relocs to describe this relocation. */ BFD_ASSERT (ELF32_R_TYPE (relend[-1].r_info) == R_PPC_NONE); /* The relocs are at the bottom 2 bytes */ - rel[0].r_offset += d_offset; - memmove (rel + 1, rel, (relend - rel - 1) * sizeof (*rel)); - rel[0].r_info = ELF32_R_INFO (r_symndx, R_PPC_ADDR16_HA); - rel[1].r_offset += 4; - rel[1].r_info = ELF32_R_INFO (r_symndx, R_PPC_ADDR16_LO); - rel++; + wrel->r_offset = rel->r_offset + d_offset; + wrel->r_info = ELF32_R_INFO (r_symndx, R_PPC_ADDR16_HA); + wrel->r_addend = rel->r_addend; + memmove (wrel + 1, wrel, (relend - wrel - 1) * sizeof (*wrel)); + wrel++, rel++; + wrel->r_offset += 4; + wrel->r_info = ELF32_R_INFO (r_symndx, R_PPC_ADDR16_LO); } continue; @@ -9014,37 +9032,37 @@ ppc_elf_relocate_section (bfd *output_bfd, relocation = relocation + addend; ppc_elf_vle_split16 (output_bfd, contents + rel->r_offset, relocation, split16a_type); - continue; + goto copy_reloc; case R_PPC_VLE_LO16D: relocation = relocation + addend; ppc_elf_vle_split16 (output_bfd, contents + rel->r_offset, relocation, split16d_type); - continue; + goto copy_reloc; case R_PPC_VLE_HI16A: relocation = (relocation + addend) >> 16; ppc_elf_vle_split16 (output_bfd, contents + rel->r_offset, relocation, split16a_type); - continue; + goto copy_reloc; case R_PPC_VLE_HI16D: relocation = (relocation + addend) >> 16; ppc_elf_vle_split16 
(output_bfd, contents + rel->r_offset, relocation, split16d_type); - continue; + goto copy_reloc; case R_PPC_VLE_HA16A: relocation = (relocation + addend + 0x8000) >> 16; ppc_elf_vle_split16 (output_bfd, contents + rel->r_offset, relocation, split16a_type); - continue; + goto copy_reloc; case R_PPC_VLE_HA16D: relocation = (relocation + addend + 0x8000) >> 16; ppc_elf_vle_split16 (output_bfd, contents + rel->r_offset, relocation, split16d_type); - continue; + goto copy_reloc; /* Relocate against either _SDA_BASE_, _SDA2_BASE_, or 0. */ case R_PPC_EMB_SDA21: @@ -9093,7 +9111,7 @@ ppc_elf_relocate_section (bfd *output_bfd, bfd_set_error (bfd_error_bad_value); ret = FALSE; - continue; + goto copy_reloc; } if (sda != NULL) @@ -9131,7 +9149,7 @@ ppc_elf_relocate_section (bfd *output_bfd, if (r_type == R_PPC_VLE_SDA21 && ((relocation + 0x80000) & 0xffffffff) > 0x100000) goto overflow; - continue; + goto copy_reloc; } else if (r_type == R_PPC_EMB_SDA21 || r_type == R_PPC_VLE_SDA21 @@ -9187,7 +9205,7 @@ ppc_elf_relocate_section (bfd *output_bfd, bfd_set_error (bfd_error_bad_value); ret = FALSE; - continue; + goto copy_reloc; } if (sda != NULL) @@ -9234,7 +9252,7 @@ ppc_elf_relocate_section (bfd *output_bfd, value, split16d_type); } } - continue; + goto copy_reloc; /* Relocate against the beginning of the section. */ case R_PPC_SECTOFF: @@ -9282,7 +9300,7 @@ ppc_elf_relocate_section (bfd *output_bfd, bfd_set_error (bfd_error_invalid_operation); ret = FALSE; - continue; + goto copy_reloc; } /* Do any further special processing. */ @@ -9342,7 +9360,8 @@ ppc_elf_relocate_section (bfd *output_bfd, that make up part of the insn opcode. 
*/ unsigned int insn, mask, lobit; - insn = bfd_get_32 (output_bfd, contents + rel->r_offset - d_offset); + insn = bfd_get_32 (output_bfd, + contents + rel->r_offset - d_offset); mask = 0; if (is_insn_ds_form (insn)) mask = 3; @@ -9452,6 +9471,31 @@ ppc_elf_relocate_section (bfd *output_bfd, ret = FALSE; } } + copy_reloc: + if (wrel != rel) + *wrel = *rel; + } + + if (wrel != rel) + { + Elf_Internal_Shdr *rel_hdr; + size_t deleted = rel - wrel; + + rel_hdr = _bfd_elf_single_rel_hdr (input_section->output_section); + rel_hdr->sh_size -= rel_hdr->sh_entsize * deleted; + if (rel_hdr->sh_size == 0) + { + /* It is too late to remove an empty reloc section. Leave + one NONE reloc. + ??? What is wrong with an empty section??? */ + rel_hdr->sh_size = rel_hdr->sh_entsize; + deleted -= 1; + wrel++; + } + relend = wrel; + rel_hdr = _bfd_elf_single_rel_hdr (input_section); + rel_hdr->sh_size -= rel_hdr->sh_entsize * deleted; + input_section->reloc_count -= deleted; } #ifdef DEBUG diff --git a/bfd/elf64-ppc.c b/bfd/elf64-ppc.c index 0a85ab8..f491a09 100644 --- a/bfd/elf64-ppc.c +++ b/bfd/elf64-ppc.c @@ -13162,6 +13162,7 @@ ppc64_elf_relocate_section (bfd *output_bfd, Elf_Internal_Shdr *symtab_hdr; struct elf_link_hash_entry **sym_hashes; Elf_Internal_Rela *rel; + Elf_Internal_Rela *wrel; Elf_Internal_Rela *relend; Elf_Internal_Rela outrel; bfd_byte *loc; @@ -13193,9 +13194,9 @@ ppc64_elf_relocate_section (bfd *output_bfd, sym_hashes = elf_sym_hashes (input_bfd); is_opd = ppc64_elf_section_data (input_section)->sec_type == sec_opd; - rel = relocs; + rel = wrel = relocs; relend = relocs + input_section->reloc_count; - for (; rel < relend; rel++) + for (; rel < relend; wrel++, rel++) { enum elf_ppc64_reloc_type r_type; bfd_vma addend; @@ -13219,10 +13220,13 @@ ppc64_elf_relocate_section (bfd *output_bfd, struct ppc_stub_hash_entry *stub_entry; bfd_vma max_br_offset; bfd_vma from; - const Elf_Internal_Rela orig_rel = *rel; + Elf_Internal_Rela orig_rel; reloc_howto_type *howto; 
struct reloc_howto_struct alt_howto; + again: + orig_rel = *rel; + r_type = ELF64_R_TYPE (rel->r_info); r_symndx = ELF64_R_SYM (rel->r_info); @@ -13230,10 +13234,10 @@ ppc64_elf_relocate_section (bfd *output_bfd, symbol of the previous ADDR64 reloc. The symbol gives us the proper TOC base to use. */ if (rel->r_info == ELF64_R_INFO (0, R_PPC64_TOC) - && rel != relocs - && ELF64_R_TYPE (rel[-1].r_info) == R_PPC64_ADDR64 + && wrel != relocs + && ELF64_R_TYPE (wrel[-1].r_info) == R_PPC64_ADDR64 && is_opd) - r_symndx = ELF64_R_SYM (rel[-1].r_info); + r_symndx = ELF64_R_SYM (wrel[-1].r_info); sym = NULL; sec = NULL; @@ -13314,13 +13318,27 @@ ppc64_elf_relocate_section (bfd *output_bfd, h = (struct ppc_link_hash_entry *) h_elf; if (sec != NULL && discarded_section (sec)) - RELOC_AGAINST_DISCARDED_SECTION (info, input_bfd, input_section, - rel, 1, relend, - ppc64_elf_howto_table[r_type], 0, - contents); + { + _bfd_clear_contents (ppc64_elf_howto_table[r_type], + input_bfd, input_section, + contents + rel->r_offset); + wrel->r_offset = rel->r_offset; + wrel->r_info = 0; + wrel->r_addend = 0; + + /* For ld -r, remove relocations in debug sections against + sections defined in discarded sections. Not done for + non-debug to preserve relocs in .eh_frame which the + eh_frame editing code expects to be present. 
*/ + if (bfd_link_relocatable (info) + && (input_section->flags & SEC_DEBUGGING)) + wrel--; + + continue; + } if (bfd_link_relocatable (info)) - continue; + goto copy_reloc; if (h != NULL && &h->elf == htab->elf.hgot) { @@ -13480,10 +13498,12 @@ ppc64_elf_relocate_section (bfd *output_bfd, && (tls_mask & TLS_TPREL) == 0) { toctprel: - insn = bfd_get_32 (output_bfd, contents + rel->r_offset - d_offset); + insn = bfd_get_32 (output_bfd, + contents + rel->r_offset - d_offset); insn &= 31 << 21; insn |= 0x3c0d0000; /* addis 0,13,0 */ - bfd_put_32 (output_bfd, insn, contents + rel->r_offset - d_offset); + bfd_put_32 (output_bfd, insn, + contents + rel->r_offset - d_offset); r_type = R_PPC64_TPREL16_HA; if (toc_symndx != 0) { @@ -13491,8 +13511,7 @@ ppc64_elf_relocate_section (bfd *output_bfd, rel->r_addend = toc_addend; /* We changed the symbol. Start over in order to get h, sym, sec etc. right. */ - rel--; - continue; + goto again; } else rel->r_info = ELF64_R_INFO (r_symndx, r_type); @@ -13518,8 +13537,7 @@ ppc64_elf_relocate_section (bfd *output_bfd, rel->r_addend = toc_addend; /* We changed the symbol. Start over in order to get h, sym, sec etc. right. */ - rel--; - continue; + goto again; } else rel->r_info = ELF64_R_INFO (r_symndx, r_type); @@ -13658,8 +13676,7 @@ ppc64_elf_relocate_section (bfd *output_bfd, { /* We changed the symbol. Start over in order to get h, sym, sec etc. right. 
*/ - rel--; - continue; + goto again; } } break; @@ -13703,10 +13720,7 @@ ppc64_elf_relocate_section (bfd *output_bfd, } bfd_put_32 (output_bfd, insn2, contents + offset); if ((tls_mask & TLS_TPRELGD) == 0 && toc_symndx != 0) - { - rel--; - continue; - } + goto again; } break; @@ -13748,8 +13762,7 @@ ppc64_elf_relocate_section (bfd *output_bfd, insn2 = NOP; } bfd_put_32 (output_bfd, insn2, contents + offset); - rel--; - continue; + goto again; } break; @@ -14091,7 +14104,7 @@ ppc64_elf_relocate_section (bfd *output_bfd, && addend == 0) { bfd_put_32 (output_bfd, NOP, contents + rel->r_offset); - continue; + goto copy_reloc; } break; } @@ -14107,7 +14120,7 @@ ppc64_elf_relocate_section (bfd *output_bfd, bfd_set_error (bfd_error_bad_value); ret = FALSE; - continue; + goto copy_reloc; case R_PPC64_NONE: case R_PPC64_TLS: @@ -14116,7 +14129,7 @@ ppc64_elf_relocate_section (bfd *output_bfd, case R_PPC64_TOCSAVE: case R_PPC64_GNU_VTINHERIT: case R_PPC64_GNU_VTENTRY: - continue; + goto copy_reloc; /* GOT16 relocations. Like an ADDR16 using the symbol's address in the GOT as relocation value instead of the @@ -14752,7 +14765,7 @@ ppc64_elf_relocate_section (bfd *output_bfd, bfd_set_error (bfd_error_invalid_operation); ret = FALSE; - continue; + goto copy_reloc; } /* Multi-instruction sequences that access the TOC can be @@ -14901,7 +14914,7 @@ ppc64_elf_relocate_section (bfd *output_bfd, mask + 1); bfd_set_error (bfd_error_bad_value); ret = FALSE; - continue; + goto copy_reloc; } break; } @@ -15000,6 +15013,29 @@ ppc64_elf_relocate_section (bfd *output_bfd, if (more_info != NULL) free (more_info); } + copy_reloc: + if (wrel != rel) + *wrel = *rel; + } + + if (wrel != rel) + { + Elf_Internal_Shdr *rel_hdr; + size_t deleted = rel - wrel; + + rel_hdr = _bfd_elf_single_rel_hdr (input_section->output_section); + rel_hdr->sh_size -= rel_hdr->sh_entsize * deleted; + if (rel_hdr->sh_size == 0) + { + /* It is too late to remove an empty reloc section. Leave + one NONE reloc. + ??? 
What is wrong with an empty section??? */ + rel_hdr->sh_size = rel_hdr->sh_entsize; + deleted -= 1; + } + rel_hdr = _bfd_elf_single_rel_hdr (input_section); + rel_hdr->sh_size -= rel_hdr->sh_entsize * deleted; + input_section->reloc_count -= deleted; } /* If we're emitting relocations, then shortly after this function -- 2.7.4