bfd/
authorAlan Modra <amodra@gmail.com>
Wed, 11 Jan 2012 09:28:45 +0000 (09:28 +0000)
committerAlan Modra <amodra@gmail.com>
Wed, 11 Jan 2012 09:28:45 +0000 (09:28 +0000)
* elf64-ppc.c: Define more insns used in plt call stubs.
(ppc64_elf_brtaken_reloc): Assume isa version 2 or above.
(ppc64_elf_relocate_section): Likewise.
(enum ppc_stub_type): Add ppc_stub_plt_call_r2save.
(struct ppc_link_hash_table): Increase size of stub_count array.
Add plt_stub_align and plt_thread_safe.
(ALWAYS_USE_FAKE_DEP, ALWAYS_EMIT_R2SAVE): Define.
(plt_stub_size, plt_stub_pad): New functions.
(build_plt_stub): Emit barriers for power7 thread safety.  Don't
emit needless save of r2.
(build_tls_get_addr_stub): Adjust params.
(ppc_build_one_stub): Handle ppc_stub_plt_call_r2save and aligning
plt stubs.  Adjust build_*plt_stub calls.
(ppc_size_one_stub): Similarly.
(ppc64_elf_size_stubs): Accept plt_thread_safe and plt_stub_align
params.  Choose default for plt_thread_safe based on existence of
calls to thread creation functions.  Modify plt_call to
plt_call_r2save when no tocsave reloc found.  Align tail of stub
sections.
(ppc64_elf_build_stubs): Align tail of stub sections.  Adjust
output of stub statistics.
(ppc64_elf_relocate_section): Handle ppc_stub_plt_call_r2save.
* elf64-ppc.h (ppc64_elf_size_stubs): Update prototype.
ld/
* emultempl/ppc64elf.em (PARSE_AND_LIST_PROLOGUE,
PARSE_AND_LIST_LONGOPTS, PARSE_AND_LIST_OPTIONS,
PARSE_AND_LIST_ARGS_CASES): Handle --{no-,}plt-thread-safe and
--{no-,}plt-align.
(plt_thread_safe, plt_stub_align): New vars.
(gld${EMULATION_NAME}_after_allocation): Pass them to
ppc64_elf_size_stubs.  Align stub sections according to plt_stub_align.
* ld.texinfo: Document new command line options, and an old
undocumented option.

bfd/ChangeLog
bfd/elf64-ppc.c
bfd/elf64-ppc.h
ld/ChangeLog
ld/emultempl/ppc64elf.em
ld/ld.texinfo

index a8373ff..1698e62 100644 (file)
@@ -1,3 +1,29 @@
+2012-01-11  Alan Modra  <amodra@gmail.com>
+
+       * elf64-ppc.c: Define more insns used in plt call stubs.
+       (ppc64_elf_brtaken_reloc): Assume isa version 2 or above.
+       (ppc64_elf_relocate_section): Likewise.
+       (enum ppc_stub_type): Add ppc_stub_plt_call_r2save.
+       (struct ppc_link_hash_table): Increase size of stub_count array.
+       Add plt_stub_align and plt_thread_safe.
+       (ALWAYS_USE_FAKE_DEP, ALWAYS_EMIT_R2SAVE): Define.
+       (plt_stub_size, plt_stub_pad): New functions.
+       (build_plt_stub): Emit barriers for power7 thread safety.  Don't
+       emit needless save of r2.
+       (build_tls_get_addr_stub): Adjust params.
+       (ppc_build_one_stub): Handle ppc_stub_plt_call_r2save and aligning
+       plt stubs.  Adjust build_*plt_stub calls.
+       (ppc_size_one_stub): Similarly.
+       (ppc64_elf_size_stubs): Accept plt_thread_safe and plt_stub_align
+       params.  Choose default for plt_thread_safe based on existence of
+       calls to thread creation functions.  Modify plt_call to
+       plt_call_r2save when no tocsave reloc found.  Align tail of stub
+       sections.
+       (ppc64_elf_build_stubs): Align tail of stub sections.  Adjust
+       output of stub statistics.
+       (ppc64_elf_relocate_section): Handle ppc_stub_plt_call_r2save.
+       * elf64-ppc.h (ppc64_elf_size_stubs): Update prototype.
+
 2012-01-10  H.J. Lu  <hongjiu.lu@intel.com>
 
        PR ld/13581
index 4560599..9c212b9 100644 (file)
@@ -1,6 +1,6 @@
 /* PowerPC64-specific support for 64-bit ELF.
    Copyright 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008,
-   2009, 2010, 2011 Free Software Foundation, Inc.
+   2009, 2010, 2011, 2012 Free Software Foundation, Inc.
    Written by Linus Nordberg, Swox AB <info@swox.com>,
    based on elf32-ppc.c by Ian Lance Taylor.
    Largely rewritten by Alan Modra.
@@ -152,6 +152,13 @@ static bfd_vma opd_entry_value
 #define ADDIS_R2_R2    0x3c420000      /* addis %r2,%r2,off@ha  */
 #define ADDI_R2_R2     0x38420000      /* addi  %r2,%r2,off@l   */
 
+#define XOR_R11_R11_R11        0x7d6b5a78      /* xor   %r11,%r11,%r11  */
+#define ADD_R12_R12_R11        0x7d8c5a14      /* add   %r12,%r12,%r11  */
+#define ADD_R2_R2_R11  0x7c425a14      /* add   %r2,%r2,%r11    */
+#define CMPLDI_R2_0    0x28220000      /* cmpldi %r2,0          */
+#define BNECTR         0x4ca20420      /* bnectr+               */
+#define BNECTR_P4      0x4ce20420      /* bnectr+, 'at' hint    */
+
 #define LD_R11_0R2     0xe9620000      /* ld    %r11,xxx+0(%r2) */
 #define LD_R2_0R2      0xe8420000      /* ld    %r2,xxx+0(%r2)  */
 
@@ -2356,8 +2363,8 @@ ppc64_elf_brtaken_reloc (bfd *abfd, arelent *reloc_entry, asymbol *symbol,
   long insn;
   enum elf_ppc64_reloc_type r_type;
   bfd_size_type octets;
-  /* Disabled until we sort out how ld should choose 'y' vs 'at'.  */
-  bfd_boolean is_power4 = FALSE;
+  /* Assume 'at' branch hints.  */
+  bfd_boolean is_isa_v2 = TRUE;
 
   /* If this is a relocatable link (output_bfd test tells us), just
      call the generic function.  Any adjustment will be done at final
@@ -2374,7 +2381,7 @@ ppc64_elf_brtaken_reloc (bfd *abfd, arelent *reloc_entry, asymbol *symbol,
       || r_type == R_PPC64_REL14_BRTAKEN)
     insn |= 0x01 << 21; /* 'y' or 't' bit, lowest bit of BO field.  */
 
-  if (is_power4)
+  if (is_isa_v2)
     {
       /* Set 'a' bit.  This is 0b00010 in BO field for branch
         on CR(BI) insns (BO == 001at or 011at), and 0b01000
@@ -3584,7 +3591,8 @@ enum ppc_stub_type {
   ppc_stub_long_branch_r2off,
   ppc_stub_plt_branch,
   ppc_stub_plt_branch_r2off,
-  ppc_stub_plt_call
+  ppc_stub_plt_call,
+  ppc_stub_plt_call_r2save
 };
 
 struct ppc_stub_hash_entry {
@@ -3752,14 +3760,20 @@ struct ppc_link_hash_table
   bfd_size_type got_reli_size;
 
   /* Statistics.  */
-  unsigned long stub_count[ppc_stub_plt_call];
+  unsigned long stub_count[ppc_stub_plt_call_r2save];
 
   /* Number of stubs against global syms.  */
   unsigned long stub_globals;
 
+  /* Alignment of PLT call stubs.  */
+  unsigned int plt_stub_align:4;
+
   /* Set if PLT call stubs should load r11.  */
   unsigned int plt_static_chain:1;
 
+  /* Set if PLT call stubs need a read-read barrier.  */
+  unsigned int plt_thread_safe:1;
+
   /* Set if we should emit symbols for stubs.  */
   unsigned int emit_stub_syms:1;
 
@@ -9471,21 +9485,126 @@ ppc_type_of_stub (asection *input_sec,
   return ppc_stub_none;
 }
 
-/* Build a .plt call stub.  */
+/* With power7 weakly ordered memory model, it is possible for ld.so
+   to update a plt entry in one thread and have another thread see a
+   stale zero toc entry.  To avoid this we need some sort of acquire
+   barrier in the call stub.  One solution is to make the load of the
+   toc word appear to depend on the load of the function entry
+   word.  Another solution is to test for r2 being zero, and branch to
+   the appropriate glink entry if so.
+
+   .   fake dep barrier        compare
+   .   ld 11,xxx(2)            ld 11,xxx(2)
+   .   mtctr 11                mtctr 11
+   .   xor 11,11,11            ld 2,xxx+8(2)
+   .   add 2,2,11              cmpldi 2,0
+   .   ld 2,xxx+8(2)           bnectr+
+   .   bctr                    b <glink_entry>
+
+   The solution involving the compare turns out to be faster, so
+   that's what we use unless the branch won't reach.  */
+
+#define ALWAYS_USE_FAKE_DEP 0
+#define ALWAYS_EMIT_R2SAVE 0
 
-static inline bfd_byte *
-build_plt_stub (bfd *obfd, bfd_byte *p, int offset, Elf_Internal_Rela *r,
-               bfd_boolean plt_static_chain)
-{
 #define PPC_LO(v) ((v) & 0xffff)
 #define PPC_HI(v) (((v) >> 16) & 0xffff)
 #define PPC_HA(v) PPC_HI ((v) + 0x8000)
 
+static inline unsigned int
+plt_stub_size (struct ppc_link_hash_table *htab,
+              struct ppc_stub_hash_entry *stub_entry,
+              bfd_vma off)
+{
+  unsigned size = PLT_CALL_STUB_SIZE;
+
+  if (!(ALWAYS_EMIT_R2SAVE
+       || stub_entry->stub_type == ppc_stub_plt_call_r2save))
+    size -= 4;
+  if (!htab->plt_static_chain)
+    size -= 4;
+  if (htab->plt_thread_safe)
+    size += 8;
+  if (PPC_HA (off) == 0)
+    size -= 4;
+  if (PPC_HA (off + 8 + 8 * htab->plt_static_chain) != PPC_HA (off))
+    size += 4;
+  if (stub_entry->h != NULL
+      && (stub_entry->h == htab->tls_get_addr_fd
+         || stub_entry->h == htab->tls_get_addr)
+      && !htab->no_tls_get_addr_opt)
+    size += 13 * 4;
+  return size;
+}
+
+/* If this stub would cross fewer 2**plt_stub_align boundaries if we align,
+   then return the padding needed to do so.  */
+static inline unsigned int
+plt_stub_pad (struct ppc_link_hash_table *htab,
+             struct ppc_stub_hash_entry *stub_entry,
+             bfd_vma plt_off)
+{
+  int stub_align = 1 << htab->plt_stub_align;
+  unsigned stub_size = plt_stub_size (htab, stub_entry, plt_off);
+  bfd_vma stub_off = stub_entry->stub_sec->size;
+
+  if (((stub_off + stub_size - 1) & -stub_align) - (stub_off & -stub_align)
+      > (stub_size & -stub_align))
+    return stub_align - (stub_off & (stub_align - 1));
+  return 0;
+}
+
+/* Build a .plt call stub.  */
+
+static inline bfd_byte *
+build_plt_stub (struct ppc_link_hash_table *htab,
+               struct ppc_stub_hash_entry *stub_entry,
+               bfd_byte *p, bfd_vma offset, Elf_Internal_Rela *r)
+{
+  bfd *obfd = htab->stub_bfd;
+  bfd_boolean plt_static_chain = htab->plt_static_chain;
+  bfd_boolean plt_thread_safe = htab->plt_thread_safe;
+  bfd_boolean use_fake_dep = plt_thread_safe;
+  bfd_vma cmp_branch_off = 0;
+
+  if (!ALWAYS_USE_FAKE_DEP
+      && plt_thread_safe
+      && !(stub_entry->h != NULL
+          && (stub_entry->h == htab->tls_get_addr_fd
+              || stub_entry->h == htab->tls_get_addr)
+          && !htab->no_tls_get_addr_opt))
+    {
+      bfd_vma pltoff = stub_entry->plt_ent->plt.offset & ~1;
+      bfd_vma pltindex = (pltoff - PLT_INITIAL_ENTRY_SIZE) / PLT_ENTRY_SIZE;
+      bfd_vma glinkoff = GLINK_CALL_STUB_SIZE + pltindex * 8;
+      bfd_vma to, from;
+
+      if (pltindex > 32767)
+       glinkoff += (pltindex - 32767) * 4;
+      to = (glinkoff
+           + htab->glink->output_offset
+           + htab->glink->output_section->vma);
+      from = (p - stub_entry->stub_sec->contents
+             + 4 * (ALWAYS_EMIT_R2SAVE
+                    || stub_entry->stub_type == ppc_stub_plt_call_r2save)
+             + 4 * (PPC_HA (offset) != 0)
+             + 4 * (PPC_HA (offset + 8 + 8 * plt_static_chain)
+                    != PPC_HA (offset))
+             + 4 * (plt_static_chain != 0)
+             + 20
+             + stub_entry->stub_sec->output_offset
+             + stub_entry->stub_sec->output_section->vma);
+      cmp_branch_off = to - from;
+      use_fake_dep = cmp_branch_off + (1 << 25) >= (1 << 26);
+    }
+
   if (PPC_HA (offset) != 0)
     {
       if (r != NULL)
        {
-         r[0].r_offset += 4;
+         if (ALWAYS_EMIT_R2SAVE
+             || stub_entry->stub_type == ppc_stub_plt_call_r2save)
+           r[0].r_offset += 4;
          r[0].r_info = ELF64_R_INFO (0, R_PPC64_TOC16_HA);
          r[1].r_offset = r[0].r_offset + 4;
          r[1].r_info = ELF64_R_INFO (0, R_PPC64_TOC16_LO_DS);
@@ -9498,7 +9617,7 @@ build_plt_stub (bfd *obfd, bfd_byte *p, int offset, Elf_Internal_Rela *r,
            }
          else
            {
-             r[2].r_offset = r[1].r_offset + 8;
+             r[2].r_offset = r[1].r_offset + 8 + 8 * use_fake_dep;
              r[2].r_info = ELF64_R_INFO (0, R_PPC64_TOC16_LO_DS);
              r[2].r_addend = r[0].r_addend + 8;
              if (plt_static_chain)
@@ -9509,7 +9628,9 @@ build_plt_stub (bfd *obfd, bfd_byte *p, int offset, Elf_Internal_Rela *r,
                }
            }
        }
-      bfd_put_32 (obfd, STD_R2_40R1, p),                       p += 4;
+      if (ALWAYS_EMIT_R2SAVE
+         || stub_entry->stub_type == ppc_stub_plt_call_r2save)
+       bfd_put_32 (obfd, STD_R2_40R1, p),                      p += 4;
       bfd_put_32 (obfd, ADDIS_R12_R2 | PPC_HA (offset), p),    p += 4;
       bfd_put_32 (obfd, LD_R11_0R12 | PPC_LO (offset), p),     p += 4;
       if (PPC_HA (offset + 8 + 8 * plt_static_chain) != PPC_HA (offset))
@@ -9518,16 +9639,22 @@ build_plt_stub (bfd *obfd, bfd_byte *p, int offset, Elf_Internal_Rela *r,
          offset = 0;
        }
       bfd_put_32 (obfd, MTCTR_R11, p),                         p += 4;
+      if (use_fake_dep)
+       {
+         bfd_put_32 (obfd, XOR_R11_R11_R11, p),                p += 4;
+         bfd_put_32 (obfd, ADD_R12_R12_R11, p),                p += 4;
+       }
       bfd_put_32 (obfd, LD_R2_0R12 | PPC_LO (offset + 8), p),  p += 4;
       if (plt_static_chain)
        bfd_put_32 (obfd, LD_R11_0R12 | PPC_LO (offset + 16), p), p += 4;
-      bfd_put_32 (obfd, BCTR, p),                              p += 4;
     }
   else
     {
       if (r != NULL)
        {
-         r[0].r_offset += 4;
+         if (ALWAYS_EMIT_R2SAVE
+             || stub_entry->stub_type == ppc_stub_plt_call_r2save)
+           r[0].r_offset += 4;
          r[0].r_info = ELF64_R_INFO (0, R_PPC64_TOC16_DS);
          if (PPC_HA (offset + 8 + 8 * plt_static_chain) != PPC_HA (offset))
            {
@@ -9537,7 +9664,7 @@ build_plt_stub (bfd *obfd, bfd_byte *p, int offset, Elf_Internal_Rela *r,
            }
          else
            {
-             r[1].r_offset = r[0].r_offset + 8;
+             r[1].r_offset = r[0].r_offset + 8 + 8 * use_fake_dep;
              r[1].r_info = ELF64_R_INFO (0, R_PPC64_TOC16_DS);
              r[1].r_addend = r[0].r_addend + 8 + 8 * plt_static_chain;
              if (plt_static_chain)
@@ -9548,7 +9675,9 @@ build_plt_stub (bfd *obfd, bfd_byte *p, int offset, Elf_Internal_Rela *r,
                }
            }
        }
-      bfd_put_32 (obfd, STD_R2_40R1, p),                       p += 4;
+      if (ALWAYS_EMIT_R2SAVE
+         || stub_entry->stub_type == ppc_stub_plt_call_r2save)
+       bfd_put_32 (obfd, STD_R2_40R1, p),                      p += 4;
       bfd_put_32 (obfd, LD_R11_0R2 | PPC_LO (offset), p),      p += 4;
       if (PPC_HA (offset + 8 + 8 * plt_static_chain) != PPC_HA (offset))
        {
@@ -9556,11 +9685,23 @@ build_plt_stub (bfd *obfd, bfd_byte *p, int offset, Elf_Internal_Rela *r,
          offset = 0;
        }
       bfd_put_32 (obfd, MTCTR_R11, p),                         p += 4;
+      if (use_fake_dep)
+       {
+         bfd_put_32 (obfd, XOR_R11_R11_R11, p),                p += 4;
+         bfd_put_32 (obfd, ADD_R2_R2_R11, p),                  p += 4;
+       }
       if (plt_static_chain)
        bfd_put_32 (obfd, LD_R11_0R2 | PPC_LO (offset + 16), p), p += 4;
       bfd_put_32 (obfd, LD_R2_0R2 | PPC_LO (offset + 8), p),   p += 4;
-      bfd_put_32 (obfd, BCTR, p),                              p += 4;
     }
+  if (plt_thread_safe && !use_fake_dep)
+    {
+      bfd_put_32 (obfd, CMPLDI_R2_0, p),                       p += 4;
+      bfd_put_32 (obfd, BNECTR_P4, p),                         p += 4;
+      bfd_put_32 (obfd, B_DOT + cmp_branch_off, p),            p += 4;
+    }
+  else
+    bfd_put_32 (obfd, BCTR, p),                                        p += 4;
   return p;
 }
 
@@ -9581,9 +9722,12 @@ build_plt_stub (bfd *obfd, bfd_byte *p, int offset, Elf_Internal_Rela *r,
 #define MTLR_R11       0x7d6803a6
 
 static inline bfd_byte *
-build_tls_get_addr_stub (bfd *obfd, bfd_byte *p, int offset,
-                        Elf_Internal_Rela *r, bfd_boolean plt_static_chain)
+build_tls_get_addr_stub (struct ppc_link_hash_table *htab,
+                        struct ppc_stub_hash_entry *stub_entry,
+                        bfd_byte *p, bfd_vma offset, Elf_Internal_Rela *r)
 {
+  bfd *obfd = htab->stub_bfd;
+
   bfd_put_32 (obfd, LD_R11_0R3 + 0, p),                p += 4;
   bfd_put_32 (obfd, LD_R12_0R3 + 8, p),                p += 4;
   bfd_put_32 (obfd, MR_R0_R3, p),              p += 4;
@@ -9596,7 +9740,7 @@ build_tls_get_addr_stub (bfd *obfd, bfd_byte *p, int offset,
 
   if (r != NULL)
     r[0].r_offset += 9 * 4;
-  p = build_plt_stub (obfd, p, offset, r, plt_static_chain);
+  p = build_plt_stub (htab, stub_entry, p, offset, r);
   bfd_put_32 (obfd, BCTRL, p - 4);
 
   bfd_put_32 (obfd, LD_R11_0R1 + 32, p),       p += 4;
@@ -9943,6 +10087,7 @@ ppc_build_one_stub (struct bfd_hash_entry *gen_entry, void *in_arg)
       break;
 
     case ppc_stub_plt_call:
+    case ppc_stub_plt_call_r2save:
       if (stub_entry->h != NULL
          && stub_entry->h->is_func_descriptor
          && stub_entry->h->oh != NULL)
@@ -10009,6 +10154,15 @@ ppc_build_one_stub (struct bfd_hash_entry *gen_entry, void *in_arg)
          return FALSE;
        }
 
+      if (htab->plt_stub_align != 0)
+       {
+         unsigned pad = plt_stub_pad (htab, stub_entry, off);
+
+         stub_entry->stub_sec->size += pad;
+         stub_entry->stub_offset = stub_entry->stub_sec->size;
+         loc += pad;
+       }
+
       r = NULL;
       if (info->emitrelocations)
        {
@@ -10028,11 +10182,9 @@ ppc_build_one_stub (struct bfd_hash_entry *gen_entry, void *in_arg)
          && (stub_entry->h == htab->tls_get_addr_fd
              || stub_entry->h == htab->tls_get_addr)
          && !htab->no_tls_get_addr_opt)
-       p = build_tls_get_addr_stub (htab->stub_bfd, loc, off, r,
-                                    htab->plt_static_chain);
+       p = build_tls_get_addr_stub (htab, stub_entry, loc, off, r);
       else
-       p = build_plt_stub (htab->stub_bfd, loc, off, r,
-                           htab->plt_static_chain);
+       p = build_plt_stub (htab, stub_entry, loc, off, r);
       size = p - loc;
       break;
 
@@ -10052,6 +10204,7 @@ ppc_build_one_stub (struct bfd_hash_entry *gen_entry, void *in_arg)
                                       "long_branch_r2off",
                                       "plt_branch",
                                       "plt_branch_r2off",
+                                      "plt_call",
                                       "plt_call" };
 
       len1 = strlen (stub_str[stub_entry->stub_type - 1]);
@@ -10102,7 +10255,8 @@ ppc_size_one_stub (struct bfd_hash_entry *gen_entry, void *in_arg)
   if (htab == NULL)
     return FALSE;
 
-  if (stub_entry->stub_type == ppc_stub_plt_call)
+  if (stub_entry->stub_type == ppc_stub_plt_call
+      || stub_entry->stub_type == ppc_stub_plt_call_r2save)
     {
       asection *plt;
       off = stub_entry->plt_ent->plt.offset & ~(bfd_vma) 1;
@@ -10118,18 +10272,9 @@ ppc_size_one_stub (struct bfd_hash_entry *gen_entry, void *in_arg)
              - elf_gp (plt->output_section->owner)
              - htab->stub_group[stub_entry->id_sec->id].toc_off);
 
-      size = PLT_CALL_STUB_SIZE;
-      if (!htab->plt_static_chain)
-       size -= 4;
-      if (PPC_HA (off) == 0)
-       size -= 4;
-      if (PPC_HA (off + 8 + 8 * htab->plt_static_chain) != PPC_HA (off))
-       size += 4;
-      if (stub_entry->h != NULL
-         && (stub_entry->h == htab->tls_get_addr_fd
-             || stub_entry->h == htab->tls_get_addr)
-         && !htab->no_tls_get_addr_opt)
-       size += 13 * 4;
+      size = plt_stub_size (htab, stub_entry, off);
+      if (htab->plt_stub_align)
+       size += plt_stub_pad (htab, stub_entry, off);
       if (info->emitrelocations)
        {
          stub_entry->stub_sec->reloc_count
@@ -11098,7 +11243,8 @@ maybe_strip_output (struct bfd_link_info *info, asection *isec)
 
 bfd_boolean
 ppc64_elf_size_stubs (struct bfd_link_info *info, bfd_signed_vma group_size,
-                     bfd_boolean plt_static_chain)
+                     bfd_boolean plt_static_chain, int plt_thread_safe,
+                     int plt_stub_align)
 {
   bfd_size_type stub_group_size;
   bfd_boolean stubs_always_before_branch;
@@ -11108,6 +11254,40 @@ ppc64_elf_size_stubs (struct bfd_link_info *info, bfd_signed_vma group_size,
     return FALSE;
 
   htab->plt_static_chain = plt_static_chain;
+  htab->plt_stub_align = plt_stub_align;
+  if (plt_thread_safe == -1)
+    {
+      const char *const thread_starter[] =
+       {
+         "pthread_create",
+         /* libstdc++ */
+         "_ZNSt6thread15_M_start_threadESt10shared_ptrINS_10_Impl_baseEE",
+         /* librt */
+         "aio_init", "aio_read", "aio_write", "aio_fsync", "lio_listio",
+         "mq_notify", "create_timer",
+         /* libanl */
+         "getaddrinfo_a",
+         /* libgomp */
+         "GOMP_parallel_start",
+         "GOMP_parallel_loop_static_start",
+         "GOMP_parallel_loop_dynamic_start",
+         "GOMP_parallel_loop_guided_start",
+         "GOMP_parallel_loop_runtime_start",
+         "GOMP_parallel_sections_start", 
+       };
+      unsigned i;
+
+      for (i = 0; i < sizeof (thread_starter)/ sizeof (thread_starter[0]); i++)
+       {
+         struct elf_link_hash_entry *h;
+         h = elf_link_hash_lookup (&htab->elf, thread_starter[i],
+                                   FALSE, FALSE, TRUE);
+         plt_thread_safe = h != NULL && h->ref_regular;
+         if (plt_thread_safe)
+           break;
+       }
+    }
+  htab->plt_thread_safe = plt_thread_safe;
   stubs_always_before_branch = group_size < 0;
   if (group_size < 0)
     stub_group_size = -group_size;
@@ -11342,10 +11522,14 @@ ppc64_elf_size_stubs (struct bfd_link_info *info, bfd_signed_vma group_size,
                  if (stub_type == ppc_stub_plt_call
                      && irela + 1 < irelaend
                      && irela[1].r_offset == irela->r_offset + 4
-                     && ELF64_R_TYPE (irela[1].r_info) == R_PPC64_TOCSAVE
-                     && !tocsave_find (htab, INSERT,
-                                       &local_syms, irela + 1, input_bfd))
-                   goto error_ret_free_internal;
+                     && ELF64_R_TYPE (irela[1].r_info) == R_PPC64_TOCSAVE)
+                   {
+                     if (!tocsave_find (htab, INSERT,
+                                        &local_syms, irela + 1, input_bfd))
+                       goto error_ret_free_internal;
+                   }
+                 else if (stub_type == ppc_stub_plt_call)
+                   stub_type = ppc_stub_plt_call_r2save;
 
                  /* Support for grouping stub sections.  */
                  id_sec = htab->stub_group[section->id].link_sec;
@@ -11361,6 +11545,8 @@ ppc64_elf_size_stubs (struct bfd_link_info *info, bfd_signed_vma group_size,
                    {
                      /* The proper stub has already been created.  */
                      free (stub_name);
+                     if (stub_type == ppc_stub_plt_call_r2save)
+                       stub_entry->stub_type = stub_type;
                      continue;
                    }
 
@@ -11380,7 +11566,8 @@ ppc64_elf_size_stubs (struct bfd_link_info *info, bfd_signed_vma group_size,
                    }
 
                  stub_entry->stub_type = stub_type;
-                 if (stub_type != ppc_stub_plt_call)
+                 if (stub_type != ppc_stub_plt_call
+                     && stub_type != ppc_stub_plt_call_r2save)
                    {
                      stub_entry->target_value = code_value;
                      stub_entry->target_section = code_sec;
@@ -11460,6 +11647,14 @@ ppc64_elf_size_stubs (struct bfd_link_info *info, bfd_signed_vma group_size,
          htab->glink_eh_frame->size = size;
        }
 
+      if (htab->plt_stub_align != 0)
+       for (stub_sec = htab->stub_bfd->sections;
+            stub_sec != NULL;
+            stub_sec = stub_sec->next)
+         if ((stub_sec->flags & SEC_LINKER_CREATED) == 0)
+           stub_sec->size = ((stub_sec->size + (1 << htab->plt_stub_align) - 1)
+                             & (-1 << htab->plt_stub_align));
+
       for (stub_sec = htab->stub_bfd->sections;
           stub_sec != NULL;
           stub_sec = stub_sec->next)
@@ -11785,6 +11980,14 @@ ppc64_elf_build_stubs (bfd_boolean emit_stub_syms,
   if (htab->relbrlt != NULL)
     htab->relbrlt->reloc_count = 0;
 
+  if (htab->plt_stub_align != 0)
+    for (stub_sec = htab->stub_bfd->sections;
+        stub_sec != NULL;
+        stub_sec = stub_sec->next)
+      if ((stub_sec->flags & SEC_LINKER_CREATED) == 0)
+       stub_sec->size = ((stub_sec->size + (1 << htab->plt_stub_align) - 1)
+                         & (-1 << htab->plt_stub_align));
+
   for (stub_sec = htab->stub_bfd->sections;
        stub_sec != NULL;
        stub_sec = stub_sec->next)
@@ -11818,14 +12021,16 @@ ppc64_elf_build_stubs (bfd_boolean emit_stub_syms,
                         "  toc adjust   %lu\n"
                         "  long branch  %lu\n"
                         "  long toc adj %lu\n"
-                        "  plt call     %lu"),
+                        "  plt call     %lu\n"
+                        "  plt call toc %lu"),
               stub_sec_count,
               stub_sec_count == 1 ? "" : "s",
               htab->stub_count[ppc_stub_long_branch - 1],
               htab->stub_count[ppc_stub_long_branch_r2off - 1],
               htab->stub_count[ppc_stub_plt_branch - 1],
               htab->stub_count[ppc_stub_plt_branch_r2off - 1],
-              htab->stub_count[ppc_stub_plt_call - 1]);
+              htab->stub_count[ppc_stub_plt_call - 1],
+              htab->stub_count[ppc_stub_plt_call_r2save - 1]);
     }
   return TRUE;
 }
@@ -11925,8 +12130,8 @@ ppc64_elf_relocate_section (bfd *output_bfd,
   bfd_vma TOCstart;
   bfd_boolean ret = TRUE;
   bfd_boolean is_opd;
-  /* Disabled until we sort out how ld should choose 'y' vs 'at'.  */
-  bfd_boolean is_power4 = FALSE;
+  /* Assume 'at' branch hints.  */
+  bfd_boolean is_isa_v2 = TRUE;
   bfd_vma d_offset = (bfd_big_endian (output_bfd) ? 2 : 0);
 
   /* Initialize howto table if needed.  */
@@ -12558,6 +12763,7 @@ ppc64_elf_relocate_section (bfd *output_bfd,
          stub_entry = ppc_get_stub_entry (input_section, sec, fdh, rel, htab);
          if (stub_entry != NULL
              && (stub_entry->stub_type == ppc_stub_plt_call
+                 || stub_entry->stub_type == ppc_stub_plt_call_r2save
                  || stub_entry->stub_type == ppc_stub_plt_branch_r2off
                  || stub_entry->stub_type == ppc_stub_long_branch_r2off))
            {
@@ -12586,7 +12792,8 @@ ppc64_elf_relocate_section (bfd *output_bfd,
 
              if (!can_plt_call)
                {
-                 if (stub_entry->stub_type == ppc_stub_plt_call)
+                 if (stub_entry->stub_type == ppc_stub_plt_call
+                     || stub_entry->stub_type == ppc_stub_plt_call_r2save)
                    {
                      /* If this is a plain branch rather than a branch
                         and link, don't require a nop.  However, don't
@@ -12633,7 +12840,8 @@ ppc64_elf_relocate_section (bfd *output_bfd,
                }
 
              if (can_plt_call
-                 && stub_entry->stub_type == ppc_stub_plt_call)
+                 && (stub_entry->stub_type == ppc_stub_plt_call
+                     || stub_entry->stub_type == ppc_stub_plt_call_r2save))
                unresolved_reloc = FALSE;
            }
 
@@ -12679,7 +12887,10 @@ ppc64_elf_relocate_section (bfd *output_bfd,
                            + stub_entry->stub_sec->output_section->vma);
              addend = 0;
 
-             if (stub_entry->stub_type == ppc_stub_plt_call
+             if ((stub_entry->stub_type == ppc_stub_plt_call
+                  || stub_entry->stub_type == ppc_stub_plt_call_r2save)
+                 && (ALWAYS_EMIT_R2SAVE
+                     || stub_entry->stub_type == ppc_stub_plt_call_r2save)
                  && rel + 1 < relend
                  && rel[1].r_offset == rel->r_offset + 4
                  && ELF64_R_TYPE (rel[1].r_info) == R_PPC64_TOCSAVE)
@@ -12688,7 +12899,7 @@ ppc64_elf_relocate_section (bfd *output_bfd,
 
          if (insn != 0)
            {
-             if (is_power4)
+             if (is_isa_v2)
                {
                  /* Set 'a' bit.  This is 0b00010 in BO field for branch
                     on CR(BI) insns (BO == 001at or 011at), and 0b01000
index 9026c56..2728b27 100644 (file)
@@ -1,5 +1,5 @@
 /* PowerPC64-specific support for 64-bit ELF.
-   Copyright 2002, 2003, 2004, 2005, 2007, 2008, 2010, 2011
+   Copyright 2002, 2003, 2004, 2005, 2007, 2008, 2010, 2011, 2012
    Free Software Foundation, Inc.
 
    This file is part of BFD, the Binary File Descriptor library.
@@ -49,7 +49,7 @@ bfd_boolean ppc64_elf_check_init_fini
 bfd_boolean ppc64_elf_next_input_section
   (struct bfd_link_info *, asection *);
 bfd_boolean ppc64_elf_size_stubs
-  (struct bfd_link_info *, bfd_signed_vma, bfd_boolean);
+(struct bfd_link_info *, bfd_signed_vma, bfd_boolean, int, int);
 bfd_boolean ppc64_elf_build_stubs
   (bfd_boolean, struct bfd_link_info *, char **);
 void ppc64_elf_restore_symbols
index d779176..8d4027f 100644 (file)
@@ -1,3 +1,15 @@
+2012-01-11  Alan Modra  <amodra@gmail.com>
+
+       * emultempl/ppc64elf.em (PARSE_AND_LIST_PROLOGUE,
+       PARSE_AND_LIST_LONGOPTS, PARSE_AND_LIST_OPTIONS,
+       PARSE_AND_LIST_ARGS_CASES): Handle --{no-,}plt-thread-safe and
+       --{no-,}plt-align.
+       (plt_thread_safe, plt_stub_align): New vars.
+       (gld${EMULATION_NAME}_after_allocation): Pass them to
+       ppc64_elf_size_stubs.  Align stub sections according to plt_stub_align.
+       * ld.texinfo: Document new command line options, and an old
+       undocumented option.
+
 2012-01-09  Roland McGrath  <mcgrathr@google.com>
 
        * configure.in: Use AM_ZLIB.
index 9c352ee..5b637e1 100644 (file)
@@ -1,5 +1,5 @@
 # This shell script emits a C file. -*- C -*-
-# Copyright 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
+# Copyright 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
 # Free Software Foundation, Inc.
 #
 # This file is part of the GNU Binutils.
@@ -64,6 +64,12 @@ static int no_toc_sort = 0;
 /* Set if PLT call stubs should load r11.  */
 static int plt_static_chain = ${DEFAULT_PLT_STATIC_CHAIN-0};
 
+/* Set if PLT call stubs need to be thread safe on power7+.  */
+static int plt_thread_safe = -1;
+
+/* Set if individual PLT call stubs should be aligned.  */
+static int plt_stub_align = 0;
+
 /* Whether to emit symbols for stubs.  */
 static int emit_stub_syms = -1;
 
@@ -379,7 +385,8 @@ ppc_add_stub_section (const char *stub_sec_name, asection *input_section)
   stub_sec = bfd_make_section_anyway_with_flags (stub_file->the_bfd,
                                                 stub_sec_name, flags);
   if (stub_sec == NULL
-      || !bfd_set_section_alignment (stub_file->the_bfd, stub_sec, 5))
+      || !bfd_set_section_alignment (stub_file->the_bfd, stub_sec,
+                                    plt_stub_align > 5 ? plt_stub_align : 5))
     goto err_ret;
 
   output_section = input_section->output_section;
@@ -504,7 +511,9 @@ gld${EMULATION_NAME}_after_allocation (void)
            einfo ("%P: .init/.fini fragments use differing TOC pointers\n");
 
          /* Call into the BFD backend to do the real work.  */
-         if (!ppc64_elf_size_stubs (&link_info, group_size, plt_static_chain))
+         if (!ppc64_elf_size_stubs (&link_info, group_size,
+                                    plt_static_chain, plt_thread_safe,
+                                    plt_stub_align))
            einfo ("%X%P: can not size stub section: %E\n");
        }
     }
@@ -649,7 +658,11 @@ PARSE_AND_LIST_PROLOGUE=${PARSE_AND_LIST_PROLOGUE}'
 #define OPTION_STUBGROUP_SIZE          321
 #define OPTION_PLT_STATIC_CHAIN                (OPTION_STUBGROUP_SIZE + 1)
 #define OPTION_NO_PLT_STATIC_CHAIN     (OPTION_PLT_STATIC_CHAIN + 1)
-#define OPTION_STUBSYMS                        (OPTION_NO_PLT_STATIC_CHAIN + 1)
+#define OPTION_PLT_THREAD_SAFE         (OPTION_NO_PLT_STATIC_CHAIN + 1)
+#define OPTION_NO_PLT_THREAD_SAFE      (OPTION_PLT_THREAD_SAFE + 1)
+#define OPTION_PLT_ALIGN               (OPTION_NO_PLT_THREAD_SAFE + 1)
+#define OPTION_NO_PLT_ALIGN            (OPTION_PLT_ALIGN + 1)
+#define OPTION_STUBSYMS                        (OPTION_NO_PLT_ALIGN + 1)
 #define OPTION_NO_STUBSYMS             (OPTION_STUBSYMS + 1)
 #define OPTION_DOTSYMS                 (OPTION_NO_STUBSYMS + 1)
 #define OPTION_NO_DOTSYMS              (OPTION_DOTSYMS + 1)
@@ -666,6 +679,10 @@ PARSE_AND_LIST_LONGOPTS=${PARSE_AND_LIST_LONGOPTS}'
   { "stub-group-size", required_argument, NULL, OPTION_STUBGROUP_SIZE },
   { "plt-static-chain", no_argument, NULL, OPTION_PLT_STATIC_CHAIN },
   { "no-plt-static-chain", no_argument, NULL, OPTION_NO_PLT_STATIC_CHAIN },
+  { "plt-thread-safe", no_argument, NULL, OPTION_PLT_THREAD_SAFE },
+  { "no-plt-thread-safe", no_argument, NULL, OPTION_NO_PLT_THREAD_SAFE },
+  { "plt-align", optional_argument, NULL, OPTION_PLT_ALIGN },
+  { "no-plt-align", no_argument, NULL, OPTION_NO_PLT_ALIGN },
   { "emit-stub-syms", no_argument, NULL, OPTION_STUBSYMS },
   { "no-emit-stub-syms", no_argument, NULL, OPTION_NO_STUBSYMS },
   { "dotsyms", no_argument, NULL, OPTION_DOTSYMS },
@@ -691,10 +708,22 @@ PARSE_AND_LIST_OPTIONS=${PARSE_AND_LIST_OPTIONS}'
                                 choose suitable defaults.\n"
                   ));
   fprintf (file, _("\
-  --plt-static-chain          PLT call stubs should load r11.\n"
+  --plt-static-chain          PLT call stubs should load r11.${DEFAULT_PLT_STATIC_CHAIN- (default)}\n"
+                  ));
+  fprintf (file, _("\
+  --no-plt-static-chain       PLT call stubs should not load r11.${DEFAULT_PLT_STATIC_CHAIN+ (default)}\n"
+                  ));
+  fprintf (file, _("\
+  --plt-thread-safe           PLT call stubs with load-load barrier.\n"
+                  ));
+  fprintf (file, _("\
+  --no-plt-thread-safe        PLT call stubs without barrier.\n"
+                  ));
+  fprintf (file, _("\
+  --plt-align [=<align>]      Align PLT call stubs to fit cache lines.\n"
                   ));
   fprintf (file, _("\
-  --no-plt-static-chain       PLT call stubs should not load r11. (default)\n"
+  --no-plt-align              Don'\''t align individual PLT call stubs.\n"
                   ));
   fprintf (file, _("\
   --emit-stub-syms            Label linker stubs with a symbol.\n"
@@ -753,6 +782,31 @@ PARSE_AND_LIST_ARGS_CASES=${PARSE_AND_LIST_ARGS_CASES}'
       plt_static_chain = 0;
       break;
 
+    case OPTION_PLT_THREAD_SAFE:
+      plt_thread_safe = 1;
+      break;
+
+    case OPTION_NO_PLT_THREAD_SAFE:
+      plt_thread_safe = 0;
+      break;
+
+    case OPTION_PLT_ALIGN:
+      if (optarg != NULL)
+       {
+         char *end;
+         unsigned long val = strtoul (optarg, &end, 0);
+         if (*end || val > 8)
+           einfo (_("%P%F: invalid --plt-align `%s'\''\n"), optarg);
+         plt_stub_align = val;
+       }
+      else
+       plt_stub_align = 5;
+      break;
+
+    case OPTION_NO_PLT_ALIGN:
+      plt_stub_align = 0;
+      break;
+
     case OPTION_STUBSYMS:
       emit_stub_syms = 1;
       break;
index 592e38c..6c74ff5 100644 (file)
@@ -1,7 +1,7 @@
 \input texinfo
 @setfilename ld.info
 @c Copyright 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
-@c 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
+@c 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
 @c Free Software Foundation, Inc.
 @syncodeindex ky cp
 @c man begin INCLUDE
@@ -6710,7 +6710,9 @@ optimization.
 @cindex PowerPC64 multi-TOC
 @kindex --no-multi-toc
 @item --no-multi-toc
-By default, PowerPC64 GCC generates code for a TOC model where TOC
+If given any toc option besides @code{-mcmodel=medium} or
+@code{-mcmodel=large}, PowerPC64 GCC generates code for a TOC model
+where TOC
 entries are accessed with a 16-bit offset from r2.  This limits the
 total TOC size to 64K.  PowerPC64 @command{ld} extends this limit by
 grouping code sections such that each group uses less than 64K for its
@@ -6719,6 +6721,52 @@ calls.  @command{ld} does not split apart input sections, so cannot
 help if a single input file has a @code{.toc} section that exceeds
 64K, most likely from linking multiple files with @command{ld -r}.
 Use this option to turn off this feature.
+
+@cindex PowerPC64 TOC sorting
+@kindex --no-toc-sort
+@item --no-toc-sort
+By default, @command{ld} sorts TOC sections so that those whose file
+happens to have a section called @code{.init} or @code{.fini} are
+placed first, followed by TOC sections referenced by code generated
+with PowerPC64 gcc's @code{-mcmodel=small}, and lastly TOC sections
+referenced only by code generated with PowerPC64 gcc's
+@code{-mcmodel=medium} or @code{-mcmodel=large} options.  Doing this
+results in better TOC grouping for multi-TOC.  Use this option to turn
+off this feature.
+
+@cindex PowerPC64 PLT stub alignment
+@kindex --plt-align
+@kindex --no-plt-align
+@item --plt-align
+@itemx --no-plt-align
+Use these options to control whether individual PLT call stubs are
+aligned to a 32-byte boundary, or to the specified power of two
+boundary when using @code{--plt-align=}.  By default PLT call stubs
+are packed tightly.
+
+@cindex PowerPC64 PLT call stub static chain
+@kindex --plt-static-chain
+@kindex --no-plt-static-chain
+@item --plt-static-chain
+@itemx --no-plt-static-chain
+Use these options to control whether PLT call stubs load the static
+chain pointer (r11).  @code{ld} defaults to not loading the static
+chain since there is never any need to do so on a PLT call.
+
+@cindex PowerPC64 PLT call stub thread safety
+@kindex --plt-thread-safe
+@kindex --no-plt-thread-safe
+@item --plt-thread-safe
+@itemx --no-plt-thread-safe
+With power7's weakly ordered memory model, it is possible when using
+lazy binding for ld.so to update a plt entry in one thread and have
+another thread see the individual plt entry words update in the wrong
+order, despite ld.so carefully writing in the correct order and using
+memory write barriers.  To avoid this we need some sort of read
+barrier in the call stub, or use LD_BIND_NOW=1.  By default, @code{ld}
+looks for calls to commonly used functions that create threads, and if
+seen, adds the necessary barriers.  Use these options to change the
+default behaviour.
 @end table
 
 @ifclear GENERIC