From: Alan Modra Date: Wed, 25 Mar 2015 05:23:47 +0000 (+1030) Subject: powerpc __tls_get_addr call optimization X-Git-Tag: upstream/2.30~6193 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=afcd9480feca651eef436d8438b783dde5c3bbb2;p=external%2Fglibc.git powerpc __tls_get_addr call optimization This patch is glibc support for a PowerPC TLS optimization, inspired by Alexandre Oliva's TLS optimization for other processors, http://www.lsd.ic.unicamp.br/~oliva/writeups/TLS/RFC-TLSDESC-x86.txt In essence, this optimization uses a zero module id in the tls_index GOT entry to indicate that a TLS variable is allocated space in the static TLS area. A special plt call linker stub for __tls_get_addr checks for such a tls_index and if found, returns the offset immediately. The linker communicates the fact that the special __tls_get_addr stub is used by setting a bit in the dynamic tag DT_PPC64_OPT/DT_PPC_OPT. glibc communicates to the linker that this optimization is available by the presence of __tls_get_addr_opt. tst-tlsmod2.so is built with -Wl,--no-tls-get-addr-optimize for tst-tls-dlinfo, which otherwise would fail since it tests that no static tls is allocated. The ld option --no-tls-get-addr-optimize has been available since binutils-2.20 so doesn't need a configure test. * NEWS: Advertise TLS optimization. * elf/elf.h (R_PPC_TLSGD, R_PPC_TLSLD, DT_PPC_OPT, PPC_OPT_TLS): Define. (DT_PPC_NUM): Increment. * elf/dynamic-link.h (HAVE_STATIC_TLS): Define. (CHECK_STATIC_TLS): Use here. * sysdeps/powerpc/powerpc32/dl-machine.h (elf_machine_rela): Optimize TLS descriptors. * sysdeps/powerpc/powerpc64/dl-machine.h (elf_machine_rela): Likewise. * sysdeps/powerpc/dl-tls.c: New file. * sysdeps/powerpc/Versions: Add __tls_get_addr_opt. * sysdeps/powerpc/tst-tlsopt-powerpc.c: New tls test. * sysdeps/unix/sysv/linux/powerpc/Makefile: Add new test. Build tst-tlsmod2.so with --no-tls-get-addr-optimize. * sysdeps/unix/sysv/linux/powerpc/powerpc32/ld.abilist: Update. * sysdeps/unix/sysv/linux/powerpc/powerpc64/ld.abilist: Likewise. * sysdeps/unix/sysv/linux/powerpc/powerpc64/ld-le.abilist: Likewise. --- diff --git a/ChangeLog b/ChangeLog index 1e3d305..506032c 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,24 @@ 2015-03-25 Alan Modra + * NEWS: Advertise TLS optimization. + * elf/elf.h (R_PPC_TLSGD, R_PPC_TLSLD, DT_PPC_OPT, PPC_OPT_TLS): Define. + (DT_PPC_NUM): Increment. + * elf/dynamic-link.h (HAVE_STATIC_TLS): Define. + (CHECK_STATIC_TLS): Use here. + * sysdeps/powerpc/powerpc32/dl-machine.h (elf_machine_rela): Optimize + TLS descriptors. + * sysdeps/powerpc/powerpc64/dl-machine.h (elf_machine_rela): Likewise. + * sysdeps/powerpc/dl-tls.c: New file. + * sysdeps/powerpc/Versions: Add __tls_get_addr_opt. + * sysdeps/powerpc/tst-tlsopt-powerpc.c: New tls test. + * sysdeps/unix/sysv/linux/powerpc/Makefile: Add new test. + Build tst-tlsmod2.so with --no-tls-get-addr-optimize. + * sysdeps/unix/sysv/linux/powerpc/powerpc32/ld.abilist: Update. + * sysdeps/unix/sysv/linux/powerpc/powerpc64/ld.abilist: Likewise. + * sysdeps/unix/sysv/linux/powerpc/powerpc64/ld-le.abilist: Likewise. + +2015-03-25 Alan Modra + * sysdeps/powerpc/powerpc64/configure.ac: Correct "linker support for overlapping .opd entries" to "support...". * sysdeps/powerpc/powerpc64/configure: Regenerate diff --git a/NEWS b/NEWS index aec1957..b66c1b1 100644 --- a/NEWS +++ b/NEWS @@ -17,6 +17,10 @@ Version 2.22 18039, 18042, 18043, 18046, 18047, 18068, 18080, 18093, 18100, 18104, 18110, 18111, 18128, 18138. +* A powerpc and powerpc64 optimization for TLS, similar to TLS descriptors + for LD and GD on x86 and x86-64, has been implemented. You will need + binutils-2.24 or later to enable this optimization. + * Character encoding and ctype tables were updated to Unicode 7.0.0, using new generator scripts contributed by Pravin Satpute and Mike FABIAN (Red Hat). These updates cause user visible changes, such as the fix for bug diff --git a/elf/dynamic-link.h b/elf/dynamic-link.h index 6f4a773..8d428e2 100644 --- a/elf/dynamic-link.h +++ b/elf/dynamic-link.h @@ -25,11 +25,14 @@ an attempt to allocate it in surplus space on the fly. If that can't be done, we fall back to the error that DF_STATIC_TLS is intended to produce. */ +#define HAVE_STATIC_TLS(map, sym_map) \ + (__builtin_expect ((sym_map)->l_tls_offset != NO_TLS_OFFSET \ + && ((sym_map)->l_tls_offset \ + != FORCED_DYNAMIC_TLS_OFFSET), 1)) + #define CHECK_STATIC_TLS(map, sym_map) \ do { \ - if (__builtin_expect ((sym_map)->l_tls_offset == NO_TLS_OFFSET \ - || ((sym_map)->l_tls_offset \ - == FORCED_DYNAMIC_TLS_OFFSET), 0)) \ + if (!HAVE_STATIC_TLS (map, sym_map)) \ _dl_allocate_static_tls (sym_map); \ } while (0) diff --git a/elf/elf.h b/elf/elf.h index 496f08d..71492a2 100644 --- a/elf/elf.h +++ b/elf/elf.h @@ -2194,6 +2194,8 @@ enum #define R_PPC_GOT_DTPREL16_LO 92 /* half16* (sym+add)@got@dtprel@l */ #define R_PPC_GOT_DTPREL16_HI 93 /* half16* (sym+add)@got@dtprel@h */ #define R_PPC_GOT_DTPREL16_HA 94 /* half16* (sym+add)@got@dtprel@ha */ +#define R_PPC_TLSGD 95 /* none (sym+add)@tlsgd */ +#define R_PPC_TLSLD 96 /* none (sym+add)@tlsld */ /* The remaining relocs are from the Embedded ELF ABI, and are not in the SVR4 ELF ABI. */ @@ -2237,7 +2239,11 @@ enum /* PowerPC specific values for the Dyn d_tag field. */ #define DT_PPC_GOT (DT_LOPROC + 0) -#define DT_PPC_NUM 1 +#define DT_PPC_OPT (DT_LOPROC + 1) +#define DT_PPC_NUM 2 + +/* PowerPC specific values for the DT_PPC_OPT Dyn entry. */ +#define PPC_OPT_TLS 1 /* PowerPC64 relocations defined by the ABIs */ #define R_PPC64_NONE R_PPC_NONE diff --git a/sysdeps/powerpc/Makefile b/sysdeps/powerpc/Makefile index b11edd7..533d763 100644 --- a/sysdeps/powerpc/Makefile +++ b/sysdeps/powerpc/Makefile @@ -8,6 +8,9 @@ sysdep-dl-routines += dl-machine sysdep_routines += dl-machine # extra shared linker files to link only into dl-allobjs.so sysdep-rtld-routines += dl-machine +# Don't optimize GD tls sequence to LE. +LDFLAGS-tst-tlsopt-powerpc += -Wl,--no-tls-optimize +tests += tst-tlsopt-powerpc endif ifeq ($(subdir),setjmp) diff --git a/sysdeps/powerpc/Versions b/sysdeps/powerpc/Versions index 47c2c3e..2aebf7c 100644 --- a/sysdeps/powerpc/Versions +++ b/sysdeps/powerpc/Versions @@ -15,3 +15,9 @@ libc { __vmx__libc_longjmp; __vmx__libc_siglongjmp; } } + +ld { + GLIBC_2.22 { + __tls_get_addr_opt; + } +} diff --git a/sysdeps/powerpc/dl-tls.c b/sysdeps/powerpc/dl-tls.c new file mode 100644 index 0000000..a18b23e --- /dev/null +++ b/sysdeps/powerpc/dl-tls.c @@ -0,0 +1,24 @@ +/* Thread-local storage handling in the ELF dynamic linker. PowerPC version. + Copyright (C) 2009-2015 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#include "elf/dl-tls.c" + +#ifdef SHARED +strong_alias(__tls_get_addr, __tls_get_addr_opt) +#endif diff --git a/sysdeps/powerpc/powerpc32/dl-machine.h b/sysdeps/powerpc/powerpc32/dl-machine.h index c94674f..8b0c067 100644 --- a/sysdeps/powerpc/powerpc32/dl-machine.h +++ b/sysdeps/powerpc/powerpc32/dl-machine.h @@ -333,6 +333,32 @@ elf_machine_rela (struct link_map *map, const Elf32_Rela *reloc, # endif case R_PPC_DTPMOD32: + if (map->l_info[DT_PPC(OPT)] + && (map->l_info[DT_PPC(OPT)]->d_un.d_val & PPC_OPT_TLS)) + { + if (!NOT_BOOTSTRAP) + { + reloc_addr[0] = 0; + reloc_addr[1] = (sym_map->l_tls_offset - TLS_TP_OFFSET + + TLS_DTV_OFFSET); + break; + } + else if (sym_map != NULL) + { +# ifndef SHARED + CHECK_STATIC_TLS (map, sym_map); +# else + if (TRY_STATIC_TLS (map, sym_map)) +# endif + { + reloc_addr[0] = 0; + /* Set up for local dynamic. */ + reloc_addr[1] = (sym_map->l_tls_offset - TLS_TP_OFFSET + + TLS_DTV_OFFSET); + break; + } + } + } if (!NOT_BOOTSTRAP) /* During startup the dynamic linker is always index 1. */ *reloc_addr = 1; @@ -342,6 +368,28 @@ elf_machine_rela (struct link_map *map, const Elf32_Rela *reloc, *reloc_addr = sym_map->l_tls_modid; break; case R_PPC_DTPREL32: + if (map->l_info[DT_PPC(OPT)] + && (map->l_info[DT_PPC(OPT)]->d_un.d_val & PPC_OPT_TLS)) + { + if (!NOT_BOOTSTRAP) + { + *reloc_addr = TLS_TPREL_VALUE (sym_map, sym, reloc); + break; + } + else if (sym_map != NULL) + { + /* This reloc is always preceded by R_PPC_DTPMOD32. */ +# ifndef SHARED + assert (HAVE_STATIC_TLS (map, sym_map)); +# else + if (HAVE_STATIC_TLS (map, sym_map)) +# endif + { + *reloc_addr = TLS_TPREL_VALUE (sym_map, sym, reloc); + break; + } + } + } /* During relocation all TLS symbols are defined and used. Therefore the offset is already correct. */ if (NOT_BOOTSTRAP && sym_map != NULL) diff --git a/sysdeps/powerpc/powerpc64/dl-machine.h b/sysdeps/powerpc/powerpc64/dl-machine.h index 5cb0087..55ac736 100644 --- a/sysdeps/powerpc/powerpc64/dl-machine.h +++ b/sysdeps/powerpc/powerpc64/dl-machine.h @@ -701,6 +701,32 @@ elf_machine_rela (struct link_map *map, return; case R_PPC64_DTPMOD64: + if (map->l_info[DT_PPC64(OPT)] + && (map->l_info[DT_PPC64(OPT)]->d_un.d_val & PPC64_OPT_TLS)) + { +#ifdef RTLD_BOOTSTRAP + reloc_addr[0] = 0; + reloc_addr[1] = (sym_map->l_tls_offset - TLS_TP_OFFSET + + TLS_DTV_OFFSET); + return; +#else + if (sym_map != NULL) + { +# ifndef SHARED + CHECK_STATIC_TLS (map, sym_map); +# else + if (TRY_STATIC_TLS (map, sym_map)) +# endif + { + reloc_addr[0] = 0; + /* Set up for local dynamic. */ + reloc_addr[1] = (sym_map->l_tls_offset - TLS_TP_OFFSET + + TLS_DTV_OFFSET); + return; + } + } +#endif + } #ifdef RTLD_BOOTSTRAP /* During startup the dynamic linker is always index 1. */ *reloc_addr = 1; @@ -713,6 +739,28 @@ elf_machine_rela (struct link_map *map, return; case R_PPC64_DTPREL64: + if (map->l_info[DT_PPC64(OPT)] + && (map->l_info[DT_PPC64(OPT)]->d_un.d_val & PPC64_OPT_TLS)) + { +#ifdef RTLD_BOOTSTRAP + *reloc_addr = TLS_TPREL_VALUE (sym_map, sym, reloc); + return; +#else + if (sym_map != NULL) + { + /* This reloc is always preceded by R_PPC64_DTPMOD64. */ +# ifndef SHARED + assert (HAVE_STATIC_TLS (map, sym_map)); +# else + if (HAVE_STATIC_TLS (map, sym_map)) +# endif + { + *reloc_addr = TLS_TPREL_VALUE (sym_map, sym, reloc); + return; + } + } +#endif + } /* During relocation all TLS symbols are defined and used. Therefore the offset is already correct. */ #ifndef RTLD_BOOTSTRAP diff --git a/sysdeps/powerpc/tst-tlsopt-powerpc.c b/sysdeps/powerpc/tst-tlsopt-powerpc.c new file mode 100644 index 0000000..c9a14cb --- /dev/null +++ b/sysdeps/powerpc/tst-tlsopt-powerpc.c @@ -0,0 +1,52 @@ +/* glibc test for __tls_get_addr optimization. */ +#include + +#include "../../elf/tls-macros.h" +#include "dl-tls.h" + +/* common 'int' variable in TLS. */ +COMMON_INT_DEF(foo); + + +#define TEST_FUNCTION do_test () +static int +do_test (void) +{ + int result = 0; + + /* Get variable using general dynamic model. */ + int *ap = TLS_GD (foo); + if (*ap != 0) + { + printf ("foo = %d\n", *ap); + result = 1; + } + + tls_index *tls_arg; +#ifdef __powerpc64__ + register unsigned long thread_pointer __asm__ ("r13"); + asm ("addi %0,2,foo@got@tlsgd" : "=r" (tls_arg)); +#else + register unsigned long thread_pointer __asm__ ("r2"); + asm ("bcl 20,31,1f\n1:\t" + "mflr %0\n\t" + "addis %0,%0,_GLOBAL_OFFSET_TABLE_-1b@ha\n\t" + "addi %0,%0,_GLOBAL_OFFSET_TABLE_-1b@l\n\t" + "addi %0,%0,foo@got@tlsgd" : "=b" (tls_arg)); +#endif + + if (tls_arg->ti_module != 0) + { + printf ("tls_index not optimized, binutils too old?\n"); + result = 1; + } + else if (tls_arg->ti_offset + thread_pointer != (unsigned long) ap) + { + printf ("tls_index->ti_offset wrong value\n"); + result = 1; + } + + return result; +} + +#include "../../test-skeleton.c" diff --git a/sysdeps/unix/sysv/linux/powerpc/Makefile b/sysdeps/unix/sysv/linux/powerpc/Makefile index fcf3bb5..c89ed9e 100644 --- a/sysdeps/unix/sysv/linux/powerpc/Makefile +++ b/sysdeps/unix/sysv/linux/powerpc/Makefile @@ -20,6 +20,8 @@ ifeq ($(build-shared),yes) # This is needed for DSO loading from static binaries. sysdep-dl-routines += dl-static endif +# Otherwise tst-tls-dlinfo fails due to tst-tlsmod2.so using static tls. +LDFLAGS-tst-tlsmod2.so += -Wl,--no-tls-get-addr-optimize endif ifeq ($(subdir),misc) diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc32/ld.abilist b/sysdeps/unix/sysv/linux/powerpc/powerpc32/ld.abilist index d71611f..7d24961 100644 --- a/sysdeps/unix/sysv/linux/powerpc/powerpc32/ld.abilist +++ b/sysdeps/unix/sysv/linux/powerpc/powerpc32/ld.abilist @@ -10,6 +10,9 @@ GLIBC_2.1 GLIBC_2.1 A __libc_stack_end D 0x4 _dl_mcount F +GLIBC_2.22 + GLIBC_2.22 A + __tls_get_addr_opt F GLIBC_2.3 GLIBC_2.3 A __tls_get_addr F diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc64/ld-le.abilist b/sysdeps/unix/sysv/linux/powerpc/powerpc64/ld-le.abilist index 3530fb4..3174e21 100644 --- a/sysdeps/unix/sysv/linux/powerpc/powerpc64/ld-le.abilist +++ b/sysdeps/unix/sysv/linux/powerpc/powerpc64/ld-le.abilist @@ -9,3 +9,6 @@ GLIBC_2.17 free F malloc F realloc F +GLIBC_2.22 + GLIBC_2.22 A + __tls_get_addr_opt F diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc64/ld.abilist b/sysdeps/unix/sysv/linux/powerpc/powerpc64/ld.abilist index 899360e..d8c4201 100644 --- a/sysdeps/unix/sysv/linux/powerpc/powerpc64/ld.abilist +++ b/sysdeps/unix/sysv/linux/powerpc/powerpc64/ld.abilist @@ -1,3 +1,6 @@ +GLIBC_2.22 + GLIBC_2.22 A + __tls_get_addr_opt F GLIBC_2.3 GLIBC_2.3 A __libc_memalign F