From 4d87b39422b38536552be9de805675e63a9e15c2 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sat, 28 May 2005 23:40:09 +0000 Subject: [PATCH] * elf/elf.h (DT_ALPHA_PLTRO, DT_ALPHA_NUM): New. * sysdeps/alpha/dl-dtprocnum.h: New file. * sysdeps/alpha/dl-machine.h (DT_ALPHA): New. (elf_machine_load_address): Simplify to rely on gprel relocations. (elf_machine_runtime_setup): Handle DT_ALPHA_PLTRO plt format. Remove thread safety workaround for binutils 2.6. (elf_machine_fixup_plt): Handle DT_ALPHA_PLTRO plt format. * sysdeps/alpha/dl-trampoline.S (_dl_runtime_resolve_new): New. (_dl_runtime_profile_new): New. (_dl_runtime_resolve_old): Rename from _dl_runtime_resolve. (_dl_runtime_profile_old): Rename from _dl_runtime_profile. Fix typo in _dl_call_pltexit argument loading. * sysdeps/alpha/div_libc.h (funcnoplt): New. * sysdeps/alpha/divl.S, sysdeps/alpha/divq.S: Use it. * sysdeps/alpha/divqu.S, sysdeps/alpha/reml.S: Likewise. * sysdeps/alpha/remq.S, sysdeps/alpha/remqu.S: Likewise. 2005-05-28 Richard Henderson * elf/elf.h (DT_ALPHA_PLTRO, DT_ALPHA_NUM): New. * sysdeps/alpha/dl-dtprocnum.h: New file. * sysdeps/alpha/dl-machine.h (DT_ALPHA): New. (elf_machine_load_address): Simplify to rely on gprel relocations. (elf_machine_runtime_setup): Handle DT_ALPHA_PLTRO plt format. Remove thread safety workaround for binutils 2.6. (elf_machine_fixup_plt): Handle DT_ALPHA_PLTRO plt format. * sysdeps/alpha/dl-trampoline.S (_dl_runtime_resolve_new): New. (_dl_runtime_profile_new): New. (_dl_runtime_resolve_old): Rename from _dl_runtime_resolve. (_dl_runtime_profile_old): Rename from _dl_runtime_profile. Fix typo in _dl_call_pltexit argument loading. * sysdeps/alpha/div_libc.h (funcnoplt): New. * sysdeps/alpha/divl.S, sysdeps/alpha/divq.S: Use it. * sysdeps/alpha/divqu.S, sysdeps/alpha/reml.S: Likewise. * sysdeps/alpha/remq.S, sysdeps/alpha/remqu.S: Likewise. --- ChangeLog | 20 ++++ elf/elf.h | 3 + sysdeps/alpha/div_libc.h | 6 ++ sysdeps/alpha/divl.S | 2 +- sysdeps/alpha/divq.S | 2 +- sysdeps/alpha/divqu.S | 2 +- sysdeps/alpha/dl-dtprocnum.h | 3 + sysdeps/alpha/dl-machine.h | 146 ++++++++++++--------------- sysdeps/alpha/dl-trampoline.S | 222 ++++++++++++++++++++++++++++++++++++++---- sysdeps/alpha/reml.S | 2 +- sysdeps/alpha/remq.S | 2 +- sysdeps/alpha/remqu.S | 2 +- 12 files changed, 301 insertions(+), 111 deletions(-) create mode 100644 sysdeps/alpha/dl-dtprocnum.h diff --git a/ChangeLog b/ChangeLog index cc62391..0e821a8 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,23 @@ +2005-05-28 Richard Henderson + + * elf/elf.h (DT_ALPHA_PLTRO, DT_ALPHA_NUM): New. + * sysdeps/alpha/dl-dtprocnum.h: New file. + * sysdeps/alpha/dl-machine.h (DT_ALPHA): New. + (elf_machine_load_address): Simplify to rely on gprel relocations. + (elf_machine_runtime_setup): Handle DT_ALPHA_PLTRO plt format. + Remove thread safety workaround for binutils 2.6. + (elf_machine_fixup_plt): Handle DT_ALPHA_PLTRO plt format. + * sysdeps/alpha/dl-trampoline.S (_dl_runtime_resolve_new): New. + (_dl_runtime_profile_new): New. + (_dl_runtime_resolve_old): Rename from _dl_runtime_resolve. + (_dl_runtime_profile_old): Rename from _dl_runtime_profile. Fix + typo in _dl_call_pltexit argument loading. + + * sysdeps/alpha/div_libc.h (funcnoplt): New. + * sysdeps/alpha/divl.S, sysdeps/alpha/divq.S: Use it. + * sysdeps/alpha/divqu.S, sysdeps/alpha/reml.S: Likewise. + * sysdeps/alpha/remq.S, sysdeps/alpha/remqu.S: Likewise. + 2005-05-26 Andreas Schwab * locale/Makefile (CFLAGS-loadlocale.c): Don't define. diff --git a/elf/elf.h b/elf/elf.h index e246519..5ae696d 100644 --- a/elf/elf.h +++ b/elf/elf.h @@ -1866,6 +1866,9 @@ typedef Elf32_Addr Elf32_Conflict; #define LITUSE_ALPHA_TLS_GD 4 #define LITUSE_ALPHA_TLS_LDM 5 +/* Legal values for d_tag of Elf64_Dyn. */ +#define DT_ALPHA_PLTRO 0x70000000 +#define DT_ALPHA_NUM 1 /* PowerPC specific declarations */ diff --git a/sysdeps/alpha/div_libc.h b/sysdeps/alpha/div_libc.h index 62b4470..b731b02 100644 --- a/sysdeps/alpha/div_libc.h +++ b/sysdeps/alpha/div_libc.h @@ -34,6 +34,12 @@ #define RV t12 #define RA t9 +/* The secureplt format does not allow the division routines to be called + via plt; there aren't enough registers free to be clobbered. Avoid + setting the symbol type to STT_FUNC, so that the linker won't be tempted + to create a plt entry. */ +#define funcnoplt notype + /* None of these functions should use implicit anything. */ .set nomacro .set noat diff --git a/sysdeps/alpha/divl.S b/sysdeps/alpha/divl.S index 408d66d..9bac045 100644 --- a/sysdeps/alpha/divl.S +++ b/sysdeps/alpha/divl.S @@ -36,7 +36,7 @@ .text .align 4 .globl __divl - .type __divl, @function + .type __divl, @funcnoplt .usepv __divl, no cfi_startproc diff --git a/sysdeps/alpha/divq.S b/sysdeps/alpha/divq.S index 7f245ac..d2ed2c5 100644 --- a/sysdeps/alpha/divq.S +++ b/sysdeps/alpha/divq.S @@ -43,7 +43,7 @@ .text .align 4 .globl __divq - .type __divq, @function + .type __divq, @funcnoplt .usepv __divq, no cfi_startproc diff --git a/sysdeps/alpha/divqu.S b/sysdeps/alpha/divqu.S index fc00fa1..f2a8a4d 100644 --- a/sysdeps/alpha/divqu.S +++ b/sysdeps/alpha/divqu.S @@ -43,7 +43,7 @@ .text .align 4 .globl __divqu - .type __divqu, @function + .type __divqu, @funcnoplt .usepv __divqu, no cfi_startproc diff --git a/sysdeps/alpha/dl-dtprocnum.h b/sysdeps/alpha/dl-dtprocnum.h new file mode 100644 index 0000000..67845cd --- /dev/null +++ b/sysdeps/alpha/dl-dtprocnum.h @@ -0,0 +1,3 @@ +/* Number of extra dynamic section entries for this architecture. By + default there are none. */ +#define DT_THISPROCNUM DT_ALPHA_NUM diff --git a/sysdeps/alpha/dl-machine.h b/sysdeps/alpha/dl-machine.h index 173a411..88c357e 100644 --- a/sysdeps/alpha/dl-machine.h +++ b/sysdeps/alpha/dl-machine.h @@ -33,6 +33,9 @@ where the dynamic linker should not map anything. */ #define ELF_MACHINE_USER_ADDRESS_MASK 0x120000000UL +/* Translate a processor specific dynamic tag to the index in l_info array. */ +#define DT_ALPHA(x) (DT_ALPHA_##x - DT_LOPROC + DT_NUM) + /* Return nonzero iff ELF header is compatible with the running host. */ static inline int elf_machine_matches_host (const Elf64_Ehdr *ehdr) @@ -55,105 +58,74 @@ elf_machine_dynamic (void) } /* Return the run-time load address of the shared object. */ + static inline Elf64_Addr elf_machine_load_address (void) { - /* NOTE: While it is generally unfriendly to put data in the text - segment, it is only slightly less so when the "data" is an - instruction. While we don't have to worry about GLD just yet, an - optimizing linker might decide that our "data" is an unreachable - instruction and throw it away -- with the right switches, DEC's - linker will do this. What ought to happen is we should add - something to GAS to allow us access to the new GPREL_HI32/LO32 - relocation types stolen from OSF/1 3.0. */ - /* This code relies on the fact that BRADDR relocations do not - appear in dynamic relocation tables. Not that that would be very - useful anyway -- br/bsr has a 4MB range and the shared libraries - are usually many many terabytes away. */ - - Elf64_Addr dot; - long int zero_disp; - - asm("br %0, 1f\n" - "0:\n\t" - "br $0, 2f\n" - "1:\n\t" - ".section\t.data\n" - "2:\n\t" - ".quad 0b\n\t" - ".previous" - : "=r"(dot)); - - zero_disp = *(int *) dot; - zero_disp = (zero_disp << 43) >> 41; - - return dot - *(Elf64_Addr *) (dot + 4 + zero_disp); + /* This relies on the compiler using gp-relative addresses for static symbols. */ + static void *dot = ˙ + return (void *)&dot - dot; } /* Set up the loaded object described by L so its unrelocated PLT entries will jump to the on-demand fixup code in dl-runtime.c. */ static inline int -elf_machine_runtime_setup (struct link_map *l, int lazy, int profile) +elf_machine_runtime_setup (struct link_map *map, int lazy, int profile) { - Elf64_Addr plt; - extern void _dl_runtime_resolve (void); - extern void _dl_runtime_profile (void); + extern char _dl_runtime_resolve_new[] attribute_hidden; + extern char _dl_runtime_profile_new[] attribute_hidden; + extern char _dl_runtime_resolve_old[] attribute_hidden; + extern char _dl_runtime_profile_old[] attribute_hidden; + + struct pltgot { + char *resolve; + struct link_map *link; + }; + + struct pltgot *pg; + long secureplt; + char *resolve; + + if (map->l_info[DT_JMPREL] == 0 || !lazy) + return lazy; + + /* Check to see if we're using the read-only plt form. */ + secureplt = map->l_info[DT_ALPHA(PLTRO)] != 0; + + /* If the binary uses the read-only secure plt format, PG points to + the .got.plt section, which is the right place for ld.so to place + its hooks. Otherwise, PG is currently pointing at the start of + the plt; the hooks go at offset 16. */ + pg = (struct pltgot *) D_PTR (map, l_info[DT_PLTGOT]); + pg += !secureplt; + + /* This function will be called to perform the relocation. They're + not declared as functions to convince the compiler to use gp + relative relocations for them. */ + if (secureplt) + resolve = _dl_runtime_resolve_new; + else + resolve = _dl_runtime_resolve_old; - if (l->l_info[DT_JMPREL] && lazy) + if (__builtin_expect (profile, 0)) { - /* The GOT entries for the functions in the PLT have not been - filled in yet. Their initial contents are directed to the - PLT which arranges for the dynamic linker to be called. */ - plt = D_PTR (l, l_info[DT_PLTGOT]); - - /* This function will be called to perform the relocation. */ - if (__builtin_expect (profile, 0)) - { - *(Elf64_Addr *)(plt + 16) = (Elf64_Addr) &_dl_runtime_profile; - - if (GLRO(dl_profile) != NULL - && _dl_name_match_p (GLRO(dl_profile), l)) - { - /* This is the object we are looking for. Say that we really - want profiling and the timers are started. */ - GL(dl_profile_map) = l; - } - } + if (secureplt) + resolve = _dl_runtime_profile_new; else - *(Elf64_Addr *)(plt + 16) = (Elf64_Addr) &_dl_runtime_resolve; - - /* Identify this shared object */ - *(Elf64_Addr *)(plt + 24) = (Elf64_Addr) l; + resolve = _dl_runtime_profile_old; - /* If the first instruction of the plt entry is not - "br $28, plt0", we have to reinitialize .plt for lazy relocation. */ - if (*(unsigned int *)(plt + 32) != 0xc39ffff7) + if (GLRO(dl_profile) && _dl_name_match_p (GLRO(dl_profile), map)) { - unsigned int val = 0xc39ffff7; - unsigned int *slot, *end; - const Elf64_Rela *rela = (const Elf64_Rela *) - D_PTR (l, l_info[DT_JMPREL]); - Elf64_Addr l_addr = l->l_addr; - - /* br t12,.+4; ldq t12,12(t12); nop; jmp t12,(t12),.+4 */ - *(unsigned long *)plt = 0xa77b000cc3600000; - *(unsigned long *)(plt + 8) = 0x6b7b000047ff041f; - slot = (unsigned int *)(plt + 32); - end = (unsigned int *)(plt + 32 - + l->l_info[DT_PLTRELSZ]->d_un.d_val / 2); - while (slot < end) - { - /* br at,.plt+0 */ - *slot = val; - *(Elf64_Addr *) rela->r_offset = (Elf64_Addr) slot - l_addr; - val -= 3; - slot += 3; - ++rela; - } + /* This is the object we are looking for. Say that we really + want profiling and the timers are started. */ + GL(dl_profile_map) = map; } } + pg->resolve = resolve; + pg->link = map; + return lazy; } @@ -280,7 +252,7 @@ $fixup_stack: \n\ /* Fix up the instructions of a PLT entry to invoke the function rather than the dynamic linker. */ static inline Elf64_Addr -elf_machine_fixup_plt (struct link_map *l, lookup_t t, +elf_machine_fixup_plt (struct link_map *map, lookup_t t, const Elf64_Rela *reloc, Elf64_Addr *got_addr, Elf64_Addr value) { @@ -291,10 +263,16 @@ elf_machine_fixup_plt (struct link_map *l, lookup_t t, /* Store the value we are going to load. */ *got_addr = value; + /* If this binary uses the read-only secure plt format, we're done. */ + if (map->l_info[DT_ALPHA(PLTRO)]) + return value; + + /* Otherwise we have to modify the plt entry in place to do the branch. */ + /* Recover the PLT entry address by calculating reloc's index into the .rela.plt, and finding that entry in the .plt. */ - rela_plt = (void *) D_PTR (l, l_info[DT_JMPREL]); - plte = (void *) (D_PTR (l, l_info[DT_PLTGOT]) + 32); + rela_plt = (const Elf64_Rela *) D_PTR (map, l_info[DT_JMPREL]); + plte = (Elf64_Word *) (D_PTR (map, l_info[DT_PLTGOT]) + 32); plte += 3 * (reloc - rela_plt); /* Find the displacement from the plt entry to the function. */ diff --git a/sysdeps/alpha/dl-trampoline.S b/sysdeps/alpha/dl-trampoline.S index 4235083..c52efbb 100644 --- a/sysdeps/alpha/dl-trampoline.S +++ b/sysdeps/alpha/dl-trampoline.S @@ -21,13 +21,202 @@ .set noat - .globl _dl_runtime_resolve - .ent _dl_runtime_resolve +.macro savei regno, offset + stq $\regno, \offset($30) + cfi_rel_offset(\regno, \offset) +.endm + +.macro savef regno, offset + stt $f\regno, \offset($30) + cfi_rel_offset(\regno+32, \offset) +.endm + + .align 4 + .globl _dl_runtime_resolve_new + .ent _dl_runtime_resolve_new + +#undef FRAMESIZE +#define FRAMESIZE 14*8 + +_dl_runtime_resolve_new: + .frame $30, FRAMESIZE, $26, 0 + .mask 0x4000000, 0 + + ldah $29, 0($27) !gpdisp!1 + lda $30, -FRAMESIZE($30) + stq $26, 0*8($30) + stq $16, 2*8($30) + + stq $17, 3*8($30) + lda $29, 0($29) !gpdisp!1 + stq $18, 4*8($30) + mov $28, $16 /* link_map from .got.plt */ + + stq $19, 5*8($30) + mov $25, $17 /* offset of reloc entry */ + stq $20, 6*8($30) + mov $26, $18 /* return address */ + + stq $21, 7*8($30) + stt $f16, 8*8($30) + stt $f17, 9*8($30) + stt $f18, 10*8($30) + + stt $f19, 11*8($30) + stt $f20, 12*8($30) + stt $f21, 13*8($30) + .prologue 2 + + bsr $26, _dl_fixup !samegp + mov $0, $27 + + ldq $26, 0*8($30) + ldq $16, 2*8($30) + ldq $17, 3*8($30) + ldq $18, 4*8($30) + ldq $19, 5*8($30) + ldq $20, 6*8($30) + ldq $21, 7*8($30) + ldt $f16, 8*8($30) + ldt $f17, 9*8($30) + ldt $f18, 10*8($30) + ldt $f19, 11*8($30) + ldt $f20, 12*8($30) + ldt $f21, 13*8($30) + lda $30, FRAMESIZE($30) + jmp $31, ($27), 0 + .end _dl_runtime_resolve_new + + .globl _dl_runtime_profile_new + .type _dl_runtime_profile_new, @function + +#undef FRAMESIZE +#define FRAMESIZE 20*8 + + /* We save the registers in a different order than desired by + .mask/.fmask, so we have to use explicit cfi directives. */ + cfi_startproc + +_dl_runtime_profile_new: + ldah $29, 0($27) !gpdisp!2 + lda $30, -FRAMESIZE($30) + savei 26, 0*8 + stq $16, 2*8($30) + + stq $17, 3*8($30) + lda $29, 0($29) !gpdisp!2 + stq $18, 4*8($30) + lda $1, FRAMESIZE($30) /* incoming sp value */ + + stq $1, 1*8($30) + stq $19, 5*8($30) + stq $20, 6*8($30) + mov $28, $16 /* link_map from .got.plt */ + + stq $21, 7*8($30) + mov $25, $17 /* offset of reloc entry */ + stt $f16, 8*8($30) + mov $26, $18 /* return address */ + + stt $f17, 9*8($30) + mov $30, $19 /* La_alpha_regs address */ + stt $f18, 10*8($30) + lda $20, 14*8($30) /* framesize address */ + + stt $f19, 11*8($30) + stt $f20, 12*8($30) + stt $f21, 13*8($30) + stq $28, 16*8($30) + stq $25, 17*8($30) + + bsr $26, _dl_profile_fixup !samegp + mov $0, $27 + + /* Discover if we're wrapping this call. */ + ldq $18, 14*8($30) + bge $18, 1f + + ldq $26, 0*8($30) + ldq $16, 2*8($30) + ldq $17, 3*8($30) + ldq $18, 4*8($30) + ldq $19, 5*8($30) + ldq $20, 6*8($30) + ldq $21, 7*8($30) + ldt $f16, 8*8($30) + ldt $f17, 9*8($30) + ldt $f18, 10*8($30) + ldt $f19, 11*8($30) + ldt $f20, 12*8($30) + ldt $f21, 13*8($30) + lda $30, FRAMESIZE($30) + jmp $31, ($27), 0 + +1: + /* Create a frame pointer and allocate a new argument frame. */ + savei 15, 15*8 + mov $30, $15 + cfi_def_cfa_register (15) + addq $18, 15, $18 + bic $18, 15, $18 + subq $30, $18, $30 + + /* Save the call destination around memcpy. */ + stq $0, 14*8($30) + + /* Copy the stack arguments into place. */ + lda $16, 0($30) + lda $17, FRAMESIZE($15) + jsr $26, memcpy + ldgp $29, 0($26) + + /* Reload the argument registers. */ + ldq $27, 14*8($30) + ldq $16, 2*8($15) + ldq $17, 3*8($15) + ldq $18, 4*8($15) + ldq $19, 5*8($15) + ldq $20, 6*8($15) + ldq $21, 7*8($15) + ldt $f16, 8*8($15) + ldt $f17, 9*8($15) + ldt $f18, 10*8($15) + ldt $f19, 11*8($15) + ldt $f20, 12*8($15) + ldt $f21, 13*8($15) + + jsr $26, ($27), 0 + ldgp $29, 0($26) + + /* Set up for call to _dl_call_pltexit. */ + ldq $16, 16*8($15) + ldq $17, 17*8($15) + stq $0, 16*8($15) + lda $18, 0($15) + stq $1, 17*8($15) + lda $19, 16*8($15) + stt $f0, 18*8($15) + stt $f1, 19*8($15) + bsr $26, _dl_call_pltexit !samegp + + mov $15, $30 + cfi_def_cfa_register (30) + ldq $26, 0($30) + ldq $15, 15*8($30) + lda $30, FRAMESIZE($30) + ret + + cfi_endproc + .size _dl_runtime_profile_new, .-_dl_runtime_profile_new + + .align 4 + .globl _dl_runtime_resolve_old + .ent _dl_runtime_resolve_old #undef FRAMESIZE #define FRAMESIZE 44*8 -_dl_runtime_resolve: +_dl_runtime_resolve_old: lda $30, -FRAMESIZE($30) .frame $30, FRAMESIZE, $26 /* Preserve all registers that C normally doesn't. */ @@ -146,30 +335,21 @@ _dl_runtime_resolve: lda $30, FRAMESIZE($30) jmp $31, ($27) - .end _dl_runtime_resolve + .end _dl_runtime_resolve_old - .globl _dl_runtime_profile - .usepv _dl_runtime_profile, no - .type _dl_runtime_profile, @function + .globl _dl_runtime_profile_old + .usepv _dl_runtime_profile_old, no + .type _dl_runtime_profile_old, @function /* We save the registers in a different order than desired by .mask/.fmask, so we have to use explicit cfi directives. */ cfi_startproc -.macro savei regno, offset - stq $\regno, \offset($30) - cfi_rel_offset(\regno, \offset) -.endm - -.macro savef regno, offset - stt $f\regno, \offset($30) - cfi_rel_offset(\regno+32, \offset) -.endm - #undef FRAMESIZE #define FRAMESIZE 50*8 -_dl_runtime_profile: + .align 4 +_dl_runtime_profile_old: lda $30, -FRAMESIZE($30) cfi_adjust_cfa_offset (FRAMESIZE) @@ -340,8 +520,8 @@ _dl_runtime_profile: ldgp $29, 0($26) /* Set up for call to _dl_call_pltexit. */ - ldq $16, 48($15) - ldq $17, 49($15) + ldq $16, 48*8($15) + ldq $17, 49*8($15) stq $0, 46*8($15) lda $18, 0($15) stq $1, 47*8($15) @@ -358,4 +538,4 @@ _dl_runtime_profile: ret cfi_endproc - .size _dl_runtime_profile, .-_dl_runtime_profile + .size _dl_runtime_profile_old, .-_dl_runtime_profile_old diff --git a/sysdeps/alpha/reml.S b/sysdeps/alpha/reml.S index bfc3be5..ae291b0 100644 --- a/sysdeps/alpha/reml.S +++ b/sysdeps/alpha/reml.S @@ -38,7 +38,7 @@ .text .align 4 .globl __reml - .type __reml, @function + .type __reml, @funcnoplt .usepv __reml, no cfi_startproc diff --git a/sysdeps/alpha/remq.S b/sysdeps/alpha/remq.S index 645a834..64e958b 100644 --- a/sysdeps/alpha/remq.S +++ b/sysdeps/alpha/remq.S @@ -43,7 +43,7 @@ .text .align 4 .globl __remq - .type __remq, @function + .type __remq, @funcnoplt .usepv __remq, no cfi_startproc diff --git a/sysdeps/alpha/remqu.S b/sysdeps/alpha/remqu.S index bfa78df..dcc1c88 100644 --- a/sysdeps/alpha/remqu.S +++ b/sysdeps/alpha/remqu.S @@ -43,7 +43,7 @@ .text .align 4 .globl __remqu - .type __remqu, @function + .type __remqu, @funcnoplt .usepv __remqu, no cfi_startproc -- 2.7.4