From 47f6dab9a3fb353b0faca52fcc07d2f57c4d906c Mon Sep 17 00:00:00 2001 From: Alan Modra Date: Mon, 28 Jan 2008 05:59:24 +0000 Subject: [PATCH] Rewrite SPU overlay handling code. Put overlay calls stubs in the overlays where possible. Use a faster call stub, or optionally at compile time, a more compact stub. Double size of _ovly_buf_table so that low bit of _ovly_table.buf can be used as a "present" bit. Reserve an extra _ovly_table entry for index zero. --- bfd/ChangeLog | 43 ++ bfd/elf32-spu.c | 1144 ++++++++++++++++++++------------------------ bfd/elf32-spu.h | 27 +- ld/ChangeLog | 12 + ld/emultempl/spu_ovl.S | 884 ++++++++++++++-------------------- ld/emultempl/spu_ovl.o | Bin 1432 -> 1524 bytes ld/emultempl/spuelf.em | 47 +- ld/testsuite/ChangeLog | 5 + ld/testsuite/ld-spu/ovl.d | 213 +++++---- ld/testsuite/ld-spu/ovl2.d | 90 ++-- 10 files changed, 1160 insertions(+), 1305 deletions(-) diff --git a/bfd/ChangeLog b/bfd/ChangeLog index bfdc204..e95e50f 100644 --- a/bfd/ChangeLog +++ b/bfd/ChangeLog @@ -1,3 +1,46 @@ +2008-01-28 Alan Modra + + Rewrite SPU overlay handling code. Put overlay calls stubs in the + overlays where possible. Use a faster call stub, or optionally at + compile time, a more compact stub. Double size of _ovly_buf_table + so that low bit of _ovly_table.buf can be used as a "present" bit. + Reserve an extra _ovly_table entry for index zero. + * elf32-spu.c: (struct spu_link_hash_table): Delete a number of fields, + add new ones. + (struct got_entry): New. + (struct spu_stub_hash_entry): Delete. + (stub_hash_newfunc, spu_elf_link_hash_table_free): Delete. + (spu_elf_link_hash_table_create): Adjust for struct changes. + (spu_stub_name): Delete. + (spu_elf_find_overlays): Don't track sections from overlay regions. + Instead set ovl_buf number in spu elf section data. Error if + overlays in one region don't start at the same address. Adjust + for struct _spu_elf_section_data changes. + (SIZEOF_STUB1, SIZEOF_STUB2, ILA_79, ILA_78): Delete. 
+ (OVL_STUB_SIZE, BRSL, LNOP, ILA): Define. + (needs_ovl_stub): Adjust for struct _spu_elf_section_data changes. + (enum _insn_type): New. + (count_stub, build_stub): New functions. + (allocate_spuear_stubs): Use count_stub. + (build_spuear_stubs): Use build_stub. + (populate_stubs, sort_stubs): Delete. + (process_stubs): New function. + (write_one_stub): Delete. + (spu_elf_size_stubs, spu_elf_build_stubs): Rewrite. + (alloc_stack_info): Adjust for struct _spu_elf_section_data changes. + (maybe_insert_function, check_function_ranges): Likewise. + (find_function, pasted_function, build_call_tree): Likewise. + (spu_elf_stack_analysis, spu_elf_modify_segment_map): Likewise. + (spu_elf_modify_program_headers): Likewise. + (interesting_section): Detect stub sections differently. Delete + htab param, adjust all callers. + (spu_elf_relocate_section): Rewrite stub handling. + (spu_elf_output_symbol_hook): Likewise. + (bfd_elf32_bfd_link_hash_table_free): Delete. + * elf32-spu.h (struct _spu_elf_section_data): Move input-only and + output-only fields into a union. Add ovl_buf. + (spu_elf_size_stubs, spu_elf_build_stubs): Update prototypes. + 2008-01-25 DJ Delorie * elf32-m32c.c (_bfd_m32c_elf_eh_frame_address_size): New. diff --git a/bfd/elf32-spu.c b/bfd/elf32-spu.c index 62a922a..d9ba935 100644 --- a/bfd/elf32-spu.c +++ b/bfd/elf32-spu.c @@ -257,31 +257,21 @@ struct spu_link_hash_table { struct elf_link_hash_table elf; - /* The stub hash table. */ - struct bfd_hash_table stub_hash_table; - - /* Sorted array of stubs. */ - struct { - struct spu_stub_hash_entry **sh; - unsigned int count; - int err; - } stubs; - /* Shortcuts to overlay sections. */ - asection *stub; asection *ovtab; + asection *toe; + asection **ovl_sec; + + /* Count of stubs in each overlay section. */ + unsigned int *stub_count; + + /* The stub section for each overlay section. 
*/ + asection **stub_sec; struct elf_link_hash_entry *ovly_load; + struct elf_link_hash_entry *ovly_return; unsigned long ovly_load_r_symndx; - /* An array of two output sections per overlay region, chosen such that - the first section vma is the overlay buffer vma (ie. the section has - the lowest vma in the group that occupy the region), and the second - section vma+size specifies the end of the region. We keep pointers - to sections like this because section vmas may change when laying - them out. */ - asection **ovl_region; - /* Number of overlay buffers. */ unsigned int num_buf; @@ -296,7 +286,7 @@ struct spu_link_hash_table unsigned int non_overlay_stubs : 1; /* Set on error. */ - unsigned int stub_overflow : 1; + unsigned int stub_err : 1; /* Set if stack size analysis should be done. */ unsigned int stack_analysis : 1; @@ -305,54 +295,17 @@ struct spu_link_hash_table unsigned int emit_stack_syms : 1; }; -#define spu_hash_table(p) \ - ((struct spu_link_hash_table *) ((p)->hash)) +/* Hijack the generic got fields for overlay stub accounting. */ -struct spu_stub_hash_entry +struct got_entry { - struct bfd_hash_entry root; - - /* Destination of this stub. */ - asection *target_section; - bfd_vma target_off; - - /* Offset of entry in stub section. */ - bfd_vma off; - - /* Offset from this stub to stub that loads the overlay index. */ - bfd_vma delta; + struct got_entry *next; + unsigned int ovl; + bfd_vma stub_addr; }; -/* Create an entry in a spu stub hash table. */ - -static struct bfd_hash_entry * -stub_hash_newfunc (struct bfd_hash_entry *entry, - struct bfd_hash_table *table, - const char *string) -{ - /* Allocate the structure if it has not already been allocated by a - subclass. */ - if (entry == NULL) - { - entry = bfd_hash_allocate (table, sizeof (struct spu_stub_hash_entry)); - if (entry == NULL) - return entry; - } - - /* Call the allocation method of the superclass. 
*/ - entry = bfd_hash_newfunc (entry, table, string); - if (entry != NULL) - { - struct spu_stub_hash_entry *sh = (struct spu_stub_hash_entry *) entry; - - sh->target_section = NULL; - sh->target_off = 0; - sh->off = 0; - sh->delta = 0; - } - - return entry; -} +#define spu_hash_table(p) \ + ((struct spu_link_hash_table *) ((p)->hash)) /* Create a spu ELF linker hash table. */ @@ -373,28 +326,16 @@ spu_elf_link_hash_table_create (bfd *abfd) return NULL; } - /* Init the stub hash table too. */ - if (!bfd_hash_table_init (&htab->stub_hash_table, stub_hash_newfunc, - sizeof (struct spu_stub_hash_entry))) - return NULL; - - memset (&htab->stubs, 0, - sizeof (*htab) - offsetof (struct spu_link_hash_table, stubs)); + memset (&htab->ovtab, 0, + sizeof (*htab) - offsetof (struct spu_link_hash_table, ovtab)); + htab->elf.init_got_refcount.refcount = 0; + htab->elf.init_got_refcount.glist = NULL; + htab->elf.init_got_offset.offset = 0; + htab->elf.init_got_offset.glist = NULL; return &htab->elf.root; } -/* Free the derived linker hash table. */ - -static void -spu_elf_link_hash_table_free (struct bfd_link_hash_table *hash) -{ - struct spu_link_hash_table *ret = (struct spu_link_hash_table *) hash; - - bfd_hash_table_free (&ret->stub_hash_table); - _bfd_generic_link_hash_table_free (hash); -} - /* Find the symbol for the given R_SYMNDX in IBFD and set *HP and *SYMP to (hash, NULL) for global symbols, and (NULL, sym) for locals. Set *SYMSECP to the symbol's section. *LOCSYMSP caches local syms. */ @@ -480,51 +421,6 @@ get_sym_h (struct elf_link_hash_entry **hp, return TRUE; } -/* Build a name for an entry in the stub hash table. We can't use a - local symbol name because ld -r might generate duplicate local symbols. 
*/ - -static char * -spu_stub_name (const asection *sym_sec, - const struct elf_link_hash_entry *h, - const Elf_Internal_Rela *rel) -{ - char *stub_name; - bfd_size_type len; - - if (h) - { - len = strlen (h->root.root.string) + 1 + 8 + 1; - stub_name = bfd_malloc (len); - if (stub_name == NULL) - return stub_name; - - sprintf (stub_name, "%s+%x", - h->root.root.string, - (int) rel->r_addend & 0xffffffff); - len -= 8; - } - else - { - len = 8 + 1 + 8 + 1 + 8 + 1; - stub_name = bfd_malloc (len); - if (stub_name == NULL) - return stub_name; - - sprintf (stub_name, "%x:%x+%x", - sym_sec->id & 0xffffffff, - (int) ELF32_R_SYM (rel->r_info) & 0xffffffff, - (int) rel->r_addend & 0xffffffff); - len = strlen (stub_name); - } - - if (stub_name[len - 2] == '+' - && stub_name[len - 1] == '0' - && stub_name[len] == 0) - stub_name[len - 2] = 0; - - return stub_name; -} - /* Create the note section if not already present. This is done early so that the linker maps the sections to the right place in the output. */ @@ -634,9 +530,7 @@ spu_elf_find_overlays (bfd *output_bfd, struct bfd_link_info *info) qsort (alloc_sec, n, sizeof (*alloc_sec), sort_sections); /* Look for overlapping vmas. Any with overlap must be overlays. - Count them. Also count the number of overlay regions and for - each region save a section from that region with the lowest vma - and another section with the highest end vma. */ + Count them. Also count the number of overlay regions. 
*/ ovl_end = alloc_sec[0]->vma + alloc_sec[0]->size; for (ovl_index = 0, num_buf = 0, i = 1; i < n; i++) { @@ -645,19 +539,24 @@ spu_elf_find_overlays (bfd *output_bfd, struct bfd_link_info *info) { asection *s0 = alloc_sec[i - 1]; - if (spu_elf_section_data (s0)->ovl_index == 0) + if (spu_elf_section_data (s0)->u.o.ovl_index == 0) { - spu_elf_section_data (s0)->ovl_index = ++ovl_index; - alloc_sec[num_buf * 2] = s0; - alloc_sec[num_buf * 2 + 1] = s0; - num_buf++; + alloc_sec[ovl_index] = s0; + spu_elf_section_data (s0)->u.o.ovl_index = ++ovl_index; + spu_elf_section_data (s0)->u.o.ovl_buf = ++num_buf; } - spu_elf_section_data (s)->ovl_index = ++ovl_index; - if (ovl_end < s->vma + s->size) + alloc_sec[ovl_index] = s; + spu_elf_section_data (s)->u.o.ovl_index = ++ovl_index; + spu_elf_section_data (s)->u.o.ovl_buf = num_buf; + if (s0->vma != s->vma) { - ovl_end = s->vma + s->size; - alloc_sec[num_buf * 2 - 1] = s; + info->callbacks->einfo (_("%X%P: overlay sections %A and %A " + "do not start at the same address.\n"), + s0, s); + return FALSE; } + if (ovl_end < s->vma + s->size) + ovl_end = s->vma + s->size; } else ovl_end = s->vma + s->size; @@ -665,30 +564,22 @@ spu_elf_find_overlays (bfd *output_bfd, struct bfd_link_info *info) htab->num_overlays = ovl_index; htab->num_buf = num_buf; - if (ovl_index == 0) - { - free (alloc_sec); - return FALSE; - } - - alloc_sec = bfd_realloc (alloc_sec, num_buf * 2 * sizeof (*alloc_sec)); - if (alloc_sec == NULL) - return FALSE; - - htab->ovl_region = alloc_sec; - return TRUE; + htab->ovl_sec = alloc_sec; + return ovl_index != 0; } -/* One of these per stub. */ -#define SIZEOF_STUB1 8 -#define ILA_79 0x4200004f /* ila $79,function_address */ -#define BR 0x32000000 /* br stub2 */ - -/* One of these per overlay. 
*/ -#define SIZEOF_STUB2 8 -#define ILA_78 0x4200004e /* ila $78,overlay_number */ - /* br __ovly_load */ +/* Support two sizes of overlay stubs, a slower more compact stub of two + instructions, and a faster stub of four instructions. */ +#ifndef OVL_STUB_SIZE +/* Default to faster. */ +#define OVL_STUB_SIZE 16 +/* #define OVL_STUB_SIZE 8 */ +#endif +#define BRSL 0x33000000 +#define BR 0x32000000 #define NOP 0x40200000 +#define LNOP 0x00200000 +#define ILA 0x42000000 /* Return true for all relative and absolute branch instructions. bra 00110000 0.. @@ -757,14 +648,14 @@ needs_ovl_stub (const char *sym_name, return TRUE; /* Usually, symbols in non-overlay sections don't need stubs. */ - if (spu_elf_section_data (sym_sec->output_section)->ovl_index == 0 + if (spu_elf_section_data (sym_sec->output_section)->u.o.ovl_index == 0 && !htab->non_overlay_stubs) return FALSE; /* A reference from some other section to a symbol in an overlay section needs a stub. */ - if (spu_elf_section_data (sym_sec->output_section)->ovl_index - != spu_elf_section_data (input_section->output_section)->ovl_index) + if (spu_elf_section_data (sym_sec->output_section)->u.o.ovl_index + != spu_elf_section_data (input_section->output_section)->u.o.ovl_index) return TRUE; /* If this insn isn't a branch then we are possibly taking the @@ -772,6 +663,229 @@ needs_ovl_stub (const char *sym_name, return !is_branch; } +enum _insn_type { non_branch, branch, call }; + +static bfd_boolean +count_stub (struct spu_link_hash_table *htab, + bfd *ibfd, + asection *isec, + enum _insn_type insn_type, + struct elf_link_hash_entry *h, + const Elf_Internal_Rela *irela) +{ + unsigned int ovl = 0; + struct got_entry *g, **head; + + /* If this instruction is a branch or call, we need a stub + for it. One stub per function per overlay. + If it isn't a branch, then we are taking the address of + this function so need a stub in the non-overlay area + for it. One stub per function.
*/ + if (insn_type != non_branch) + ovl = spu_elf_section_data (isec->output_section)->u.o.ovl_index; + + if (h != NULL) + head = &h->got.glist; + else + { + if (elf_local_got_ents (ibfd) == NULL) + { + bfd_size_type amt = (elf_tdata (ibfd)->symtab_hdr.sh_info + * sizeof (*elf_local_got_ents (ibfd))); + elf_local_got_ents (ibfd) = bfd_zmalloc (amt); + if (elf_local_got_ents (ibfd) == NULL) + return FALSE; + } + head = elf_local_got_ents (ibfd) + ELF32_R_SYM (irela->r_info); + } + + /* If we have a stub in the non-overlay area then there's no need + for one in overlays. */ + g = *head; + if (g != NULL && g->ovl == 0) + return TRUE; + + if (ovl == 0) + { + struct got_entry *gnext; + + /* Need a new non-overlay area stub. Zap other stubs. */ + for (; g != NULL; g = gnext) + { + htab->stub_count[g->ovl] -= 1; + gnext = g->next; + free (g); + } + } + else + { + for (; g != NULL; g = g->next) + if (g->ovl == ovl) + break; + } + + if (g == NULL) + { + g = bfd_malloc (sizeof *g); + if (g == NULL) + return FALSE; + g->ovl = ovl; + g->stub_addr = (bfd_vma) -1; + g->next = *head; + *head = g; + + htab->stub_count[ovl] += 1; + } + + return TRUE; +} + +/* Two instruction overlay stubs look like: + + brsl $75,__ovly_load + .word target_ovl_and_address + + ovl_and_address is a word with the overlay number in the top 14 bits + and local store address in the bottom 18 bits. 
+ + Four instruction overlay stubs look like: + + ila $78,ovl_number + lnop + ila $79,target_address + br __ovly_load */ + +static bfd_boolean +build_stub (struct spu_link_hash_table *htab, + bfd *ibfd, + asection *isec, + enum _insn_type insn_type, + struct elf_link_hash_entry *h, + const Elf_Internal_Rela *irela, + bfd_vma dest, + asection *dest_sec) +{ + unsigned int ovl; + struct got_entry *g, **head; + asection *sec; + bfd_vma val, from, to; + + ovl = 0; + if (insn_type != non_branch) + ovl = spu_elf_section_data (isec->output_section)->u.o.ovl_index; + + if (h != NULL) + head = &h->got.glist; + else + head = elf_local_got_ents (ibfd) + ELF32_R_SYM (irela->r_info); + + g = *head; + if (g != NULL && g->ovl == 0 && ovl != 0) + return TRUE; + + for (; g != NULL; g = g->next) + if (g->ovl == ovl) + break; + if (g == NULL) + abort (); + + if (g->stub_addr != (bfd_vma) -1) + return TRUE; + + sec = htab->stub_sec[ovl]; + dest += dest_sec->output_offset + dest_sec->output_section->vma; + from = sec->size + sec->output_offset + sec->output_section->vma; + g->stub_addr = from; + to = (htab->ovly_load->root.u.def.value + + htab->ovly_load->root.u.def.section->output_offset + + htab->ovly_load->root.u.def.section->output_section->vma); + val = to - from; + if (OVL_STUB_SIZE == 16) + val -= 12; + if (((dest | to | from) & 3) != 0 + || val + 0x20000 >= 0x40000) + { + htab->stub_err = 1; + return FALSE; + } + ovl = spu_elf_section_data (dest_sec->output_section)->u.o.ovl_index; + + if (OVL_STUB_SIZE == 16) + { + bfd_put_32 (sec->owner, ILA + ((ovl << 7) & 0x01ffff80) + 78, + sec->contents + sec->size); + bfd_put_32 (sec->owner, LNOP, + sec->contents + sec->size + 4); + bfd_put_32 (sec->owner, ILA + ((dest << 7) & 0x01ffff80) + 79, + sec->contents + sec->size + 8); + bfd_put_32 (sec->owner, BR + ((val << 5) & 0x007fff80), + sec->contents + sec->size + 12); + } + else if (OVL_STUB_SIZE == 8) + { + bfd_put_32 (sec->owner, BRSL + ((val << 5) & 0x007fff80) + 75, + sec->contents + 
sec->size); + + val = (dest & 0x3ffff) | (ovl << 14); + bfd_put_32 (sec->owner, val, + sec->contents + sec->size + 4); + } + else + abort (); + sec->size += OVL_STUB_SIZE; + + if (htab->emit_stub_syms) + { + size_t len; + char *name; + int add; + + len = 8 + sizeof (".ovl_call.") - 1; + if (h != NULL) + len += strlen (h->root.root.string); + else + len += 8 + 1 + 8; + add = 0; + if (irela != NULL) + add = (int) irela->r_addend & 0xffffffff; + if (add != 0) + len += 1 + 8; + name = bfd_malloc (len); + if (name == NULL) + return FALSE; + + sprintf (name, "%08x.ovl_call.", g->ovl); + if (h != NULL) + strcpy (name + 8 + sizeof (".ovl_call.") - 1, h->root.root.string); + else + sprintf (name + 8 + sizeof (".ovl_call.") - 1, "%x:%x", + dest_sec->id & 0xffffffff, + (int) ELF32_R_SYM (irela->r_info) & 0xffffffff); + if (add != 0) + sprintf (name + len - 9, "+%x", add); + + h = elf_link_hash_lookup (&htab->elf, name, TRUE, TRUE, FALSE); + free (name); + if (h == NULL) + return FALSE; + if (h->root.type == bfd_link_hash_new) + { + h->root.type = bfd_link_hash_defined; + h->root.u.def.section = sec; + h->root.u.def.value = sec->size - OVL_STUB_SIZE; + h->size = OVL_STUB_SIZE; + h->type = STT_FUNC; + h->ref_regular = 1; + h->def_regular = 1; + h->ref_regular_nonweak = 1; + h->forced_local = 1; + h->non_elf = 0; + } + } + + return TRUE; +} + /* Called via elf_link_hash_traverse to allocate stubs for any _SPUEAR_ symbols. 
*/ @@ -786,103 +900,47 @@ allocate_spuear_stubs (struct elf_link_hash_entry *h, void *inf) && strncmp (h->root.root.string, "_SPUEAR_", 8) == 0) { struct spu_link_hash_table *htab = inf; - static Elf_Internal_Rela zero_rel; - char *stub_name = spu_stub_name (h->root.u.def.section, h, &zero_rel); - struct spu_stub_hash_entry *sh; - - if (stub_name == NULL) - { - htab->stubs.err = 1; - return FALSE; - } - - sh = (struct spu_stub_hash_entry *) - bfd_hash_lookup (&htab->stub_hash_table, stub_name, TRUE, FALSE); - if (sh == NULL) - { - free (stub_name); - return FALSE; - } - - /* If this entry isn't new, we already have a stub. */ - if (sh->target_section != NULL) - { - free (stub_name); - return TRUE; - } - sh->target_section = h->root.u.def.section; - sh->target_off = h->root.u.def.value; - htab->stubs.count += 1; + count_stub (htab, NULL, NULL, non_branch, h, NULL); } return TRUE; } -/* Called via bfd_hash_traverse to set up pointers to all symbols - in the stub hash table. */ - static bfd_boolean -populate_stubs (struct bfd_hash_entry *bh, void *inf) +build_spuear_stubs (struct elf_link_hash_entry *h, void *inf) { - struct spu_link_hash_table *htab = inf; + /* Symbols starting with _SPUEAR_ need a stub because they may be + invoked by the PPU. */ + if ((h->root.type == bfd_link_hash_defined + || h->root.type == bfd_link_hash_defweak) + && h->def_regular + && strncmp (h->root.root.string, "_SPUEAR_", 8) == 0) + { + struct spu_link_hash_table *htab = inf; - htab->stubs.sh[--htab->stubs.count] = (struct spu_stub_hash_entry *) bh; + build_stub (htab, NULL, NULL, non_branch, h, NULL, + h->root.u.def.value, h->root.u.def.section); + } + return TRUE; } -/* qsort predicate to sort stubs by overlay number. */ +/* Size or build stubs. 
*/ -static int -sort_stubs (const void *a, const void *b) -{ - const struct spu_stub_hash_entry *const *sa = a; - const struct spu_stub_hash_entry *const *sb = b; - int i; - bfd_signed_vma d; - - i = spu_elf_section_data ((*sa)->target_section->output_section)->ovl_index; - i -= spu_elf_section_data ((*sb)->target_section->output_section)->ovl_index; - if (i != 0) - return i; - - d = ((*sa)->target_section->output_section->vma - + (*sa)->target_section->output_offset - + (*sa)->target_off - - (*sb)->target_section->output_section->vma - - (*sb)->target_section->output_offset - - (*sb)->target_off); - if (d != 0) - return d < 0 ? -1 : 1; - - /* Two functions at the same address. Aliases perhaps. */ - i = strcmp ((*sb)->root.string, (*sa)->root.string); - BFD_ASSERT (i != 0); - return i; -} - -/* Allocate space for overlay call and return stubs. */ - -bfd_boolean -spu_elf_size_stubs (bfd *output_bfd, - struct bfd_link_info *info, - int non_overlay_stubs, - int stack_analysis, - asection **stub, - asection **ovtab, - asection **toe) +static bfd_boolean +process_stubs (bfd *output_bfd, + struct bfd_link_info *info, + bfd_boolean build) { struct spu_link_hash_table *htab = spu_hash_table (info); bfd *ibfd; - unsigned i, group; - flagword flags; - htab->non_overlay_stubs = non_overlay_stubs; for (ibfd = info->input_bfds; ibfd != NULL; ibfd = ibfd->link_next) { extern const bfd_target bfd_elf32_spu_vec; Elf_Internal_Shdr *symtab_hdr; - asection *section; + asection *isec; Elf_Internal_Sym *local_syms = NULL; void *psyms; @@ -896,37 +954,36 @@ spu_elf_size_stubs (bfd *output_bfd, /* Arrange to read and keep global syms for later stack analysis. */ psyms = &local_syms; - if (stack_analysis) + if (htab->stack_analysis) psyms = &symtab_hdr->contents; /* Walk over each section attached to the input bfd. 
*/ - for (section = ibfd->sections; section != NULL; section = section->next) + for (isec = ibfd->sections; isec != NULL; isec = isec->next) { Elf_Internal_Rela *internal_relocs, *irelaend, *irela; /* If there aren't any relocs, then there's nothing more to do. */ - if ((section->flags & SEC_RELOC) == 0 - || (section->flags & SEC_ALLOC) == 0 - || (section->flags & SEC_LOAD) == 0 - || section->reloc_count == 0) + if ((isec->flags & SEC_RELOC) == 0 + || (isec->flags & SEC_ALLOC) == 0 + || (isec->flags & SEC_LOAD) == 0 + || isec->reloc_count == 0) continue; /* If this section is a link-once section that will be discarded, then don't create any stubs. */ - if (section->output_section == NULL - || section->output_section->owner != output_bfd) + if (isec->output_section == NULL + || isec->output_section->owner != output_bfd) continue; /* Get the relocs. */ - internal_relocs - = _bfd_elf_link_read_relocs (ibfd, section, NULL, NULL, - info->keep_memory); + internal_relocs = _bfd_elf_link_read_relocs (ibfd, isec, NULL, NULL, + info->keep_memory); if (internal_relocs == NULL) goto error_ret_free_local; /* Now examine each relocation. 
*/ irela = internal_relocs; - irelaend = irela + section->reloc_count; + irelaend = irela + isec->reloc_count; for (; irela < irelaend; irela++) { enum elf_spu_reloc_type r_type; @@ -935,10 +992,8 @@ spu_elf_size_stubs (bfd *output_bfd, Elf_Internal_Sym *sym; struct elf_link_hash_entry *h; const char *sym_name; - char *stub_name; - struct spu_stub_hash_entry *sh; unsigned int sym_type; - enum _insn_type { non_branch, branch, call } insn_type; + enum _insn_type insn_type; r_type = ELF32_R_TYPE (irela->r_info); r_indx = ELF32_R_SYM (irela->r_info); @@ -946,7 +1001,15 @@ spu_elf_size_stubs (bfd *output_bfd, if (r_type >= R_SPU_max) { bfd_set_error (bfd_error_bad_value); - goto error_ret_free_internal; + error_ret_free_internal: + if (elf_section_data (isec)->relocs != internal_relocs) + free (internal_relocs); + error_ret_free_local: + if (local_syms != NULL + && (symtab_hdr->contents + != (unsigned char *) local_syms)) + free (local_syms); + return FALSE; } /* Determine the reloc target section. 
*/ @@ -970,7 +1033,7 @@ spu_elf_size_stubs (bfd *output_bfd, { unsigned char insn[4]; - if (!bfd_get_section_contents (ibfd, section, insn, + if (!bfd_get_section_contents (ibfd, isec, insn, irela->r_offset, 4)) goto error_ret_free_internal; @@ -1013,50 +1076,40 @@ spu_elf_size_stubs (bfd *output_bfd, continue; } - if (!needs_ovl_stub (sym_name, sym_sec, section, htab, + if (!needs_ovl_stub (sym_name, sym_sec, isec, htab, insn_type != non_branch)) continue; - stub_name = spu_stub_name (sym_sec, h, irela); - if (stub_name == NULL) - goto error_ret_free_internal; - - sh = (struct spu_stub_hash_entry *) - bfd_hash_lookup (&htab->stub_hash_table, stub_name, - TRUE, FALSE); - if (sh == NULL) + if (htab->stub_count == NULL) { - free (stub_name); - error_ret_free_internal: - if (elf_section_data (section)->relocs != internal_relocs) - free (internal_relocs); - error_ret_free_local: - if (local_syms != NULL - && (symtab_hdr->contents - != (unsigned char *) local_syms)) - free (local_syms); - return FALSE; + bfd_size_type amt; + amt = (htab->num_overlays + 1) * sizeof (*htab->stub_count); + htab->stub_count = bfd_zmalloc (amt); + if (htab->stub_count == NULL) + goto error_ret_free_internal; } - /* If this entry isn't new, we already have a stub. */ - if (sh->target_section != NULL) + if (!build) { - free (stub_name); - continue; + if (!count_stub (htab, ibfd, isec, insn_type, h, irela)) + goto error_ret_free_internal; } - - sh->target_section = sym_sec; - if (h != NULL) - sh->target_off = h->root.u.def.value; else - sh->target_off = sym->st_value; - sh->target_off += irela->r_addend; - - htab->stubs.count += 1; + { + bfd_vma dest; + + if (h != NULL) + dest = h->root.u.def.value; + else + dest = sym->st_value; + if (!build_stub (htab, ibfd, isec, insn_type, h, irela, + dest, sym_sec)) + goto error_ret_free_internal; + } } /* We're done with the internal relocs, free them. 
*/ - if (elf_section_data (section)->relocs != internal_relocs) + if (elf_section_data (isec)->relocs != internal_relocs) free (internal_relocs); } @@ -1070,99 +1123,64 @@ spu_elf_size_stubs (bfd *output_bfd, } } + return TRUE; +} + +/* Allocate space for overlay call and return stubs. */ + +int +spu_elf_size_stubs (bfd *output_bfd, + struct bfd_link_info *info, + void (*place_spu_section) (asection *, asection *, + const char *), + int non_overlay_stubs) +{ + struct spu_link_hash_table *htab = spu_hash_table (info); + bfd *ibfd; + bfd_size_type amt; + flagword flags; + unsigned int i; + asection *stub; + + htab->non_overlay_stubs = non_overlay_stubs; + if (!process_stubs (output_bfd, info, FALSE)) + return 0; + elf_link_hash_traverse (&htab->elf, allocate_spuear_stubs, htab); - if (htab->stubs.err) - return FALSE; + if (htab->stub_err) + return 0; - *stub = NULL; - if (htab->stubs.count == 0) - return TRUE; + if (htab->stub_count == NULL) + return 1; ibfd = info->input_bfds; - flags = (SEC_ALLOC | SEC_LOAD | SEC_CODE | SEC_READONLY - | SEC_HAS_CONTENTS | SEC_IN_MEMORY); - htab->stub = bfd_make_section_anyway_with_flags (ibfd, ".stub", flags); - *stub = htab->stub; - if (htab->stub == NULL - || !bfd_set_section_alignment (ibfd, htab->stub, 4)) - return FALSE; + amt = (htab->num_overlays + 1) * sizeof (*htab->stub_sec); + htab->stub_sec = bfd_zmalloc (amt); + if (htab->stub_sec == NULL) + return 0; - flags = (SEC_ALLOC | SEC_LOAD + flags = (SEC_ALLOC | SEC_LOAD | SEC_CODE | SEC_READONLY | SEC_HAS_CONTENTS | SEC_IN_MEMORY); - htab->ovtab = bfd_make_section_anyway_with_flags (ibfd, ".ovtab", flags); - *ovtab = htab->ovtab; - if (htab->ovtab == NULL - || !bfd_set_section_alignment (ibfd, htab->ovtab, 4)) - return FALSE; - - *toe = bfd_make_section_anyway_with_flags (ibfd, ".toe", SEC_ALLOC); - if (*toe == NULL - || !bfd_set_section_alignment (ibfd, *toe, 4)) - return FALSE; - (*toe)->size = 16; + stub = bfd_make_section_anyway_with_flags (ibfd, ".stub", flags); + 
htab->stub_sec[0] = stub; + if (stub == NULL + || !bfd_set_section_alignment (ibfd, stub, 3 + (OVL_STUB_SIZE > 8))) + return 0; + stub->size = htab->stub_count[0] * OVL_STUB_SIZE; + (*place_spu_section) (stub, NULL, ".text"); - /* Retrieve all the stubs and sort. */ - htab->stubs.sh = bfd_malloc (htab->stubs.count * sizeof (*htab->stubs.sh)); - if (htab->stubs.sh == NULL) - return FALSE; - i = htab->stubs.count; - bfd_hash_traverse (&htab->stub_hash_table, populate_stubs, htab); - BFD_ASSERT (htab->stubs.count == 0); - - htab->stubs.count = i; - qsort (htab->stubs.sh, htab->stubs.count, sizeof (*htab->stubs.sh), - sort_stubs); - - /* Now that the stubs are sorted, place them in the stub section. - Stubs are grouped per overlay - . ila $79,func1 - . br 1f - . ila $79,func2 - . br 1f - . - . - . ila $79,funcn - . nop - . 1: - . ila $78,ovl_index - . br __ovly_load */ - - group = 0; - for (i = 0; i < htab->stubs.count; i++) + for (i = 0; i < htab->num_overlays; ++i) { - if (spu_elf_section_data (htab->stubs.sh[group]->target_section - ->output_section)->ovl_index - != spu_elf_section_data (htab->stubs.sh[i]->target_section - ->output_section)->ovl_index) - { - htab->stub->size += SIZEOF_STUB2; - for (; group != i; group++) - htab->stubs.sh[group]->delta - = htab->stubs.sh[i - 1]->off - htab->stubs.sh[group]->off; - } - if (group == i - || ((htab->stubs.sh[i - 1]->target_section->output_section->vma - + htab->stubs.sh[i - 1]->target_section->output_offset - + htab->stubs.sh[i - 1]->target_off) - != (htab->stubs.sh[i]->target_section->output_section->vma - + htab->stubs.sh[i]->target_section->output_offset - + htab->stubs.sh[i]->target_off))) - { - htab->stubs.sh[i]->off = htab->stub->size; - htab->stub->size += SIZEOF_STUB1; - if (info->emitrelocations) - htab->stub->reloc_count += 1; - } - else - htab->stubs.sh[i]->off = htab->stubs.sh[i - 1]->off; + asection *osec = htab->ovl_sec[i]; + unsigned int ovl = spu_elf_section_data (osec)->u.o.ovl_index; + stub = 
bfd_make_section_anyway_with_flags (ibfd, ".stub", flags); + htab->stub_sec[ovl] = stub; + if (stub == NULL + || !bfd_set_section_alignment (ibfd, stub, 3 + (OVL_STUB_SIZE > 8))) + return 0; + stub->size = htab->stub_count[ovl] * OVL_STUB_SIZE; + (*place_spu_section) (stub, osec, NULL); } - if (group != i) - htab->stub->size += SIZEOF_STUB2; - if (info->emitrelocations) - htab->stub->flags |= SEC_RELOC; - for (; group != i; group++) - htab->stubs.sh[group]->delta - = htab->stubs.sh[i - 1]->off - htab->stubs.sh[group]->off; /* htab->ovtab consists of two arrays. . struct { @@ -1174,12 +1192,27 @@ spu_elf_size_stubs (bfd *output_bfd, . . struct { . u32 mapped; - . } _ovly_buf_table[]; */ + . } _ovly_buf_table[]; + . */ - htab->ovtab->alignment_power = 4; - htab->ovtab->size = htab->num_overlays * 16 + htab->num_buf * 4; + flags = (SEC_ALLOC | SEC_LOAD + | SEC_HAS_CONTENTS | SEC_IN_MEMORY); + htab->ovtab = bfd_make_section_anyway_with_flags (ibfd, ".ovtab", flags); + if (htab->ovtab == NULL + || !bfd_set_section_alignment (ibfd, htab->ovtab, 4)) + return 0; - return TRUE; + htab->ovtab->size = htab->num_overlays * 16 + 16 + htab->num_buf * 2 * 4; + (*place_spu_section) (htab->ovtab, NULL, ".data"); + + htab->toe = bfd_make_section_anyway_with_flags (ibfd, ".toe", SEC_ALLOC); + if (htab->toe == NULL + || !bfd_set_section_alignment (ibfd, htab->toe, 4)) + return 0; + htab->toe->size = 16; + (*place_spu_section) (htab->toe, NULL, ".toe"); + + return 2; } /* Functions to handle embedded spu_ovl.o object. */ @@ -1228,149 +1261,6 @@ spu_elf_open_builtin_lib (bfd **ovl_bfd, const struct _ovl_stream *stream) return *ovl_bfd != NULL; } -/* Fill in the ila and br for a stub. On the last stub for a group, - write the stub that sets the overlay number too. 
*/ - -static bfd_boolean -write_one_stub (struct spu_stub_hash_entry *ent, struct bfd_link_info *info) -{ - struct spu_link_hash_table *htab = spu_hash_table (info); - asection *sec = htab->stub; - asection *s = ent->target_section; - unsigned int ovl; - bfd_vma val; - - val = ent->target_off + s->output_offset + s->output_section->vma; - bfd_put_32 (sec->owner, ILA_79 + ((val << 7) & 0x01ffff80), - sec->contents + ent->off); - val = ent->delta + 4; - bfd_put_32 (sec->owner, BR + ((val << 5) & 0x007fff80), - sec->contents + ent->off + 4); - - if (info->emitrelocations) - { - Elf_Internal_Rela *relocs, *r; - struct bfd_elf_section_data *elfsec_data; - - elfsec_data = elf_section_data (sec); - relocs = elfsec_data->relocs; - if (relocs == NULL) - { - bfd_size_type relsize; - Elf_Internal_Shdr *symtab_hdr; - struct elf_link_hash_entry **sym_hash; - unsigned long symcount; - bfd_vma amt; - - relsize = sec->reloc_count * sizeof (*relocs); - relocs = bfd_alloc (sec->owner, relsize); - if (relocs == NULL) - return FALSE; - elfsec_data->relocs = relocs; - elfsec_data->rel_hdr.sh_size - = sec->reloc_count * sizeof (Elf32_External_Rela); - elfsec_data->rel_hdr.sh_entsize = sizeof (Elf32_External_Rela); - sec->reloc_count = 0; - - /* Increase the size of symbol hash array on the bfd to - which we attached our .stub section. This hack allows - us to create relocs against global symbols. 
*/ - symtab_hdr = &elf_tdata (sec->owner)->symtab_hdr; - symcount = symtab_hdr->sh_size / symtab_hdr->sh_entsize; - symcount -= symtab_hdr->sh_info; - amt = symcount * sizeof (*sym_hash); - sym_hash = bfd_alloc (sec->owner, amt + sizeof (*sym_hash)); - if (sym_hash == NULL) - return FALSE; - memcpy (sym_hash, elf_sym_hashes (sec->owner), amt); - sym_hash[symcount] = htab->ovly_load; - htab->ovly_load_r_symndx = symcount + symtab_hdr->sh_info; - elf_sym_hashes (sec->owner) = sym_hash; - } - r = relocs + sec->reloc_count; - sec->reloc_count += 1; - r->r_offset = ent->off + 4; - r->r_info = ELF32_R_INFO (0, R_SPU_REL16); - r->r_addend = (sec->output_section->vma - + sec->output_offset - + ent->off + 4 - + val); - } - - /* If this is the last stub of this group, write stub2. */ - if (ent->delta == 0) - { - bfd_put_32 (sec->owner, NOP, - sec->contents + ent->off + 4); - - ovl = spu_elf_section_data (s->output_section)->ovl_index; - bfd_put_32 (sec->owner, ILA_78 + ((ovl << 7) & 0x01ffff80), - sec->contents + ent->off + 8); - - val = (htab->ovly_load->root.u.def.section->output_section->vma - + htab->ovly_load->root.u.def.section->output_offset - + htab->ovly_load->root.u.def.value - - (sec->output_section->vma - + sec->output_offset - + ent->off + 12)); - - if (val + 0x20000 >= 0x40000) - htab->stub_overflow = TRUE; - - bfd_put_32 (sec->owner, BR + ((val << 5) & 0x007fff80), - sec->contents + ent->off + 12); - - if (info->emitrelocations) - { - Elf_Internal_Rela *relocs, *r; - struct bfd_elf_section_data *elfsec_data; - - elfsec_data = elf_section_data (sec); - relocs = elfsec_data->relocs; - /* The last branch is overwritten, so overwrite its reloc too. 
*/ - r = relocs + sec->reloc_count - 1; - r->r_offset = ent->off + 12; - r->r_info = ELF32_R_INFO (htab->ovly_load_r_symndx, R_SPU_REL16); - r->r_addend = 0; - } - } - - if (htab->emit_stub_syms) - { - struct elf_link_hash_entry *h; - size_t len1, len2; - char *name; - - len1 = sizeof ("00000000.ovl_call.") - 1; - len2 = strlen (ent->root.string); - name = bfd_malloc (len1 + len2 + 1); - if (name == NULL) - return FALSE; - memcpy (name, "00000000.ovl_call.", len1); - memcpy (name + len1, ent->root.string, len2 + 1); - h = elf_link_hash_lookup (&htab->elf, name, TRUE, TRUE, FALSE); - free (name); - if (h == NULL) - return FALSE; - if (h->root.type == bfd_link_hash_new) - { - h->root.type = bfd_link_hash_defined; - h->root.u.def.section = sec; - h->root.u.def.value = ent->off; - h->size = (ent->delta == 0 - ? SIZEOF_STUB1 + SIZEOF_STUB2 : SIZEOF_STUB1); - h->type = STT_FUNC; - h->ref_regular = 1; - h->def_regular = 1; - h->ref_regular_nonweak = 1; - h->forced_local = 1; - h->non_elf = 0; - } - } - - return TRUE; -} - /* Define an STT_OBJECT symbol. */ static struct elf_link_hash_entry * @@ -1408,7 +1298,7 @@ define_ovtab_symbol (struct spu_link_hash_table *htab, const char *name) /* Fill in all stubs and the overlay tables. 
*/ bfd_boolean -spu_elf_build_stubs (struct bfd_link_info *info, int emit_syms, asection *toe) +spu_elf_build_stubs (struct bfd_link_info *info, int emit_syms) { struct spu_link_hash_table *htab = spu_hash_table (info); struct elf_link_hash_entry *h; @@ -1418,9 +1308,19 @@ spu_elf_build_stubs (struct bfd_link_info *info, int emit_syms, asection *toe) unsigned int i; htab->emit_stub_syms = emit_syms; - htab->stub->contents = bfd_zalloc (htab->stub->owner, htab->stub->size); - if (htab->stub->contents == NULL) - return FALSE; + if (htab->stub_count == NULL) + return TRUE; + + for (i = 0; i <= htab->num_overlays; i++) + if (htab->stub_sec[i]->size != 0) + { + htab->stub_sec[i]->contents = bfd_zalloc (htab->stub_sec[i]->owner, + htab->stub_sec[i]->size); + if (htab->stub_sec[i]->contents == NULL) + return FALSE; + htab->stub_sec[i]->rawsize = htab->stub_sec[i]->size; + htab->stub_sec[i]->size = 0; + } h = elf_link_hash_lookup (&htab->elf, "__ovly_load", FALSE, FALSE, FALSE); htab->ovly_load = h; @@ -1430,7 +1330,7 @@ spu_elf_build_stubs (struct bfd_link_info *info, int emit_syms, asection *toe) && h->def_regular); s = h->root.u.def.section->output_section; - if (spu_elf_section_data (s)->ovl_index) + if (spu_elf_section_data (s)->u.o.ovl_index) { (*_bfd_error_handler) (_("%s in overlay section"), h->root.u.def.section->owner); @@ -1438,11 +1338,29 @@ spu_elf_build_stubs (struct bfd_link_info *info, int emit_syms, asection *toe) return FALSE; } + h = elf_link_hash_lookup (&htab->elf, "__ovly_return", FALSE, FALSE, FALSE); + htab->ovly_return = h; + /* Write out all the stubs. 
*/ - for (i = 0; i < htab->stubs.count; i++) - write_one_stub (htab->stubs.sh[i], info); + obfd = htab->ovtab->output_section->owner; + process_stubs (obfd, info, TRUE); + + elf_link_hash_traverse (&htab->elf, build_spuear_stubs, htab); + if (htab->stub_err) + return FALSE; - if (htab->stub_overflow) + for (i = 0; i <= htab->num_overlays; i++) + { + if (htab->stub_sec[i]->size != htab->stub_sec[i]->rawsize) + { + (*_bfd_error_handler) (_("stubs don't match calculated size")); + bfd_set_error (bfd_error_bad_value); + return FALSE; + } + htab->stub_sec[i]->rawsize = 0; + } + + if (htab->stub_err) { (*_bfd_error_handler) (_("overlay stub relocation overflow")); bfd_set_error (bfd_error_bad_value); @@ -1455,75 +1373,52 @@ spu_elf_build_stubs (struct bfd_link_info *info, int emit_syms, asection *toe) /* Write out _ovly_table. */ p = htab->ovtab->contents; - obfd = htab->ovtab->output_section->owner; + /* set low bit of .buf to mark non-overlay area as present. */ + p[15] = 1; for (s = obfd->sections; s != NULL; s = s->next) { - unsigned int ovl_index = spu_elf_section_data (s)->ovl_index; + unsigned int ovl_index = spu_elf_section_data (s)->u.o.ovl_index; if (ovl_index != 0) { - unsigned int lo, hi, mid; - unsigned long off = (ovl_index - 1) * 16; + unsigned long off = ovl_index * 16; + unsigned int ovl_buf = spu_elf_section_data (s)->u.o.ovl_buf; + bfd_put_32 (htab->ovtab->owner, s->vma, p + off); bfd_put_32 (htab->ovtab->owner, (s->size + 15) & -16, p + off + 4); /* file_off written later in spu_elf_modify_program_headers. 
*/ - - lo = 0; - hi = htab->num_buf; - while (lo < hi) - { - mid = (lo + hi) >> 1; - if (htab->ovl_region[2 * mid + 1]->vma - + htab->ovl_region[2 * mid + 1]->size <= s->vma) - lo = mid + 1; - else if (htab->ovl_region[2 * mid]->vma > s->vma) - hi = mid; - else - { - bfd_put_32 (htab->ovtab->owner, mid + 1, p + off + 12); - break; - } - } - BFD_ASSERT (lo < hi); + bfd_put_32 (htab->ovtab->owner, ovl_buf * 2, p + off + 12); } } - /* Write out _ovly_buf_table. */ - p = htab->ovtab->contents + htab->num_overlays * 16; - for (i = 0; i < htab->num_buf; i++) - { - bfd_put_32 (htab->ovtab->owner, 0, p); - p += 4; - } - h = define_ovtab_symbol (htab, "_ovly_table"); if (h == NULL) return FALSE; - h->root.u.def.value = 0; + h->root.u.def.value = 16; h->size = htab->num_overlays * 16; h = define_ovtab_symbol (htab, "_ovly_table_end"); if (h == NULL) return FALSE; - h->root.u.def.value = htab->num_overlays * 16; + h->root.u.def.value = htab->num_overlays * 16 + 16; h->size = 0; h = define_ovtab_symbol (htab, "_ovly_buf_table"); if (h == NULL) return FALSE; - h->root.u.def.value = htab->num_overlays * 16; - h->size = htab->num_buf * 4; + h->root.u.def.value = htab->num_overlays * 16 + 16; + h->size = htab->num_buf * 2 * 4; h = define_ovtab_symbol (htab, "_ovly_buf_table_end"); if (h == NULL) return FALSE; - h->root.u.def.value = htab->num_overlays * 16 + htab->num_buf * 4; + h->root.u.def.value = htab->num_overlays * 16 + 16 + htab->num_buf * 2 * 4; h->size = 0; h = define_ovtab_symbol (htab, "_EAR_"); if (h == NULL) return FALSE; - h->root.u.def.section = toe; + h->root.u.def.section = htab->toe; h->root.u.def.value = 0; h->size = 16; @@ -1716,10 +1611,10 @@ alloc_stack_info (asection *sec, int max_fun) amt = sizeof (struct spu_elf_stack_info); amt += (max_fun - 1) * sizeof (struct function_info); - sec_data->stack_info = bfd_zmalloc (amt); - if (sec_data->stack_info != NULL) - sec_data->stack_info->max_fun = max_fun; - return sec_data->stack_info; + sec_data->u.i.stack_info 
= bfd_zmalloc (amt); + if (sec_data->u.i.stack_info != NULL) + sec_data->u.i.stack_info->max_fun = max_fun; + return sec_data->u.i.stack_info; } /* Add a new struct function_info describing a (part of a) function @@ -1732,7 +1627,7 @@ maybe_insert_function (asection *sec, bfd_boolean is_func) { struct _spu_elf_section_data *sec_data = spu_elf_section_data (sec); - struct spu_elf_stack_info *sinfo = sec_data->stack_info; + struct spu_elf_stack_info *sinfo = sec_data->u.i.stack_info; int i; bfd_vma off, size; @@ -1796,7 +1691,7 @@ maybe_insert_function (asection *sec, if (sinfo == NULL) return NULL; memset ((char *) sinfo + old, 0, amt - old); - sec_data->stack_info = sinfo; + sec_data->u.i.stack_info = sinfo; } sinfo->fun[i].is_func = is_func; sinfo->fun[i].global = global; @@ -1887,7 +1782,7 @@ static bfd_boolean check_function_ranges (asection *sec, struct bfd_link_info *info) { struct _spu_elf_section_data *sec_data = spu_elf_section_data (sec); - struct spu_elf_stack_info *sinfo = sec_data->stack_info; + struct spu_elf_stack_info *sinfo = sec_data->u.i.stack_info; int i; bfd_boolean gaps = FALSE; @@ -1933,7 +1828,7 @@ static struct function_info * find_function (asection *sec, bfd_vma offset, struct bfd_link_info *info) { struct _spu_elf_section_data *sec_data = spu_elf_section_data (sec); - struct spu_elf_stack_info *sinfo = sec_data->stack_info; + struct spu_elf_stack_info *sinfo = sec_data->u.i.stack_info; int lo, hi, mid; lo = 0; @@ -2155,7 +2050,7 @@ pasted_function (asection *sec, struct bfd_link_info *info) } if (l->type == bfd_indirect_link_order && (sec_data = spu_elf_section_data (l->u.indirect.section)) != NULL - && (sinfo = sec_data->stack_info) != NULL + && (sinfo = sec_data->u.i.stack_info) != NULL && sinfo->num_fun != 0) fun_start = &sinfo->fun[sinfo->num_fun - 1]; } @@ -2164,15 +2059,15 @@ pasted_function (asection *sec, struct bfd_link_info *info) return FALSE; } -/* We're only interested in code sections. 
*/ +/* We're only interested in code sections. Testing SEC_IN_MEMORY excludes + overlay stub sections. */ static bfd_boolean -interesting_section (asection *s, bfd *obfd, struct spu_link_hash_table *htab) +interesting_section (asection *s, bfd *obfd) { - return (s != htab->stub - && s->output_section != NULL + return (s->output_section != NULL && s->output_section->owner == obfd - && ((s->flags & (SEC_ALLOC | SEC_LOAD | SEC_CODE)) + && ((s->flags & (SEC_ALLOC | SEC_LOAD | SEC_CODE | SEC_IN_MEMORY)) == (SEC_ALLOC | SEC_LOAD | SEC_CODE)) && s->size != 0); } @@ -2182,7 +2077,6 @@ interesting_section (asection *s, bfd *obfd, struct spu_link_hash_table *htab) static bfd_boolean discover_functions (bfd *output_bfd, struct bfd_link_info *info) { - struct spu_link_hash_table *htab = spu_hash_table (info); bfd *ibfd; int bfd_idx; Elf_Internal_Sym ***psym_arr; @@ -2247,7 +2141,7 @@ discover_functions (bfd *output_bfd, struct bfd_link_info *info) asection *s; *p = s = bfd_section_from_elf_index (ibfd, sy->st_shndx); - if (s != NULL && interesting_section (s, output_bfd, htab)) + if (s != NULL && interesting_section (s, output_bfd)) *psy++ = sy; } symcount = psy - psyms; @@ -2289,7 +2183,7 @@ discover_functions (bfd *output_bfd, struct bfd_link_info *info) } for (sec = ibfd->sections; sec != NULL && !gaps; sec = sec->next) - if (interesting_section (sec, output_bfd, htab)) + if (interesting_section (sec, output_bfd)) gaps |= check_function_ranges (sec, info); } @@ -2307,7 +2201,7 @@ discover_functions (bfd *output_bfd, struct bfd_link_info *info) continue; for (sec = ibfd->sections; sec != NULL; sec = sec->next) - if (interesting_section (sec, output_bfd, htab) + if (interesting_section (sec, output_bfd) && sec->reloc_count != 0) { if (!mark_functions_via_relocs (sec, info, FALSE)) @@ -2334,7 +2228,7 @@ discover_functions (bfd *output_bfd, struct bfd_link_info *info) gaps = FALSE; for (sec = ibfd->sections; sec != NULL && !gaps; sec = sec->next) - if (interesting_section (sec, 
output_bfd, htab)) + if (interesting_section (sec, output_bfd)) gaps |= check_function_ranges (sec, info); if (!gaps) continue; @@ -2360,13 +2254,13 @@ discover_functions (bfd *output_bfd, struct bfd_link_info *info) the range of such functions to the beginning of the next symbol of interest. */ for (sec = ibfd->sections; sec != NULL; sec = sec->next) - if (interesting_section (sec, output_bfd, htab)) + if (interesting_section (sec, output_bfd)) { struct _spu_elf_section_data *sec_data; struct spu_elf_stack_info *sinfo; sec_data = spu_elf_section_data (sec); - sinfo = sec_data->stack_info; + sinfo = sec_data->u.i.stack_info; if (sinfo != NULL) { int fun_idx; @@ -2455,7 +2349,6 @@ call_graph_traverse (struct function_info *fun, struct bfd_link_info *info) static bfd_boolean build_call_tree (bfd *output_bfd, struct bfd_link_info *info) { - struct spu_link_hash_table *htab = spu_hash_table (info); bfd *ibfd; for (ibfd = info->input_bfds; ibfd != NULL; ibfd = ibfd->link_next) @@ -2468,7 +2361,7 @@ build_call_tree (bfd *output_bfd, struct bfd_link_info *info) for (sec = ibfd->sections; sec != NULL; sec = sec->next) { - if (!interesting_section (sec, output_bfd, htab) + if (!interesting_section (sec, output_bfd) || sec->reloc_count == 0) continue; @@ -2484,7 +2377,7 @@ build_call_tree (bfd *output_bfd, struct bfd_link_info *info) struct spu_elf_stack_info *sinfo; if ((sec_data = spu_elf_section_data (sec)) != NULL - && (sinfo = sec_data->stack_info) != NULL) + && (sinfo = sec_data->u.i.stack_info) != NULL) { int i; for (i = 0; i < sinfo->num_fun; ++i) @@ -2523,7 +2416,7 @@ build_call_tree (bfd *output_bfd, struct bfd_link_info *info) struct spu_elf_stack_info *sinfo; if ((sec_data = spu_elf_section_data (sec)) != NULL - && (sinfo = sec_data->stack_info) != NULL) + && (sinfo = sec_data->u.i.stack_info) != NULL) { int i; for (i = 0; i < sinfo->num_fun; ++i) @@ -2549,7 +2442,7 @@ build_call_tree (bfd *output_bfd, struct bfd_link_info *info) struct spu_elf_stack_info *sinfo; 
if ((sec_data = spu_elf_section_data (sec)) != NULL - && (sinfo = sec_data->stack_info) != NULL) + && (sinfo = sec_data->u.i.stack_info) != NULL) { int i; for (i = 0; i < sinfo->num_fun; ++i) @@ -2684,7 +2577,7 @@ spu_elf_stack_analysis (bfd *output_bfd, struct spu_elf_stack_info *sinfo; if ((sec_data = spu_elf_section_data (sec)) != NULL - && (sinfo = sec_data->stack_info) != NULL) + && (sinfo = sec_data->u.i.stack_info) != NULL) { int i; for (i = 0; i < sinfo->num_fun; ++i) @@ -2847,25 +2740,31 @@ spu_elf_relocate_section (bfd *output_bfd, addend = rel->r_addend; branch = (is_branch (contents + rel->r_offset) || is_hint (contents + rel->r_offset)); - if (needs_ovl_stub (sym_name, sec, input_section, htab, branch)) + if (htab->stub_sec != NULL + && needs_ovl_stub (sym_name, sec, input_section, htab, branch) + && (h == NULL + || (h != htab->ovly_load && h != htab->ovly_return))) { - char *stub_name; - struct spu_stub_hash_entry *sh; + unsigned int ovl = 0; + struct got_entry *g, **head; - stub_name = spu_stub_name (sec, h, rel); - if (stub_name == NULL) - return FALSE; + if (branch) + ovl = (spu_elf_section_data (input_section->output_section) + ->u.o.ovl_index); - sh = (struct spu_stub_hash_entry *) - bfd_hash_lookup (&htab->stub_hash_table, stub_name, FALSE, FALSE); - if (sh != NULL) - { - relocation = (htab->stub->output_section->vma - + htab->stub->output_offset - + sh->off); - addend = 0; - } - free (stub_name); + if (h != NULL) + head = &h->got.glist; + else + head = elf_local_got_ents (input_bfd) + r_symndx; + + for (g = *head; g != NULL; g = g->next) + if (g->ovl == ovl || g->ovl == 0) + break; + if (g == NULL) + abort (); + + relocation = g->stub_addr; + addend = 0; } r = _bfd_final_link_relocate (howto, @@ -2960,30 +2859,22 @@ spu_elf_output_symbol_hook (struct bfd_link_info *info, struct spu_link_hash_table *htab = spu_hash_table (info); if (!info->relocatable - && htab->num_overlays != 0 + && htab->stub_sec != NULL && h != NULL && (h->root.type == 
bfd_link_hash_defined || h->root.type == bfd_link_hash_defweak) && h->def_regular && strncmp (h->root.root.string, "_SPUEAR_", 8) == 0) { - static Elf_Internal_Rela zero_rel; - char *stub_name = spu_stub_name (h->root.u.def.section, h, &zero_rel); - struct spu_stub_hash_entry *sh; + struct got_entry *g = h->got.glist; - if (stub_name == NULL) - return FALSE; - sh = (struct spu_stub_hash_entry *) - bfd_hash_lookup (&htab->stub_hash_table, stub_name, FALSE, FALSE); - free (stub_name); - if (sh == NULL) - return TRUE; - sym->st_shndx - = _bfd_elf_section_from_bfd_section (htab->stub->output_section->owner, - htab->stub->output_section); - sym->st_value = (htab->stub->output_section->vma - + htab->stub->output_offset - + sh->off); + if (g != NULL && g->ovl == 0) + { + sym->st_shndx = (_bfd_elf_section_from_bfd_section + (htab->stub_sec[0]->output_section->owner, + htab->stub_sec[0]->output_section)); + sym->st_value = g->stub_addr; + } } return TRUE; @@ -3049,7 +2940,7 @@ spu_elf_modify_segment_map (bfd *abfd, struct bfd_link_info *info) if (m->p_type == PT_LOAD && m->count > 1) for (i = 0; i < m->count; i++) if ((s = m->sections[i]) == toe - || spu_elf_section_data (s)->ovl_index != 0) + || spu_elf_section_data (s)->u.o.ovl_index != 0) { struct elf_segment_map *m2; bfd_vma amt; @@ -3148,7 +3039,7 @@ spu_elf_modify_program_headers (bfd *abfd, struct bfd_link_info *info) for (i = 0, m = elf_tdata (abfd)->segment_map; m; ++i, m = m->next) if (m->count != 0 - && (o = spu_elf_section_data (m->sections[0])->ovl_index) != 0) + && (o = spu_elf_section_data (m->sections[0])->u.o.ovl_index) != 0) { /* Mark this as an overlay header. */ phdr[i].p_flags |= PF_OVERLAY; @@ -3156,7 +3047,7 @@ spu_elf_modify_program_headers (bfd *abfd, struct bfd_link_info *info) if (htab->ovtab != NULL && htab->ovtab->size != 0) { bfd_byte *p = htab->ovtab->contents; - unsigned int off = (o - 1) * 16 + 8; + unsigned int off = o * 16 + 8; /* Write file_off into _ovly_table. 
*/ bfd_put_32 (htab->ovtab->owner, phdr[i].p_offset, p + off); @@ -3226,7 +3117,6 @@ spu_elf_modify_program_headers (bfd *abfd, struct bfd_link_info *info) #define elf_backend_link_output_symbol_hook spu_elf_output_symbol_hook #define bfd_elf32_new_section_hook spu_elf_new_section_hook #define bfd_elf32_bfd_link_hash_table_create spu_elf_link_hash_table_create -#define bfd_elf32_bfd_link_hash_table_free spu_elf_link_hash_table_free #define elf_backend_additional_program_headers spu_elf_additional_program_headers #define elf_backend_modify_segment_map spu_elf_modify_segment_map diff --git a/bfd/elf32-spu.h b/bfd/elf32-spu.h index 1c421d9..4478e20 100644 --- a/bfd/elf32-spu.h +++ b/bfd/elf32-spu.h @@ -26,12 +26,20 @@ struct _spu_elf_section_data { struct bfd_elf_section_data elf; - /* Stack analysis info kept for this section. */ + union { + /* Info kept for input sections. */ + struct { + /* Stack analysis info kept for this section. */ + struct spu_elf_stack_info *stack_info; + } i; - struct spu_elf_stack_info *stack_info; - - /* Non-zero for overlay output sections. */ - unsigned int ovl_index; + /* Info kept for output sections. */ + struct { + /* Non-zero for overlay output sections. 
*/ + unsigned int ovl_index; + unsigned int ovl_buf; + } o; + } u; }; #define spu_elf_section_data(sec) \ @@ -49,9 +57,8 @@ extern bfd_boolean spu_elf_open_builtin_lib (bfd **, extern bfd_boolean spu_elf_create_sections (bfd *, struct bfd_link_info *, int, int); extern bfd_boolean spu_elf_find_overlays (bfd *, struct bfd_link_info *); -extern bfd_boolean spu_elf_size_stubs (bfd *, struct bfd_link_info *, int, int, - asection **, asection **, - asection **); -extern bfd_boolean spu_elf_build_stubs (struct bfd_link_info *, int, - asection *); +extern int spu_elf_size_stubs (bfd *, struct bfd_link_info *, + void (*) (asection *, asection *, const char *), + int); +extern bfd_boolean spu_elf_build_stubs (struct bfd_link_info *, int); extern asection *spu_elf_check_vma (bfd *, bfd_vma, bfd_vma); diff --git a/ld/ChangeLog b/ld/ChangeLog index 071e5b9..2a72f89 100644 --- a/ld/ChangeLog +++ b/ld/ChangeLog @@ -1,3 +1,15 @@ +2008-01-28 Alan Modra + + * emultempl/spu_ovl.S: Rewrite. + * emultempl/spu_ovl.o: Regenerate. + * emultempl/spuelf.em (toe): Delete. + (spu_place_special_section): Add param to control section placement. + Adjust callers. + (spu_elf_load_ovl_mgr): Adjust for struct _spu_elf_section_data + changes. + (spu_before_allocation): Adjust spu_elf_size_stubs call. + (gld${EMULATION_NAME}_finish): Adjust spu_elf_build_stubs call. + 2008-01-25 H.J. Lu PR ld/5670 diff --git a/ld/emultempl/spu_ovl.S b/ld/emultempl/spu_ovl.S index 66dd69b..3f9c83b 100644 --- a/ld/emultempl/spu_ovl.S +++ b/ld/emultempl/spu_ovl.S @@ -19,295 +19,242 @@ Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA. */ -/** - * MFC DMA defn's. - */ +/* MFC DMA defn's. */ #define MFC_GET_CMD 0x40 #define MFC_MAX_DMA_SIZE 0x4000 #define MFC_TAG_UPDATE_ALL 2 #define MFC_TAG_ID 0 +/* Register usage. 
*/ +#define reserved1 $75 +#define parm $75 +#define tab1 reserved1 +#define tab2 reserved1 +#define vma reserved1 +#define oldvma reserved1 +#define newmask reserved1 +#define map reserved1 + +#define reserved2 $76 +#define off1 reserved2 +#define off2 reserved2 +#define present1 reserved2 +#define present2 reserved2 +#define sz reserved2 +#define cmp reserved2 +#define add64 reserved2 +#define cgbits reserved2 +#define off3 reserved2 +#define off4 reserved2 +#define off5 reserved2 +#define tagstat reserved2 + +#define reserved3 $77 +#define buf1 reserved3 +#define buf2 reserved3 +#define rv3 reserved3 +#define ealo reserved3 +#define cmd reserved3 +#define off64 reserved3 +#define tab3 reserved3 +#define tab4 reserved3 +#define tab5 reserved3 + +#define reserved4 $78 +#define ovl reserved4 +#define rv2 reserved4 +#define rv5 reserved4 +#define cgshuf reserved4 +#define newovl reserved4 + +#define reserved5 $79 +#define target reserved5 + +#define save1 $72 +#define rv4 save1 +#define rv7 save1 +#define tagid save1 +#define maxsize save1 +#define pbyte save1 +#define pbit save1 + +#define save2 $73 +#define cur save2 +#define rv6 save2 +#define osize save2 +#define zovl save2 +#define oldovl save2 +#define newvma save2 + +#define save3 $74 +#define rv1 save3 +#define ea64 save3 +#define buf3 save3 +#define genwi save3 +#define newmap save3 +#define oldmask save3 -/** - * Temporary register allocations. - * These are saved/restored here. 
- */ -#define tab $75 -#define cgbits $75 -#define add64 $75 -#define ealo $75 -#define newmask $75 -#define tagstat $75 -#define bchn $75 -#define rv1 $75 - -#define off $76 -#define off64 $76 -#define maxsize $76 -#define oldmask $76 -#define sz $76 -#define lnkr $76 -#define rv2 $76 - -#define cur $77 -#define cmp $77 -#define buf $77 -#define genwi $77 -#define tagid $77 -#define cmd $77 -#define rv3 $77 - -#define cgshuf $78 - -#define vma $6 - -#define map $7 -#define osize $7 -#define cmp2 $7 - -#define ea64 $8 -#define retval $8 - -#ifdef OVLY_IRQ_SAVE -#define irqtmp $8 -#define irq_stat $9 -#endif - -# Stack quadword minux N -#define SQWM1 -16*1 -#define SQWM2 -16*2 -#define SQWM3 -16*3 -#define SQWM4 -16*4 -#define SQWM5 -16*5 -#define SQWM6 -16*6 -#define SQWM7 -16*7 -#define SQWM8 -16*8 -#define SQWM9 -16*9 -#define SQWM10 -16*10 -#define SQWM11 -16*11 -#define SQWM12 -16*12 -#define SQWM13 -16*13 -#define SQWM14 -16*14 -#define SQWM15 -16*15 -#define SQWM16 -16*16 - - .extern _ovly_table - .extern _ovly_buf_table - -#ifdef OVLY_PRINTFS -#define SPE_C99_VPRINTF 37 -__entry_event_format: - .string "In entry_event_hook segment=0x%08x entry-address=0x%08x\n" -__debug_event_format: - .string "In debug_event_hook link-register=0x%08x %08x %08x %08x\n" -__dma_event_format: - .string "In dma_event_hook vma=0x%08x ea=%08x%08x sz=%08x\n" -__ovly_buf_table_format: - .string "_ovly_buf_table[%08x]=%08x\n" -#endif .text - .align 4 - .type __rv_pattern, @object - .size __rv_pattern, 16 + .align 4 + .type __rv_pattern, @object + .size __rv_pattern, 16 __rv_pattern: - .word 0x00010203, 0x1c1d1e1f, 0x00010203, 0x10111213 - .type __cg_pattern, @object - .size __cg_pattern, 16 + .word 0x00010203, 0x10111213, 0x80808080, 0x80808080 + + .type __cg_pattern, @object + .size __cg_pattern, 16 __cg_pattern: - .word 0x04050607, 0x80808080, 0x80808080, 0x80808080 + .word 0x04050607, 0x80808080, 0x80808080, 0x80808080 + + .type __ovly_current, @object + .size __ovly_current, 16 
+__ovly_current: + .space 16 -/** +/* * __ovly_return - stub for returning from overlay functions. * - * inputs: - * $lr link register + * On entry the four slots of $lr are: + * __ovly_return, prev ovl index, caller return addr, undefined. * - * outputs: - * $78 old partition number, to be reloaded - * $79 return address in old partion number + * Load the previous overlay and jump to the caller return address. + * Updates __ovly_current. */ - .global __ovly_return - .type __ovly_return, @function - - .word 0 + .align 4 + .global __ovly_return + .type __ovly_return, @function __ovly_return: - shlqbyi $78, $lr, 4 - shlqbyi $79, $lr, 8 - biz $78, $79 - .size __ovly_return, . - __ovly_return - -/** + ila tab1, _ovly_table - 16 # 0,2 0 + shlqbyi ovl, $lr, 4 # 1,4 0 +#nop + shlqbyi target, $lr, 8 # 1,4 1 +#nop; lnop +#nop; lnop + shli off1, ovl, 4 # 0,4 4 +#lnop +#nop + hbr ovly_ret9, target # 1,15 5 +#nop; lnop +#nop; lnop +#nop + lqx vma, tab1, off1 # 1,6 8 +#nop; lnop +#nop; lnop +#nop; lnop +#nop; lnop +#nop; lnop +#nop + rotqbyi buf1, vma, 12 # 1,4 14 +#nop + stqd save3, -48($sp) # 1,6 15 +#nop + stqd save2, -32($sp) # 1,6 16 +#nop + stqd save1, -16($sp) # 1,6 17 + andi present1, buf1, 1 # 0,2 18 + stqd ovl, (__ovly_current - __ovly_return)($lr) # 1,6 18 +#nop; lnop +#nop + brz present1, __ovly_load_event # 1,4 20 +ovly_ret9: +#nop + bi target # 1,4 21 + +/* * __ovly_load - copy an overlay partion to local store. * - * inputs: - * $78 partition number to be loaded. - * $79 branch target in new partition. - * $lr link register, containing return addr. + * On entry $75 points to a word consisting of the overlay index in + * the top 14 bits, and the target address in the bottom 18 bits. * - * outputs: - * $lr new link register, returning through __ovly_return. - * - * Copy a new overlay partition into local store, or return - * immediately if the partition is already resident. + * Sets up $lr to return via __ovly_return. + * Updates __ovly_current. 
*/ - .global __ovly_load - .type __ovly_load, @function - + .align 3 + .global __ovly_load + .type __ovly_load, @function __ovly_load: -/* Save temporary registers to stack. */ - stqd $6, -16($sp) - stqd $7, -32($sp) - stqd $8, -48($sp) - -#ifdef OVLY_IRQ_SAVE -/* Save irq state, then disable interrupts. */ - stqd $9, -64($sp) - ila irqtmp, __ovly_irq_save - rdch irq_stat, $SPU_RdMachStat - bid irqtmp -__ovly_irq_save: -#endif - -#ifdef OVLY_PRINTFS -//============================================== -// In entry_event_hook segment=0x%08x entry-address=0x%08x -//============================================== -# save registers - stqd $10, SQWM5($sp) - stqd $11, SQWM6($sp) - stqd $12, SQWM7($sp) -# Place input parameters onto the stack to form the -# local storage memory image. - ila $10, __entry_event_format - stqd $10, SQWM12($sp) - ai $10, $sp, SQWM9 - stqd $10, SQWM11($sp) - stqd $sp, SQWM10($sp) - stqd $78, SQWM9($sp) - stqd $79, SQWM8($sp) -# Construct a message consisting of the 8-bit opcode -# and 24-bit local store pointer to the input -# parameters and place it forllowing the stop and signal - ila $10, 0x3ffff # address mask - ilhu $11, SPE_C99_VPRINTF << 8 - ai $12, $sp, SQWM12 # parameter pointer - selb $11, $11, $12, $10 # combine command & address ptr - brsl $10, next1a -next1a: - .type next1a, @function - lqr $12, message1a - cwd $10, message1a-next1a($10) - shufb $11, $11, $12, $10 # insert msg into inst word - stqr $11, message1a # store cmd/ptr into msg word - dsync -# Notify the PPE to perform the assisted call request -# by issing a stop and signal with a signal code -# of 0x2100 (C99 class) - stop 0x2100 -message1a: - .word 0 - -# save registers - stqd $13, SQWM8($sp) - stqd $14, SQWM9($sp) - stqd $15, SQWM10($sp) - stqd $16, SQWM11($sp) - -# initialize loop - il $13, 1 - ila $14, _ovly_buf_table - ila $15, _ovly_buf_table_end - -loop_start1: -# Place input parameters onto the stack to form the -# local storage memory image. 
- ila $10, __ovly_buf_table_format - stqd $10, SQWM16($sp) - ai $10, $sp, SQWM13 - stqd $10, SQWM15($sp) - stqd $sp, SQWM14($sp) - stqd $13, SQWM13($sp) - lqd $16, 0($14) - rotqby $16, $16, $14 - stqd $16, SQWM12($sp) -# Construct a message consisting of the 8-bit opcode -# and 24-bit local store pointer to the input -# parameters and place it forllowing the stop and signal - ila $10, 0x3ffff # address mask - ilhu $11, SPE_C99_VPRINTF << 8 - ai $12, $sp, SQWM16 # parameter pointer - selb $11, $11, $12, $10 # combine command & address ptr - brsl $10, next1b -next1b: - .type next1b, @function - lqr $12, message1b - cwd $10, message1b-next1b($10) - shufb $11, $11, $12, $10 # insert msg into inst word - stqr $11, message1b # store cmd/ptr into msg word - dsync -# Notify the PPE to perform the assisted call request -# by issing a stop and signal with a signal code -# of 0x2100 (C99 class) - stop 0x2100 -message1b: - .word 0 - -# move to next entry - ai $13, $13, 1 - ai $14, $14, 4 - clgt $16, $15, $14 - brnz $16, loop_start1 - -# restore registers - lqd $16, SQWM11($sp) - lqd $15, SQWM10($sp) - lqd $14, SQWM9($sp) - lqd $13, SQWM8($sp) - lqd $12, SQWM7($sp) - lqd $11, SQWM6($sp) - lqd $10, SQWM5($sp) -//============================================== +#if OVL_STUB_SIZE == 8 +######## +#nop + lqd target, 0(parm) # 1,6 -11 +#nop; lnop +#nop; lnop +#nop; lnop +#nop; lnop +#nop; lnop +#nop + rotqby target, target, parm # 1,4 -5 + ila tab2, _ovly_table - 16 # 0,2 -4 + stqd save3, -48($sp) # 1,6 -4 +#nop + stqd save2, -32($sp) # 1,6 -3 +#nop + stqd save1, -16($sp) # 1,6 -2 + rotmi ovl, target, -18 # 0,4 -1 + hbr ovly_load9, target # 1,15 -1 + ila rv1, __ovly_return # 0,2 0 +#lnop +#nop; lnop +#nop + lqd cur, (__ovly_current - __ovly_return)(rv1) # 1,6 2 + shli off2, ovl, 4 # 0,4 3 + stqd ovl, (__ovly_current - __ovly_return)(rv1) # 1,6 3 + ceq rv2, $lr, rv1 # 0,2 4 + lqd rv3, (__rv_pattern - __ovly_return)(rv1) # 1,6 4 +#nop; lnop +#nop; lnop +#nop + lqx vma, tab2, off2 # 1,6 
7 +######## +#else /* OVL_STUB_SIZE == 16 */ +######## + ila tab2, _ovly_table - 16 # 0,2 0 + stqd save3, -48($sp) # 1,6 0 + ila rv1, __ovly_return # 0,2 1 + stqd save2, -32($sp) # 1,6 1 + shli off2, ovl, 4 # 0,4 2 + lqa cur, __ovly_current # 1,6 2 + nop + stqa ovl, __ovly_current # 1,6 3 + ceq rv2, $lr, rv1 # 0,2 4 + lqd rv3, (__rv_pattern - __ovly_return)(rv1) # 1,6 4 +#nop + hbr ovly_load9, target # 1,15 5 +#nop + lqx vma, tab2, off2 # 1,6 6 +#nop + stqd save1, -16($sp) # 1,6 7 +######## #endif -/* Set branch hint to overlay target. */ - hbr __ovly_load_ret, $79 - -/* Get caller's overlay index by back chaining through stack frames. - * Loop until end of stack (back chain all-zeros) or - * encountered a link register we set here. */ - lqd bchn, 0($sp) - ila retval, __ovly_return - -__ovly_backchain_loop: - lqd lnkr, 16(bchn) - lqd bchn, 0(bchn) - ceq cmp, lnkr, retval - ceqi cmp2, bchn, 0 - or cmp, cmp, cmp2 - brz cmp, __ovly_backchain_loop - -/* If we reached the zero back-chain, then lnkr is bogus. Clear the - * part of lnkr that we use later (slot 3). */ - rotqbyi cmp2, cmp2, 4 - andc lnkr, lnkr, cmp2 - -/* Set lr = {__ovly_return, prev ovl ndx, caller return adr, callee ovl ndx}. */ - lqd rv1, (__rv_pattern-__ovly_return+4)(retval) - shufb rv2, retval, lnkr, rv1 - shufb rv3, $lr, $78, rv1 - fsmbi rv1, 0xff - selb rv2, rv2, rv3, rv1 -/* If we have a tail call from one overlay function to another overlay, - then lr is already set up. Don't change it. */ - ceq rv1, $lr, retval - fsmb rv1, rv1 - selb $lr, rv2, $lr, rv1 - -/* Branch to $79 if non-overlay */ - brz $78, __ovly_load_restore - -/* Load values from _ovly_table[$78]. 
+#nop; lnop +#nop; lnop +#nop + shufb rv4, rv1, cur, rv3 # 1,4 10 +#nop + fsmb rv5, rv2 # 1,4 11 +#nop + rotqmbyi rv6, $lr, -8 # 1,4 12 +#nop + rotqbyi buf2, vma, 12 # 1,4 13 +#nop + lqd save3, -48($sp) # 1,6 14 +#nop; lnop + or rv7, rv4, rv6 # 0,2 16 + lqd save2, -32($sp) # 1,6 16 + andi present2, buf2, 1 # 0,2 17 + lnop # 1,0 17 + selb $lr, rv7, $lr, rv5 # 0,2 18 + lqd save1, -16($sp) # 1,6 18 +#nop + brz present2, __ovly_load_event # 1,4 19 +ovly_load9: +#nop + bi target # 1,4 20 + +/* If we get here, we are about to load a new overlay. + * "vma" contains the relevant entry from _ovly_table[]. * extern struct { * u32 vma; * u32 size; @@ -315,265 +262,166 @@ __ovly_backchain_loop: * u32 buf; * } _ovly_table[]; */ - shli off, $78, 4 - ila tab, _ovly_table - 16 - lqx vma, tab, off - rotqbyi buf, vma, 12 - -/* Load values from _ovly_buf_table[buf]. - * extern struct { - * u32 mapped; - * } _ovly_buf_table[]; - */ - ila tab, _ovly_buf_table - ai off, buf, -1 - shli off, off, 2 - lqx map, tab, off - rotqby cur, map, off - -/* Branch to $79 now if overlay is already mapped. */ - ceq cmp, $78, cur - brnz cmp, __ovly_load_restore - -/* Marker for profiling code. If we get here, we are about to load - * a new overlay. - */ - .global __ovly_load_event - .type __ovly_load_event, @function + .align 3 + .global __ovly_load_event + .type __ovly_load_event, @function __ovly_load_event: - -/* Set _ovly_buf_table[buf].mapped = $78. */ - cwx genwi, tab, off - shufb map, $78, map, genwi - stqx map, tab, off - -/* A new partition needs to be loaded. Prepare for DMA loop. - * _EAR_ is the 64b base EA, filled in at run time by the - * loader, and indicating the value for SPU executable image start. 
- */ - lqd cgshuf, (__cg_pattern-__ovly_return+4)(retval) - rotqbyi osize, vma, 4 - rotqbyi sz, vma, 8 - lqa ea64, _EAR_ - +#nop + rotqbyi sz, vma, 8 # 1,4 0 +#nop + rotqbyi osize, vma, 4 # 1,4 1 +#nop + lqa ea64, _EAR_ # 1,6 2 +#nop + lqd cgshuf, (__cg_pattern - __ovly_return)($lr) # 1,6 3 + +/* We could predict the branch at the end of this loop by adding a few + instructions, and there are plenty of free cycles to do so without + impacting loop execution time. However, it doesn't make a great + deal of sense since we need to wait for the dma to complete anyway. */ __ovly_xfer_loop: -/* 64b add to compute next ea64. */ - rotqmbyi off64, sz, -4 - cg cgbits, ea64, off64 - shufb add64, cgbits, cgbits, cgshuf - addx add64, ea64, off64 - ori ea64, add64, 0 - -/* Setup DMA parameters, then issue DMA request. */ - rotqbyi ealo, add64, 4 - ila maxsize, MFC_MAX_DMA_SIZE - cgt cmp, osize, maxsize - selb sz, osize, maxsize, cmp - ila tagid, MFC_TAG_ID - wrch $MFC_LSA, vma - wrch $MFC_EAH, ea64 - wrch $MFC_EAL, ealo - wrch $MFC_Size, sz - wrch $MFC_TagId, tagid - ila cmd, MFC_GET_CMD - wrch $MFC_Cmd, cmd - -#ifdef OVLY_PRINTFS -//============================================== -// In dma_event_hook vma=0x%08x ea=%08x%08x sz=%08x -//============================================== -# save registers - stqd $10, SQWM5($sp) - stqd $11, SQWM6($sp) - stqd $12, SQWM7($sp) -# Place input parameters onto the stack to form the -# local storage memory image. 
- ila $10, __dma_event_format - stqd $10, SQWM14($sp) - ai $10, $sp, SQWM11 - stqd $10, SQWM13($sp) - stqd $sp, SQWM12($sp) - stqd vma, SQWM11($sp) - stqd ea64, SQWM10($sp) - stqd ealo, SQWM9($sp) - stqd sz, SQWM8($sp) -# Construct a message consisting of the 8-bit opcode -# and 24-bit local store pointer to the input -# parameters and place it forllowing the stop and signal - ila $10, 0x3ffff # address mask - ilhu $11, SPE_C99_VPRINTF << 8 - ai $12, $sp, SQWM14 # parameter pointer - selb $11, $11, $12, $10 # combine command & address ptr - brsl $10, next3a -next3a: - .type next3a, @function - lqr $12, message3a - cwd $10, message3a-next3a($10) - shufb $11, $11, $12, $10 # insert msg into inst word - stqr $11, message3a # store cmd/ptr into msg word - dsync -# Notify the PPE to perform the assisted call request -# by issing a stop and signal with a signal code -# of 0x2100 (C99 class) - stop 0x2100 -message3a: - .word 0 - -# restore registers - lqd $12, SQWM7($sp) - lqd $11, SQWM6($sp) - lqd $10, SQWM5($sp) -//============================================== -#endif - -/* Increment vma, decrement size, branch back as needed. */ - a vma, vma, sz - sf osize, sz, osize - brnz osize, __ovly_xfer_loop - -/* Save app's tagmask, wait for DMA complete, restore mask. 
*/ - rdch oldmask, $MFC_RdTagMask +#nop + rotqmbyi off64, sz, -4 # 1,4 4 +#nop; lnop +#nop; lnop +#nop; lnop + cg cgbits, ea64, off64 # 0,2 8 +#lnop +#nop; lnop +#nop + shufb add64, cgbits, cgbits, cgshuf # 1,4 10 +#nop; lnop +#nop; lnop +#nop; lnop + addx add64, ea64, off64 # 0,2 14 +#lnop + ila maxsize, MFC_MAX_DMA_SIZE # 0,2 15 + lnop + ori ea64, add64, 0 # 0,2 16 + rotqbyi ealo, add64, 4 # 1,4 16 + cgt cmp, osize, maxsize # 0,2 17 + wrch $MFC_LSA, vma # 1,6 17 +#nop; lnop + selb sz, osize, maxsize, cmp # 0,2 19 + wrch $MFC_EAH, ea64 # 1,6 19 + ila tagid, MFC_TAG_ID # 0,2 20 + wrch $MFC_EAL, ealo # 1,6 20 + ila cmd, MFC_GET_CMD # 0,2 21 + wrch $MFC_Size, sz # 1,6 21 + sf osize, sz, osize # 0,2 22 + wrch $MFC_TagId, tagid # 1,6 22 + a vma, vma, sz # 0,2 23 + wrch $MFC_Cmd, cmd # 1,6 23 +#nop + brnz osize, __ovly_xfer_loop # 1,4 24 + +/* Now update our data structures while waiting for DMA to complete. Low bit of .buf needs to be cleared on the _ovly_table entry corresponding to the evicted overlay, and set on the entry for the newly loaded overlay. Note that no overlay may in fact be evicted as _ovly_buf_table[] starts with all zeros. Don't zap .buf entry for zero index! Also of course update the _ovly_buf_table entry.
*/ +#nop + lqd newovl, (__ovly_current - __ovly_return)($lr) # 1,6 25 +#nop; lnop +#nop; lnop +#nop; lnop +#nop; lnop +#nop; lnop + shli off3, newovl, 4 # 0,4 31 +#lnop + ila tab3, _ovly_table - 16 # 0,2 32 +#lnop +#nop + fsmbi pbyte, 1 # 1,4 33 +#nop; lnop +#nop + lqx vma, tab3, off3 # 1,6 35 +#nop; lnop + andi pbit, pbyte, 1 # 0,2 37 + lnop +#nop; lnop +#nop; lnop +#nop; lnop + or newvma, vma, pbit # 0,2 41 + rotqbyi buf3, vma, 12 # 1,4 41 +#nop; lnop +#nop + stqx newvma, tab3, off3 # 1,6 43 +#nop; lnop + shli off4, buf3, 2 # 1,4 45 +#lnop + ila tab4, _ovly_buf_table # 0,2 46 +#lnop +#nop; lnop +#nop; lnop +#nop + lqx map, tab4, off4 # 1,6 49 +#nop + cwx genwi, tab4, off4 # 1,4 50 +#nop; lnop +#nop; lnop +#nop; lnop +#nop; lnop +#nop + rotqby oldovl, map, off4 # 1,4 55 + nop + shufb newmap, newovl, map, genwi # 0,4 56 #if MFC_TAG_ID < 16 - ilh newmask, 1 << MFC_TAG_ID + ila newmask, 1 << MFC_TAG_ID # 0,2 57 #else - ilhu newmask, 1 << (MFC_TAG_ID - 16) -#endif - wrch $MFC_WrTagMask, newmask - ila tagstat, MFC_TAG_UPDATE_ALL - wrch $MFC_WrTagUpdate, tagstat - rdch tagstat, $MFC_RdTagStat - sync - wrch $MFC_WrTagMask, oldmask - -#ifdef OVLY_PRINTFS -//============================================== -// In debug_event_hook link-register=0x%08x %08x %08x %08x -//============================================== -# save registers - stqd $10, SQWM5($sp) - stqd $11, SQWM6($sp) - stqd $12, SQWM7($sp) -# Place input parameters onto the stack to form the -# local storage memory image. 
- ila $10, __debug_event_format - stqd $10, SQWM14($sp) - ai $10, $sp, SQWM11 - stqd $10, SQWM13($sp) - stqd $sp, SQWM12($sp) - stqd $lr, SQWM11($sp) - rotqbyi $10, $lr, 4 - stqd $10, SQWM10($sp) - rotqbyi $10, $10, 4 - stqd $10, SQWM9($sp) - rotqbyi $10, $10, 4 - stqd $10, SQWM8($sp) -# Construct a message consisting of the 8-bit opcode -# and 24-bit local store pointer to the input -# parameters and place it forllowing the stop and signal - ila $10, 0x3ffff # address mask - ilhu $11, SPE_C99_VPRINTF << 8 - ai $12, $sp, SQWM14 # parameter pointer - selb $11, $11, $12, $10 # combine command & address ptr - brsl $10, next2a -next2a: - .type next2a, @function - lqr $12, message2a - cwd $10, message2a-next2a($10) - shufb $11, $11, $12, $10 # insert msg into inst word - stqr $11, message2a # store cmd/ptr into msg word - dsync -# Notify the PPE to perform the assisted call request -# by issing a stop and signal with a signal code -# of 0x2100 (C99 class) - stop 0x2100 -message2a: - .word 0 - -# save registers - stqd $13, SQWM8($sp) - stqd $14, SQWM9($sp) - stqd $15, SQWM10($sp) - stqd $16, SQWM11($sp) - -# initialize loop - il $13, 1 - ila $14, _ovly_buf_table - ila $15, _ovly_buf_table_end - -loop_start2: -# Place input parameters onto the stack to form the -# local storage memory image. 
- ila $10, __ovly_buf_table_format - stqd $10, SQWM16($sp) - ai $10, $sp, SQWM13 - stqd $10, SQWM15($sp) - stqd $sp, SQWM14($sp) - stqd $13, SQWM13($sp) - lqd $16, 0($14) - rotqby $16, $16, $14 - stqd $16, SQWM12($sp) -# Construct a message consisting of the 8-bit opcode -# and 24-bit local store pointer to the input -# parameters and place it forllowing the stop and signal - ila $10, 0x3ffff # address mask - ilhu $11, SPE_C99_VPRINTF << 8 - ai $12, $sp, SQWM16 # parameter pointer - selb $11, $11, $12, $10 # combine command & address ptr - brsl $10, next2b -next2b: - .type next2b, @function - lqr $12, message2b - cwd $10, message2b-next2b($10) - shufb $11, $11, $12, $10 # insert msg into inst word - stqr $11, message2b # store cmd/ptr into msg word - dsync -# Notify the PPE to perform the assisted call request -# by issing a stop and signal with a signal code -# of 0x2100 (C99 class) - stop 0x2100 -message2b: - .word 0 - -# move to next entry - ai $13, $13, 1 - ai $14, $14, 4 - clgt $16, $15, $14 - brnz $16, loop_start2 - -# restore registers - lqd $16, SQWM11($sp) - lqd $15, SQWM10($sp) - lqd $14, SQWM9($sp) - lqd $13, SQWM8($sp) - lqd $12, SQWM7($sp) - lqd $11, SQWM6($sp) - lqd $10, SQWM5($sp) -//============================================== + ilhu newmask, 1 << (MFC_TAG_ID - 16) # 0,2 57 #endif +#lnop +#nop; lnop +#nop; lnop + stqx newmap, tab4, off4 # 1,6 60 + +/* Save app's tagmask, wait for DMA complete, restore mask. */ + ila tagstat, MFC_TAG_UPDATE_ALL # 0,2 61 + rdch oldmask, $MFC_RdTagMask # 1,6 61 +#nop + wrch $MFC_WrTagMask, newmask # 1,6 62 +#nop + wrch $MFC_WrTagUpdate, tagstat # 1,6 63 +#nop + rdch tagstat, $MFC_RdTagStat # 1,6 64 +#nop + sync # 1,4 65 +/* Any hint prior to the sync is lost. A hint here allows the branch + to complete 15 cycles after the hint. With no hint the branch will + take 18 or 19 cycles. 
*/ + ila tab5, _ovly_table - 16 # 0,2 66 + hbr do_load99, target # 1,15 66 + shli off5, oldovl, 4 # 0,4 67 + wrch $MFC_WrTagMask, oldmask # 1,6 67 + ceqi zovl, oldovl, 0 # 0,2 68 +#lnop +#nop; lnop +#nop + fsm zovl, zovl # 1,4 70 +#nop + lqx oldvma, tab5, off5 # 1,6 71 +#nop + lqd save3, -48($sp) # 1,6 72 +#nop; lnop + andc pbit, pbit, zovl # 0,2 74 + lqd save2, -32($sp) # 1,6 74 +#nop; lnop +#nop; lnop + andc oldvma, oldvma, pbit # 0,2 77 + lqd save1, -16($sp) # 1,6 77 +#nop; lnop + nop + stqx oldvma, tab5, off5 # 1,6 79 +#nop; lnop - .global _ovly_debug_event - .type _ovly_debug_event, @function + .global _ovly_debug_event + .type _ovly_debug_event, @function _ovly_debug_event: -/* GDB inserts debugger trap here. */ nop - -__ovly_load_restore: -#ifdef OVLY_IRQ_SAVE -/* Conditionally re-enable interrupts. */ - andi irq_stat, irq_stat, 1 - ila irqtmp, __ovly_irq_restore - binze irq_stat, irqtmp -__ovly_irq_restore: - lqd $9, -64($sp) -#endif - -/* Restore saved registers. */ - lqd $8, -48($sp) - lqd $7, -32($sp) - lqd $6, -16($sp) - -__ovly_load_ret: /* Branch to target address. */ - bi $79 +do_load99: + bi target # 1,4 81 - .size __ovly_load, . - __ovly_load + .size __ovly_load, . 
- __ovly_load diff --git a/ld/emultempl/spu_ovl.o b/ld/emultempl/spu_ovl.o index a68eea3970aaced86a5b5358c14636987aebaffa..d5b37e13bd142512c50e37e9089ed6ebded79f16 100644 GIT binary patch literal 1524 zcmb7EO-~b16unbgD1sIWi6m~ONk5D+4K{^w4xr)(27DHFw~C}6OT=TT$KVeOCP^ADwm=Y zW4;FJsb}x^0@h3N{=rRz464nP{f%+Tn7+=Cua3Q*Trd0Uu9deYhv#p9@t@o&`|5n& zQJt;0#`eUBT&?2%aRfgjfG4Gl0;-xFa`x=VvlD9r>c|^`kIx70L|A?n{;NWKjsx7|X98Z!;*6l-{tA-U zw=mzQ+=FkR&mcLNA4WDAAQ&siGb9J&hbUi7)W%QHPcnA5@w%9^en1`gbI<$>^x#MZ z!9Tjde{%u91GlSJ)#8>~Na?y(+#pq5UF!k)t=x{fS}GQ`4V_F-)b!bQm&>Ozvk;iW z+gYus=JNRhW%8()=}bXUVrP?wN1xLy5%W}Si@k*nkB~ZlZ%GyWbSbL}jj7LQ>C&3e zn5V)$4Le$`)-RwHqRae7IB*}G@dBQW^*_6dv-8`Mkq13}tw;Nf$R*yNbhqmxt>7mY KJ`(P^$SYe4WMD0u3rJ$Mk)6vTs9D_Zg9LA=TO%|6l;58}Y=eBaFX&;0+to2*VKSA~E; zbra|WaAV@9Q3`s5h(Om%4e3VO+xHpTVi<6w$$uYIuF zare1`Txn!8SSv>7QgQ08UH%}}UhRKN4InSsZ>BJ(Ir%843|SOOZucoYw=m22x;GyU z$srOql51jhJVwf7G*~O64|=EKl}H%A5wEn!@ZET&6*FVVx7K>1!Ei)4XpXP`D&f4! zm_^lirA;0u0ot~zP-&NGln7WEjS(HEE6&}BiBOA#dy;Tp67EgH{S8yU#TsHC%&*)x zwKSq>xN+V`hMv_1O}%K+U`8`F8cgT&Sn)p=gP6aCu(xOw)bcQgP7j|%40#x#={g>h za9+pjxz-Nv^LP-E;f@myAFO3N2amx2rRzG>0J;6nGxE&u$1&h1(H5VXFa+KV=Y8@1 z_$)1OZzd3hGI)Gv~oS=|--j_NAd-K%LLR#{J6h1iZc5#n%4?CiVn}_w2W%#rq9#b%)Oa; zZDCQ(=5kAZ)>GxpoKas{oYjpZB5rNyM>J@>>};eyi`=pJ{bbR}NvD@*RgX99NiXP& z*ww_?P1Vz7^z`za>gM^t#J^&J|0DVR;SSmvvsLh#n{3hiu#;f}jj G$ov5>()|wr diff --git a/ld/emultempl/spuelf.em b/ld/emultempl/spuelf.em index e8333a4..7e618a5 100644 --- a/ld/emultempl/spuelf.em +++ b/ld/emultempl/spuelf.em @@ -58,8 +58,6 @@ static const struct _ovl_stream ovl_mgr_stream = { ovl_mgr + sizeof (ovl_mgr) }; -static asection *toe = NULL; - static int is_spu_target (void) @@ -84,7 +82,8 @@ spu_after_open (void) gld${EMULATION_NAME}_after_open (); } -/* Add section S at the end of output section OUTPUT_NAME. +/* If O is NULL, add section S at the end of output section OUTPUT_NAME. + If O is not NULL, add section S at the beginning of output section O. 
Really, we should be duplicating ldlang.c map_input_to_output_sections logic here, ie. using the linker script to find where the section @@ -95,11 +94,11 @@ spu_after_open (void) overlay manager code somewhere else. */ static void -spu_place_special_section (asection *s, const char *output_name) +spu_place_special_section (asection *s, asection *o, const char *output_name) { lang_output_section_statement_type *os; - os = lang_output_section_find (output_name); + os = lang_output_section_find (o != NULL ? o->name : output_name); if (os == NULL) { const char *save = s->name; @@ -107,6 +106,15 @@ spu_place_special_section (asection *s, const char *output_name) gld${EMULATION_NAME}_place_orphan (s); s->name = save; } + else if (o != NULL && os->children.head != NULL) + { + lang_statement_list_type add; + + lang_list_init (&add); + lang_add_section (&add, s, os); + *add.tail = os->children.head; + os->children.head = add.head; + } else lang_add_section (&os->children, s, os); @@ -154,7 +162,7 @@ spu_elf_load_ovl_mgr (void) for (in = ovl_is->the_bfd->sections; in != NULL; in = in->next) if ((in->flags & (SEC_ALLOC | SEC_LOAD)) == (SEC_ALLOC | SEC_LOAD)) - spu_place_special_section (in, ".text"); + spu_place_special_section (in, NULL, ".text"); } } @@ -164,7 +172,7 @@ spu_elf_load_ovl_mgr (void) os = os->next) if (os->bfd_section != NULL && spu_elf_section_data (os->bfd_section) != NULL - && spu_elf_section_data (os->bfd_section)->ovl_index != 0) + && spu_elf_section_data (os->bfd_section)->u.o.ovl_index != 0) { if (os->bfd_section->alignment_power < 4) os->bfd_section->alignment_power = 4; @@ -192,20 +200,15 @@ spu_before_allocation (void) /* Find overlays by inspecting section vmas. 
*/ if (spu_elf_find_overlays (output_bfd, &link_info)) { - asection *stub, *ovtab; + int ret; - if (!spu_elf_size_stubs (output_bfd, &link_info, non_overlay_stubs, - stack_analysis, &stub, &ovtab, &toe)) + ret = spu_elf_size_stubs (output_bfd, &link_info, + spu_place_special_section, + non_overlay_stubs); + if (ret == 0) einfo ("%X%P: can not size overlay stubs: %E\n"); - - if (stub != NULL) - { - spu_place_special_section (stub, ".text"); - spu_place_special_section (ovtab, ".data"); - spu_place_special_section (toe, ".toe"); - - spu_elf_load_ovl_mgr (); - } + else if (ret == 2) + spu_elf_load_ovl_mgr (); } /* We must not cache anything from the preliminary sizing. */ @@ -235,10 +238,8 @@ gld${EMULATION_NAME}_finish (void) einfo ("%X%P: %A exceeds local store range\n", s); } - if (toe != NULL - && !spu_elf_build_stubs (&link_info, - emit_stub_syms || link_info.emitrelocations, - toe)) + if (!spu_elf_build_stubs (&link_info, + emit_stub_syms || link_info.emitrelocations)) einfo ("%X%P: can not build overlay stubs: %E\n"); finish_default (); diff --git a/ld/testsuite/ChangeLog b/ld/testsuite/ChangeLog index ec1018e..ed802de 100644 --- a/ld/testsuite/ChangeLog +++ b/ld/testsuite/ChangeLog @@ -1,3 +1,8 @@ +2008-01-28 Alan Modra + + * ld-spu/ovl.d: Update. + * ld-spu/ovl2.d: Update. + 2008-01-26 Alan Modra * ld-elf/loadaddr.t: New, extracted from.. 
diff --git a/ld/testsuite/ld-spu/ovl.d b/ld/testsuite/ld-spu/ovl.d index c624659..0cd8788 100644 --- a/ld/testsuite/ld-spu/ovl.d +++ b/ld/testsuite/ld-spu/ovl.d @@ -7,65 +7,90 @@ Disassembly of section \.text: 00000100 <_start>: - 100: 1c f8 00 81 ai \$1,\$1,-32 - 104: 48 20 00 00 xor \$0,\$0,\$0 - 108: 24 00 00 80 stqd \$0,0\(\$1\) - 10c: 24 00 40 80 stqd \$0,16\(\$1\) - 110: 33 00 04 00 brsl \$0,130 <00000000\.ovl_call\.f1_a1> # 130 - 110: SPU_REL16 f1_a1 - 114: 33 00 04 80 brsl \$0,138 <00000000\.ovl_call\.f2_a1> # 138 - 114: SPU_REL16 f2_a1 - 118: 33 00 07 00 brsl \$0,150 <00000000\.ovl_call\.f1_a2> # 150 - 118: SPU_REL16 f1_a2 - 11c: 42 00 ac 09 ila \$9,344 # 158 - 11c: SPU_ADDR18 f2_a2 - 120: 35 20 04 80 bisl \$0,\$9 - 124: 1c 08 00 81 ai \$1,\$1,32 # 20 - 128: 32 7f fb 00 br 100 <_start> # 100 - 128: SPU_REL16 _start +.* ai \$1,\$1,-32 +.* xor \$0,\$0,\$0 +.* stqd \$0,0\(\$1\) +.* stqd \$0,16\(\$1\) +.* brsl \$0,.* <00000000\.ovl_call\.f1_a1>.* +.*SPU_REL16 f1_a1 +.* brsl \$0,.* <00000000\.ovl_call\.f2_a1>.* +.*SPU_REL16 f2_a1 +.* brsl \$0,.* <00000000\.ovl_call\.f1_a2>.* +.*SPU_REL16 f1_a2 +#.* ila \$9,328 # 148 +.* ila \$9,352 # 160 +.*SPU_ADDR18 f2_a2 +.* bisl \$0,\$9 +.* ai \$1,\$1,32 # 20 +.* br 100 <_start> # 100 +.*SPU_REL16 _start 0000012c : - 12c: 35 00 00 00 bi \$0 +.* bi \$0 + +#00000130 <00000000\.ovl_call\.f1_a1>: +#.* brsl \$75,.* <__ovly_load>.* +#.*00 04 04 00.* +# +#00000138 <00000000\.ovl_call\.f2_a1>: +#.* brsl \$75,.* <__ovly_load>.* +#.*00 04 04 04.* +# +#00000140 <00000000\.ovl_call\.f1_a2>: +#.* brsl \$75,.* <__ovly_load>.* +#.*00 08 04 00.* +# +#00000148 <00000000\.ovl_call\.f2_a2>: +#.* brsl \$75,.* <__ovly_load>.* +#.*00 08 04 24.* +# +#00000150 <00000000\.ovl_call\.f4_a1>: +#.* brsl \$75,.* <__ovly_load>.* +#.*00 04 04 10.* +# +#00000158 <00000000.ovl_call.14:8>: +#.* brsl \$75,.* <__ovly_load>.* +#.*00 08 04 34.* 00000130 <00000000\.ovl_call\.f1_a1>: - 130: 42 02 00 4f ila \$79,1024 # 400 - 134: 32 00 02 80 br 148 .* - 134: 
SPU_REL16 \*ABS\*\+0x148 - -00000138 <00000000\.ovl_call\.f2_a1>: - 138: 42 02 02 4f ila \$79,1028 # 404 - 13c: 32 00 01 80 br 148 .* - 13c: SPU_REL16 \*ABS\*\+0x148 - -00000140 <00000000\.ovl_call\.f4_a1>: - 140: 42 02 08 4f ila \$79,1040 # 410 - 144: 40 20 00 00 nop \$0 - 148: 42 00 00 ce ila \$78,1 - 14c: 32 00 0a 80 br 1a0 <__ovly_load> # 1a0 - 14c: SPU_REL16 __ovly_load - -00000150 <00000000\.ovl_call\.f1_a2>: - 150: 42 02 00 4f ila \$79,1024 # 400 - 154: 32 00 02 80 br 168 .* - 154: SPU_REL16 \*ABS\*\+0x168 - -00000158 <00000000\.ovl_call\.f2_a2>: - 158: 42 02 12 4f ila \$79,1060 # 424 - 15c: 32 00 01 80 br 168 .* - 15c: SPU_REL16 \*ABS\*\+0x168 - -00000160 <00000000\.ovl_call\.14:8>: - 160: 42 02 1a 4f ila \$79,1076 # 434 - 164: 40 20 00 00 nop \$0 - 168: 42 00 01 4e ila \$78,2 - 16c: 32 00 06 80 br 1a0 <__ovly_load> # 1a0 - 16c: SPU_REL16 __ovly_load +.* ila \$78,1 +.* lnop +.* ila \$79,1024 # 400 +.* br .* <__ovly_load>.* + +00000140 <00000000\.ovl_call\.f2_a1>: +.* ila \$78,1 +.* lnop +.* ila \$79,1028 # 404 +.* br .* <__ovly_load>.* + +00000150 <00000000.ovl_call.f1_a2>: +.* ila \$78,2 +.* lnop +.* ila \$79,1024 # 400 +.* br .* <__ovly_load>.* + +00000160 <00000000\.ovl_call\.f2_a2>: +.* ila \$78,2 +.* lnop +.* ila \$79,1060 # 424 +.* br .* <__ovly_load>.* + +00000170 <00000000\.ovl_call\.f4_a1>: +.* ila \$78,1 +.* lnop +.* ila \$79,1040 # 410 +.* br .* <__ovly_load>.* + +00000180 <00000000.ovl_call.14:8>: +.* ila \$78,2 +.* lnop +.* ila \$79,1076 # 434 +.* br .* <__ovly_load>.* + #... [0-9a-f]+ <__ovly_return>: -[0-9a-f ]+: 3f e1 00 4e shlqbyi \$78,\$0,4 -[0-9a-f ]+: 3f e2 00 4f shlqbyi \$79,\$0,8 -[0-9a-f ]+: 25 00 27 ce biz \$78,\$79 - +#... [0-9a-f]+ <__ovly_load>: #... 
[0-9a-f]+ <_ovly_debug_event>: @@ -73,67 +98,75 @@ Disassembly of section \.text: Disassembly of section \.ov_a1: 00000400 : - 400: 32 00 01 80 br 40c # 40c - 400: SPU_REL16 f3_a1 +.* br .* .* +.*SPU_REL16 f3_a1 00000404 : - 404: 42 00 a0 03 ila \$3,320 # 140 - 404: SPU_ADDR18 f4_a1 - 408: 35 00 00 00 bi \$0 +#.* ila \$3,336 # 150 +.* ila \$3,368 # 170 +.*SPU_ADDR18 f4_a1 +.* bi \$0 0000040c : - 40c: 35 00 00 00 bi \$0 +.* bi \$0 00000410 : - 410: 35 00 00 00 bi \$0 +.* bi \$0 \.\.\. Disassembly of section \.ov_a2: 00000400 : - 400: 24 00 40 80 stqd \$0,16\(\$1\) - 404: 24 ff 80 81 stqd \$1,-32\(\$1\) - 408: 1c f8 00 81 ai \$1,\$1,-32 - 40c: 33 7f a4 00 brsl \$0,12c # 12c - 40c: SPU_REL16 f0 - 410: 33 7f a4 00 brsl \$0,130 <00000000\.ovl_call\.f1_a1> # 130 - 410: SPU_REL16 f1_a1 - 414: 33 00 03 80 brsl \$0,430 # 430 - 414: SPU_REL16 f3_a2 - 418: 34 00 c0 80 lqd \$0,48\(\$1\) # 30 - 41c: 1c 08 00 81 ai \$1,\$1,32 # 20 - 420: 35 00 00 00 bi \$0 +.* stqd \$0,16\(\$1\) +.* stqd \$1,-32\(\$1\) +.* ai \$1,\$1,-32 +.* brsl \$0,12c # 12c +.*SPU_REL16 f0 +.* brsl \$0,130 <00000000\.ovl_call\.f1_a1> # 130 +.*SPU_REL16 f1_a1 +.* brsl \$0,430 # 430 +.*SPU_REL16 f3_a2 +.* lqd \$0,48\(\$1\) # 30 +.* ai \$1,\$1,32 # 20 +.* bi \$0 00000424 : - 424: 41 00 00 03 ilhu \$3,0 - 424: SPU_ADDR16_HI f4_a2 - 428: 60 80 b0 03 iohl \$3,352 # 160 - 428: SPU_ADDR16_LO f4_a2 - 42c: 35 00 00 00 bi \$0 +.* ilhu \$3,0 +.*SPU_ADDR16_HI f4_a2 +#.* iohl \$3,344 # 158 +.* iohl \$3,384 # 180 +.*SPU_ADDR16_LO f4_a2 +.* bi \$0 00000430 : - 430: 35 00 00 00 bi \$0 +.* bi \$0 00000434 : - 434: 32 7f ff 80 br 430 # 430 - 434: SPU_REL16 f3_a2 +.* br .* .* +.*SPU_REL16 f3_a2 \.\.\. Disassembly of section .data: -00000440 <_ovly_table>: - 440: 00 00 04 00 .* - 444: 00 00 00 20 .* - 448: 00 00 02 f0 .* +00000440 <_ovly_table-0x10>: + \.\.\. 
44c: 00 00 00 01 .* +00000450 <_ovly_table>: 450: 00 00 04 00 .* - 454: 00 00 00 40 .* - 458: 00 00 03 10 .* - 45c: 00 00 00 01 .* + 454: 00 00 00 20 .* +# 458: 00 00 03 40 .* + 458: 00 00 03 70 .* + 45c: 00 00 00 02 .* + 460: 00 00 04 00 .* + 464: 00 00 00 40 .* +# 468: 00 00 03 60 .* + 468: 00 00 03 90 .* + 46c: 00 00 00 02 .* + +00000470 <_ovly_buf_table>: + \.\.\. -00000460 <_ovly_buf_table>: - 460: 00 00 00 00 .* Disassembly of section \.toe: -00000470 <_EAR_>: +00000480 <_EAR_>: \.\.\. Disassembly of section \.note\.spu_name: diff --git a/ld/testsuite/ld-spu/ovl2.d b/ld/testsuite/ld-spu/ovl2.d index bf62e03..1cd1d33 100644 --- a/ld/testsuite/ld-spu/ovl2.d +++ b/ld/testsuite/ld-spu/ovl2.d @@ -7,40 +7,50 @@ Disassembly of section \.text: 00000100 <_start>: - 100: 33 00 06 00 brsl \$0,130 <00000000\.ovl_call\.f1_a1> # 130 - 100: SPU_REL16 f1_a1 - 104: 33 00 03 80 brsl \$0,120 <00000000\.ovl_call\.10:4> # 120 - 104: SPU_REL16 setjmp - 108: 32 7f ff 00 br 100 <_start> # 100 - 108: SPU_REL16 _start +.* brsl \$0,.* <00000000\.ovl_call\.f1_a1>.* +.*SPU_REL16 f1_a1 +.* brsl \$0,.* <00000000\.ovl_call\.10:4>.* +.*SPU_REL16 setjmp +.* br 100 <_start> # 100 +.*SPU_REL16 _start 0000010c : - 10c: 35 00 00 00 bi \$0 +.* bi \$0 00000110 : - 110: 35 00 00 00 bi \$0 - ... 
- -00000120 <00000000\.ovl_call.10:4>: - 120: 42 00 86 4f ila \$79,268 # 10c - 124: 40 20 00 00 nop \$0 - 128: 42 00 00 4e ila \$78,0 - 12c: 32 00 0a 80 br 180 <__ovly_load> # 180 - 12c: SPU_REL16 __ovly_load - -00000130 <00000000\.ovl_call.f1_a1>: - 130: 42 02 00 4f ila \$79,1024 # 400 - 134: 40 20 00 00 nop \$0 - 138: 42 00 00 ce ila \$78,1 - 13c: 32 00 08 80 br 180 <__ovly_load> # 180 - 13c: SPU_REL16 __ovly_load +.* bi \$0 +.* + +#00000118 <00000000\.ovl_call.f1_a1>: +#.* brsl \$75,.* <__ovly_load>.* +#.*00 04 04 00.* +# +#00000120 <00000000\.ovl_call.10:4>: +#.* brsl \$75,.* <__ovly_load>.* +#.*00 00 01 0c.* +# +#00000128 <_SPUEAR_f1_a2>: +#.* brsl \$75,.* <__ovly_load>.* +#.*00 08 04 00.* + +00000120 <00000000\.ovl_call.f1_a1>: +.* ila \$78,1 +.* lnop +.* ila \$79,1024 # 400 +.* br .* <__ovly_load>.* + +00000130 <00000000\.ovl_call.10:4>: +.* ila \$78,0 +.* lnop +.* ila \$79,268 # 10c +.* br .* <__ovly_load>.* 00000140 <_SPUEAR_f1_a2>: - 140: 42 02 00 4f ila \$79,1024 # 400 - 144: 40 20 00 00 nop \$0 - 148: 42 00 01 4e ila \$78,2 - 14c: 32 00 06 80 br 180 <__ovly_load> # 180 - 14c: SPU_REL16 __ovly_load +.* ila \$78,2 +.* lnop +.* ila \$79,1024 # 400 +.* br .* <__ovly_load>.* + #... Disassembly of section \.ov_a1: @@ -55,21 +65,27 @@ Disassembly of section \.ov_a2: \.\.\. Disassembly of section \.data: -00000410 <_ovly_table>: - 410: 00 00 04 00 .* - 414: 00 00 00 10 .* - 418: 00 00 02 d0 .* +00000410 <_ovly_table-0x10>: + \.\.\. 41c: 00 00 00 01 .* +00000420 <_ovly_table>: 420: 00 00 04 00 .* 424: 00 00 00 10 .* - 428: 00 00 02 e0 .* - 42c: 00 00 00 01 .* +# 428: 00 00 03 10 .* + 428: 00 00 03 30 .* + 42c: 00 00 00 02 .* + 430: 00 00 04 00 .* + 434: 00 00 00 10 .* +# 438: 00 00 03 20 .* + 438: 00 00 03 40 .* + 43c: 00 00 00 02 .* + +00000440 <_ovly_buf_table>: + \.\.\. -00000430 <_ovly_buf_table>: - 430: 00 00 00 00 .* Disassembly of section \.toe: -00000440 <_EAR_>: +00000450 <_EAR_>: \.\.\. Disassembly of section \.note\.spu_name: -- 2.7.4