From ddb2b44229cca1b2f601adaa62f4de9df291bddf Mon Sep 17 00:00:00 2001 From: Alan Modra Date: Wed, 29 Oct 2003 22:59:37 +0000 Subject: [PATCH] * merge.c (struct sec_merge_sec_info): Update comment. (struct sec_merge_hash_entry): Remove entsize. (sec_merge_hash_lookup): Only adjust alignment when creating. (sec_merge_emit): Remove register keyword. (cmplengthentry, last4_eq, last_eq): Delete. (strrevcmp, strrevcmp_align, is_suffix): New. (merge_strings): Use them to implement fast suffix merging. * elf-strtab.c (struct elf_strtab_hash_entry): Update comments. Make "len" signed. (_bfd_elf_strtab_add): Lose on >2G strings. (_bfd_elf_strtab_emit): Don't emit strings with len < 0. (cmplengthentry, last4_eq): Delete. (strrevcmp, is_suffix): New. (_bfd_elf_strtab_finalize): Rework to implement fast suffix merging. --- bfd/ChangeLog | 20 +++++ bfd/elf-strtab.c | 179 +++++++++++++++---------------------- bfd/merge.c | 263 ++++++++++++++++++++++--------------------------------- 3 files changed, 199 insertions(+), 263 deletions(-) diff --git a/bfd/ChangeLog b/bfd/ChangeLog index 87e516e..4ebc88e 100644 --- a/bfd/ChangeLog +++ b/bfd/ChangeLog @@ -1,3 +1,23 @@ +2003-10-30 Lars Knoll + Michael Matz + Jakub Jelinek + Alan Modra + + * merge.c (struct sec_merge_sec_info): Update comment. + (struct sec_merge_hash_entry): Remove entsize. + (sec_merge_hash_lookup): Only adjust alignment when creating. + (sec_merge_emit): Remove register keyword. + (cmplengthentry, last4_eq, last_eq): Delete. + (strrevcmp, strrevcmp_align, is_suffix): New. + (merge_strings): Use them to implement fast suffix merging. + * elf-strtab.c (struct elf_strtab_hash_entry): Update comments. + Make "len" signed. + (_bfd_elf_strtab_add): Lose on >2G strings. + (_bfd_elf_strtab_emit): Don't emit strings with len < 0. + (cmplengthentry, last4_eq): Delete. + (strrevcmp, is_suffix): New. + (_bfd_elf_strtab_finalize): Rework to implement fast suffix merging. + 2003-10-29 Daniel Jacobowitz * elf32-arm.h (elf32_arm_final_link_relocate): Move check for diff --git a/bfd/elf-strtab.c b/bfd/elf-strtab.c index 764ab54..673b9d7 100644 --- a/bfd/elf-strtab.c +++ b/bfd/elf-strtab.c @@ -1,5 +1,5 @@ /* ELF strtab with GC and suffix merging support. - Copyright 2001, 2002 Free Software Foundation, Inc. + Copyright 2001, 2002, 2003 Free Software Foundation, Inc. Written by Jakub Jelinek . This file is part of BFD, the Binary File Descriptor library. @@ -30,15 +30,14 @@ struct elf_strtab_hash_entry { struct bfd_hash_entry root; - /* Length of this entry. */ - unsigned int len; + /* Length of this entry. This includes the zero terminator. */ + int len; unsigned int refcount; union { /* Index within the merged section. */ bfd_size_type index; - /* Entry this is a suffix of (if len is 0). */ + /* Entry this is a suffix of (if len < 0). */ struct elf_strtab_hash_entry *suffix; - struct elf_strtab_hash_entry *next; } u; }; @@ -158,6 +157,8 @@ _bfd_elf_strtab_add (struct elf_strtab_hash *tab, if (entry->len == 0) { entry->len = strlen (str) + 1; + /* 2G strings lose. */ + BFD_ASSERT (entry->len > 0); if (tab->size == tab->alloced) { bfd_size_type amt = sizeof (struct elf_strtab_hash_entry *); @@ -235,14 +236,14 @@ _bfd_elf_strtab_emit (register bfd *abfd, struct elf_strtab_hash *tab) for (i = 1; i < tab->size; ++i) { register const char *str; - register size_t len; + register unsigned int len; - str = tab->array[i]->root.string; - len = tab->array[i]->len; BFD_ASSERT (tab->array[i]->refcount == 0); - if (len == 0) + len = tab->array[i]->len; + if ((int) len < 0) continue; + str = tab->array[i]->root.string; if (bfd_bwrite (str, len, abfd) != len) return FALSE; @@ -253,40 +254,41 @@ _bfd_elf_strtab_emit (register bfd *abfd, struct elf_strtab_hash *tab) return TRUE; } -/* Compare two elf_strtab_hash_entry structures. This is called via qsort. */ +/* Compare two elf_strtab_hash_entry structures. Called via qsort. */ static int -cmplengthentry (const void *a, const void *b) +strrevcmp (const void *a, const void *b) { struct elf_strtab_hash_entry *A = *(struct elf_strtab_hash_entry **) a; struct elf_strtab_hash_entry *B = *(struct elf_strtab_hash_entry **) b; + unsigned int lenA = A->len; + unsigned int lenB = B->len; + const unsigned char *s = A->root.string + lenA - 1; + const unsigned char *t = B->root.string + lenB - 1; + int l = lenA < lenB ? lenA : lenB; - if (A->len < B->len) - return 1; - else if (A->len > B->len) - return -1; - - return memcmp (A->root.string, B->root.string, A->len); + while (l) + { + if (*s != *t) + return (int) *s - (int) *t; + s--; + t--; + l--; + } + return lenA - lenB; } -static int -last4_eq (const void *a, const void *b) +static inline int +is_suffix (const struct elf_strtab_hash_entry *A, + const struct elf_strtab_hash_entry *B) { - const struct elf_strtab_hash_entry *A = a; - const struct elf_strtab_hash_entry *B = b; - - if (memcmp (A->root.string + A->len - 5, B->root.string + B->len - 5, 4) - != 0) - /* This was a hashtable collision. */ - return 0; - if (A->len <= B->len) /* B cannot be a suffix of A unless A is equal to B, which is guaranteed not to be equal by the hash table. */ return 0; return memcmp (A->root.string + (A->len - B->len), - B->root.string, B->len - 5) == 0; + B->root.string, B->len - 1) == 0; } /* This function assigns final string table offsets for used strings, @@ -295,10 +297,8 @@ last4_eq (const void *a, const void *b) void _bfd_elf_strtab_finalize (struct elf_strtab_hash *tab) { - struct elf_strtab_hash_entry **array, **a, **end, *e; - htab_t last4tab = NULL; + struct elf_strtab_hash_entry **array, **a, *e; bfd_size_type size, amt; - struct elf_strtab_hash_entry *last[256], **last_ptr[256]; /* GCC 2.91.66 (egcs-1.1.2) on i386 miscompiles this function when i is a 64-bit bfd_size_type: a 64-bit target or --enable-64-bit-bfd. @@ -306,105 +306,71 @@ _bfd_elf_strtab_finalize (struct elf_strtab_hash *tab) cycles. */ size_t i; - /* Now sort the strings by length, longest first. */ - array = NULL; + /* Sort the strings by suffix and length. */ amt = tab->size * sizeof (struct elf_strtab_hash_entry *); array = bfd_malloc (amt); if (array == NULL) goto alloc_failure; - memset (last, 0, sizeof (last)); - for (i = 0; i < 256; ++i) - last_ptr[i] = &last[i]; for (i = 1, a = array; i < tab->size; ++i) - if (tab->array[i]->refcount) - *a++ = tab->array[i]; - else - tab->array[i]->len = 0; + { + e = tab->array[i]; + if (e->refcount) + { + *a++ = e; + /* Adjust the length to not include the zero terminator. */ + e->len -= 1; + } + else + e->len = 0; + } size = a - array; + if (size != 0) + { + qsort (array, size, sizeof (struct elf_strtab_hash_entry *), strrevcmp); - qsort (array, size, sizeof (struct elf_strtab_hash_entry *), cmplengthentry); + /* Loop over the sorted array and merge suffixes. Start from the + end because we want eg. - last4tab = htab_create_alloc (size * 4, NULL, last4_eq, NULL, calloc, free); - if (last4tab == NULL) - goto alloc_failure; + s1 -> "d" + s2 -> "bcd" + s3 -> "abcd" - /* Now insert the strings into hash tables (strings with last 4 characters - and strings with last character equal), look for longer strings which - we're suffix of. */ - for (a = array, end = array + size; a < end; a++) - { - register hashval_t hash; - unsigned int c; - unsigned int j; - const unsigned char *s; - void **p; - - e = *a; - if (e->len > 4) - { - s = e->root.string + e->len - 1; - hash = 0; - for (j = 0; j < 4; j++) - { - c = *--s; - hash += c + (c << 17); - hash ^= hash >> 2; - } - p = htab_find_slot_with_hash (last4tab, e, hash, INSERT); - if (p == NULL) - goto alloc_failure; - if (*p) - { - struct elf_strtab_hash_entry *ent; + to end up as - ent = *p; - e->u.suffix = ent; - e->len = 0; - continue; - } - else - *p = e; - } - else - { - struct elf_strtab_hash_entry *tem; + s3 -> "abcd" + s2 _____^ + s1 _______^ - c = e->root.string[e->len - 2] & 0xff; + ie. we don't want s1 pointing into the old s2. */ + e = *--a; + e->len += 1; + while (--a >= array) + { + struct elf_strtab_hash_entry *cmp = *a; - for (tem = last[c]; tem; tem = tem->u.next) - if (tem->len > e->len - && memcmp (tem->root.string + (tem->len - e->len), - e->root.string, e->len - 1) == 0) - break; - if (tem) + cmp->len += 1; + if (is_suffix (e, cmp)) { - e->u.suffix = tem; - e->len = 0; - continue; + cmp->u.suffix = e; + cmp->len = -cmp->len; } + else + e = cmp; } - - c = e->root.string[e->len - 2] & 0xff; - /* Put longest strings first. */ - *last_ptr[c] = e; - last_ptr[c] = &e->u.next; - e->u.next = NULL; } alloc_failure: if (array) free (array); - if (last4tab) - htab_delete (last4tab); - /* Now assign positions to the strings we want to keep. */ + /* Assign positions to the strings we want to keep. */ size = 1; for (i = 1; i < tab->size; ++i) { e = tab->array[i]; - if (e->refcount && e->len) + if (e->refcount && e->len > 0) { e->u.index = size; size += e->len; @@ -413,12 +379,11 @@ alloc_failure: tab->sec_size = size; - /* And now adjust the rest. */ + /* Adjust the rest. */ for (i = 1; i < tab->size; ++i) { e = tab->array[i]; - if (e->refcount && ! e->len) - e->u.index = e->u.suffix->u.index - + (e->u.suffix->len - strlen (e->root.string) - 1); + if (e->refcount && e->len < 0) + e->u.index = e->u.suffix->u.index + (e->u.suffix->len + e->len); } } diff --git a/bfd/merge.c b/bfd/merge.c index 0371bd0..89f45cd 100644 --- a/bfd/merge.c +++ b/bfd/merge.c @@ -34,7 +34,7 @@ struct sec_merge_sec_info; struct sec_merge_hash_entry { struct bfd_hash_entry root; - /* Length of this entry. */ + /* Length of this entry. This includes the zero terminator. */ unsigned int len; /* Start of this string needs to be aligned to alignment octets (not 1 << align). */ @@ -43,8 +43,6 @@ struct sec_merge_hash_entry { /* Index within the merged section. */ bfd_size_type index; - /* Entity size (if present in suffix hash tables). */ - unsigned int entsize; /* Entry this is a suffix of (if alignment is 0). */ struct sec_merge_hash_entry *suffix; } u; @@ -205,9 +203,12 @@ sec_merge_hash_lookup (struct sec_merge_hash *table, const char *string, alignment, we need to insert another copy. */ if (hashp->alignment < alignment) { - /* Mark the less aligned copy as deleted. */ - hashp->len = 0; - hashp->alignment = 0; + if (create) + { + /* Mark the less aligned copy as deleted. */ + hashp->len = 0; + hashp->alignment = 0; + } break; } return hashp; @@ -287,7 +288,7 @@ sec_merge_add (struct sec_merge_hash *tab, const char *str, } static bfd_boolean -sec_merge_emit (register bfd *abfd, struct sec_merge_hash_entry *entry) +sec_merge_emit (bfd *abfd, struct sec_merge_hash_entry *entry) { struct sec_merge_sec_info *secinfo = entry->secinfo; asection *sec = secinfo->sec; @@ -420,79 +421,6 @@ _bfd_merge_section (bfd *abfd, void **psinfo, asection *sec, void **psecinfo) return FALSE; } -/* Compare two sec_merge_hash_entry structures. This is called via qsort. */ - -static int -cmplengthentry (const void *a, const void *b) -{ - struct sec_merge_hash_entry * A = *(struct sec_merge_hash_entry **) a; - struct sec_merge_hash_entry * B = *(struct sec_merge_hash_entry **) b; - - if (A->len < B->len) - return 1; - else if (A->len > B->len) - return -1; - - return memcmp (A->root.string, B->root.string, A->len); -} - -static int -last4_eq (const void *a, const void *b) -{ - struct sec_merge_hash_entry * A = (struct sec_merge_hash_entry *) a; - struct sec_merge_hash_entry * B = (struct sec_merge_hash_entry *) b; - - if (memcmp (A->root.string + A->len - 5 * A->u.entsize, - B->root.string + B->len - 5 * A->u.entsize, - 4 * A->u.entsize) != 0) - /* This was a hashtable collision. */ - return 0; - - if (A->len <= B->len) - /* B cannot be a suffix of A unless A is equal to B, which is guaranteed - not to be equal by the hash table. */ - return 0; - - if (A->alignment < B->alignment - || ((A->len - B->len) & (B->alignment - 1))) - /* The suffix is not sufficiently aligned. */ - return 0; - - return memcmp (A->root.string + (A->len - B->len), - B->root.string, B->len - 5 * A->u.entsize) == 0; -} - -static int -last_eq (const void *a, const void *b) -{ - struct sec_merge_hash_entry * A = (struct sec_merge_hash_entry *) a; - struct sec_merge_hash_entry * B = (struct sec_merge_hash_entry *) b; - - if (B->len >= 5 * A->u.entsize) - /* Longer strings are just pushed into the hash table, - they'll be used when looking up for very short strings. */ - return 0; - - if (memcmp (A->root.string + A->len - 2 * A->u.entsize, - B->root.string + B->len - 2 * A->u.entsize, - A->u.entsize) != 0) - /* This was a hashtable collision. */ - return 0; - - if (A->len <= B->len) - /* B cannot be a suffix of A unless A is equal to B, which is guaranteed - not to be equal by the hash table. */ - return 0; - - if (A->alignment < B->alignment - || ((A->len - B->len) & (B->alignment - 1))) - /* The suffix is not sufficiently aligned. */ - return 0; - - return memcmp (A->root.string + (A->len - B->len), - B->root.string, B->len - 2 * A->u.entsize) == 0; -} - /* Record one section into the hash table. */ static bfd_boolean record_section (struct sec_merge_info *sinfo, @@ -534,7 +462,7 @@ record_section (struct sec_merge_info *sinfo, goto error_return; } p++; - } + } } else { @@ -576,18 +504,81 @@ error_return: return FALSE; } +static int +strrevcmp (const void *a, const void *b) +{ + struct sec_merge_hash_entry *A = *(struct sec_merge_hash_entry **) a; + struct sec_merge_hash_entry *B = *(struct sec_merge_hash_entry **) b; + unsigned int lenA = A->len; + unsigned int lenB = B->len; + const unsigned char *s = A->root.string + lenA - 1; + const unsigned char *t = B->root.string + lenB - 1; + int l = lenA < lenB ? lenA : lenB; + + while (l) + { + if (*s != *t) + return (int) *s - (int) *t; + s--; + t--; + l--; + } + return lenA - lenB; +} + +/* Like strrevcmp, but for the case where all strings have the same + alignment > entsize. */ + +static int +strrevcmp_align (const void *a, const void *b) +{ + struct sec_merge_hash_entry *A = *(struct sec_merge_hash_entry **) a; + struct sec_merge_hash_entry *B = *(struct sec_merge_hash_entry **) b; + unsigned int lenA = A->len; + unsigned int lenB = B->len; + const unsigned char *s = A->root.string + lenA - 1; + const unsigned char *t = B->root.string + lenB - 1; + int l = lenA < lenB ? lenA : lenB; + int tail_align = (lenA & (A->alignment - 1)) - (lenB & (A->alignment - 1)); + + if (tail_align != 0) + return tail_align; + + while (l) + { + if (*s != *t) + return (int) *s - (int) *t; + s--; + t--; + l--; + } + return lenA - lenB; +} + +static inline int +is_suffix (const struct sec_merge_hash_entry *A, + const struct sec_merge_hash_entry *B) +{ + if (A->len <= B->len) + /* B cannot be a suffix of A unless A is equal to B, which is guaranteed + not to be equal by the hash table. */ + return 0; + + return memcmp (A->root.string + (A->len - B->len), + B->root.string, B->len) == 0; +} + /* This is a helper function for _bfd_merge_sections. It attempts to merge strings matching suffixes of longer strings. */ static void merge_strings (struct sec_merge_info *sinfo) { - struct sec_merge_hash_entry **array, **a, **end, *e; + struct sec_merge_hash_entry **array, **a, *e; struct sec_merge_sec_info *secinfo; - htab_t lasttab = NULL, last4tab = NULL; bfd_size_type size, amt; + unsigned int alignment = 0; - /* Now sort the strings by length, longest first. */ - array = NULL; + /* Now sort the strings */ amt = sinfo->htab->size * sizeof (struct sec_merge_hash_entry *); array = (struct sec_merge_hash_entry **) bfd_malloc (amt); if (array == NULL) @@ -595,90 +586,50 @@ merge_strings (struct sec_merge_info *sinfo) for (e = sinfo->htab->first, a = array; e; e = e->next) if (e->alignment) - *a++ = e; + { + *a++ = e; + /* Adjust the length to not include the zero terminator. */ + e->len -= sinfo->htab->entsize; + if (alignment != e->alignment) + { + if (alignment == 0) + alignment = e->alignment; + else + alignment = (unsigned) -1; + } + } sinfo->htab->size = a - array; - - qsort (array, (size_t) sinfo->htab->size, - sizeof (struct sec_merge_hash_entry *), cmplengthentry); - - last4tab = htab_create_alloc ((size_t) sinfo->htab->size * 4, - NULL, last4_eq, NULL, calloc, free); - lasttab = htab_create_alloc ((size_t) sinfo->htab->size * 4, - NULL, last_eq, NULL, calloc, free); - if (lasttab == NULL || last4tab == NULL) - goto alloc_failure; - - /* Now insert the strings into hash tables (strings with last 4 characters - and strings with last character equal), look for longer strings which - we're suffix of. */ - for (a = array, end = array + sinfo->htab->size; a < end; a++) + if (sinfo->htab->size != 0) { - register hashval_t hash; - unsigned int c; - unsigned int i; - const unsigned char *s; - void **p; - - e = *a; - e->u.entsize = sinfo->htab->entsize; - if (e->len <= e->u.entsize) - break; - if (e->len > 4 * e->u.entsize) + qsort (array, (size_t) sinfo->htab->size, + sizeof (struct sec_merge_hash_entry *), + (alignment != (unsigned) -1 && alignment > sinfo->htab->entsize + ? strrevcmp_align : strrevcmp)); + + /* Loop over the sorted array and merge suffixes */ + e = *--a; + e->len += sinfo->htab->entsize; + while (--a >= array) { - s = (const unsigned char *) (e->root.string + e->len - e->u.entsize); - hash = 0; - for (i = 0; i < 4 * e->u.entsize; i++) - { - c = *--s; - hash += c + (c << 17); - hash ^= hash >> 2; - } - p = htab_find_slot_with_hash (last4tab, e, hash, INSERT); - if (p == NULL) - goto alloc_failure; - if (*p) - { - struct sec_merge_hash_entry *ent; + struct sec_merge_hash_entry *cmp = *a; - ent = (struct sec_merge_hash_entry *) *p; - e->u.suffix = ent; - e->alignment = 0; - continue; + cmp->len += sinfo->htab->entsize; + if (e->alignment >= cmp->alignment + && !((e->len - cmp->len) & (cmp->alignment - 1)) + && is_suffix (e, cmp)) + { + cmp->u.suffix = e; + cmp->alignment = 0; } else - *p = e; - } - s = (const unsigned char *) (e->root.string + e->len - e->u.entsize); - hash = 0; - for (i = 0; i < e->u.entsize; i++) - { - c = *--s; - hash += c + (c << 17); - hash ^= hash >> 2; + e = cmp; } - p = htab_find_slot_with_hash (lasttab, e, hash, INSERT); - if (p == NULL) - goto alloc_failure; - if (*p) - { - struct sec_merge_hash_entry *ent; - - ent = (struct sec_merge_hash_entry *) *p; - e->u.suffix = ent; - e->alignment = 0; - } - else - *p = e; } alloc_failure: if (array) free (array); - if (lasttab) - htab_delete (lasttab); - if (last4tab) - htab_delete (last4tab); /* Now assign positions to the strings we want to keep. */ size = 0; -- 2.7.4