* merge.c (struct sec_merge_sec_info): Update comment.

author Alan Modra <amodra@gmail.com>

Wed, 29 Oct 2003 22:59:37 +0000 (22:59 +0000)

committer Alan Modra <amodra@gmail.com>

Wed, 29 Oct 2003 22:59:37 +0000 (22:59 +0000)
author Alan Modra <amodra@gmail.com>
Wed, 29 Oct 2003 22:59:37 +0000 (22:59 +0000)
committer Alan Modra <amodra@gmail.com>
Wed, 29 Oct 2003 22:59:37 +0000 (22:59 +0000)
diff --git a/bfd/ChangeLog b/bfd/ChangeLog

index 87e516ed6bd320cef0c053f75c84c0689751b9b0..4ebc88ec742a9fd6d9f73941bbfe6cb419b00bd2 100644 (file)
--- a/bfd/ChangeLog
+++ b/bfd/ChangeLog
@@ -1,3 +1,23 @@
+2003-10-30  Lars Knoll  <lars@trolltech.com>
+           Michael Matz  <matz@suse.de>
+           Jakub Jelinek  <jakub@redhat.com>
+           Alan Modra  <amodra@bigpond.net.au>
+
+       * merge.c (struct sec_merge_sec_info): Update comment.
+       (struct sec_merge_hash_entry): Remove entsize.
+       (sec_merge_hash_lookup): Only adjust alignment when creating.
+       (sec_merge_emit): Remove register keyword.
+       (cmplengthentry, last4_eq, last_eq): Delete.
+       (strrevcmp, strrevcmp_align, is_suffix): New.
+       (merge_strings): Use them to implement fast suffix merging.
+       * elf-strtab.c (struct elf_strtab_hash_entry): Update comments.
+       Make "len" signed.
+       (_bfd_elf_strtab_add): Lose on >2G strings.
+       (_bfd_elf_strtab_emit): Don't emit strings with len < 0.
+       (cmplengthentry, last4_eq): Delete.
+       (strrevcmp, is_suffix): New.
+       (_bfd_elf_strtab_finalize): Rework to implement fast suffix merging.
+
  2003-10-29  Daniel Jacobowitz  <drow@mvista.com>
  
          * elf32-arm.h (elf32_arm_final_link_relocate): Move check for
diff --git a/bfd/elf-strtab.c b/bfd/elf-strtab.c

index 764ab5442328eafb15ffc925647c91f7d5986514..673b9d77ac98d5e36b46d910ceb01ec0e3708883 100644 (file)
--- a/bfd/elf-strtab.c
+++ b/bfd/elf-strtab.c
@@ -1,5 +1,5 @@
  /* ELF strtab with GC and suffix merging support.
-   Copyright 2001, 2002 Free Software Foundation, Inc.
+   Copyright 2001, 2002, 2003 Free Software Foundation, Inc.
     Written by Jakub Jelinek <jakub@redhat.com>.
  
     This file is part of BFD, the Binary File Descriptor library.
@@ -30,15 +30,14 @@
  struct elf_strtab_hash_entry
  {
    struct bfd_hash_entry root;
-  /* Length of this entry.  */
-  unsigned int len;
+  /* Length of this entry.  This includes the zero terminator.  */
+  int len;
    unsigned int refcount;
    union {
      /* Index within the merged section.  */
      bfd_size_type index;
-    /* Entry this is a suffix of (if len is 0).  */
+    /* Entry this is a suffix of (if len < 0).  */
      struct elf_strtab_hash_entry *suffix;
-    struct elf_strtab_hash_entry *next;
    } u;
  };
  
@@ -158,6 +157,8 @@ _bfd_elf_strtab_add (struct elf_strtab_hash *tab,
    if (entry->len == 0)
      {
        entry->len = strlen (str) + 1;
+      /* 2G strings lose.  */
+      BFD_ASSERT (entry->len > 0);
        if (tab->size == tab->alloced)
         {
           bfd_size_type amt = sizeof (struct elf_strtab_hash_entry *);
@@ -235,14 +236,14 @@ _bfd_elf_strtab_emit (register bfd *abfd, struct elf_strtab_hash *tab)
    for (i = 1; i < tab->size; ++i)
      {
        register const char *str;
-      register size_t len;
+      register unsigned int len;
  
-      str = tab->array[i]->root.string;
-      len = tab->array[i]->len;
        BFD_ASSERT (tab->array[i]->refcount == 0);
-      if (len == 0)
+      len = tab->array[i]->len;
+      if ((int) len < 0)
         continue;
  
+      str = tab->array[i]->root.string;
        if (bfd_bwrite (str, len, abfd) != len)
         return FALSE;
  
@@ -253,40 +254,41 @@ _bfd_elf_strtab_emit (register bfd *abfd, struct elf_strtab_hash *tab)
    return TRUE;
  }
  
-/* Compare two elf_strtab_hash_entry structures.  This is called via qsort.  */
+/* Compare two elf_strtab_hash_entry structures.  Called via qsort.  */
  
  static int
-cmplengthentry (const void *a, const void *b)
+strrevcmp (const void *a, const void *b)
  {
    struct elf_strtab_hash_entry *A = *(struct elf_strtab_hash_entry **) a;
    struct elf_strtab_hash_entry *B = *(struct elf_strtab_hash_entry **) b;
+  unsigned int lenA = A->len;
+  unsigned int lenB = B->len;
+  const unsigned char *s = A->root.string + lenA - 1;
+  const unsigned char *t = B->root.string + lenB - 1;
+  int l = lenA < lenB ? lenA : lenB;
  
-  if (A->len < B->len)
-    return 1;
-  else if (A->len > B->len)
-    return -1;
-
-  return memcmp (A->root.string, B->root.string, A->len);
+  while (l)
+    {
+      if (*s != *t)
+       return (int) *s - (int) *t;
+      s--;
+      t--;
+      l--;
+    }
+  return lenA - lenB;
  }
  
-static int
-last4_eq (const void *a, const void *b)
+static inline int
+is_suffix (const struct elf_strtab_hash_entry *A,
+          const struct elf_strtab_hash_entry *B)
  {
-  const struct elf_strtab_hash_entry *A = a;
-  const struct elf_strtab_hash_entry *B = b;
-
-  if (memcmp (A->root.string + A->len - 5, B->root.string + B->len - 5, 4)
-      != 0)
-    /* This was a hashtable collision.  */
-    return 0;
-
    if (A->len <= B->len)
      /* B cannot be a suffix of A unless A is equal to B, which is guaranteed
         not to be equal by the hash table.  */
      return 0;
  
    return memcmp (A->root.string + (A->len - B->len),
-                B->root.string, B->len - 5) == 0;
+                B->root.string, B->len - 1) == 0;
  }
  
  /* This function assigns final string table offsets for used strings,
@@ -295,10 +297,8 @@ last4_eq (const void *a, const void *b)
  void
  _bfd_elf_strtab_finalize (struct elf_strtab_hash *tab)
  {
-  struct elf_strtab_hash_entry **array, **a, **end, *e;
-  htab_t last4tab = NULL;
+  struct elf_strtab_hash_entry **array, **a, *e;
    bfd_size_type size, amt;
-  struct elf_strtab_hash_entry *last[256], **last_ptr[256];
  
    /* GCC 2.91.66 (egcs-1.1.2) on i386 miscompiles this function when i is
       a 64-bit bfd_size_type: a 64-bit target or --enable-64-bit-bfd.
@@ -306,105 +306,71 @@ _bfd_elf_strtab_finalize (struct elf_strtab_hash *tab)
       cycles.  */
    size_t i;
  
-  /* Now sort the strings by length, longest first.  */
-  array = NULL;
+  /* Sort the strings by suffix and length.  */
    amt = tab->size * sizeof (struct elf_strtab_hash_entry *);
    array = bfd_malloc (amt);
    if (array == NULL)
      goto alloc_failure;
  
-  memset (last, 0, sizeof (last));
-  for (i = 0; i < 256; ++i)
-    last_ptr[i] = &last[i];
    for (i = 1, a = array; i < tab->size; ++i)
-    if (tab->array[i]->refcount)
-      *a++ = tab->array[i];
-    else
-      tab->array[i]->len = 0;
+    {
+      e = tab->array[i];
+      if (e->refcount)
+       {
+         *a++ = e;
+         /* Adjust the length to not include the zero terminator.  */
+         e->len -= 1;
+       }
+      else
+       e->len = 0;
+    }
  
    size = a - array;
+  if (size != 0)
+    {
+      qsort (array, size, sizeof (struct elf_strtab_hash_entry *), strrevcmp);
  
-  qsort (array, size, sizeof (struct elf_strtab_hash_entry *), cmplengthentry);
+      /* Loop over the sorted array and merge suffixes.  Start from the
+        end because we want eg.
  
-  last4tab = htab_create_alloc (size * 4, NULL, last4_eq, NULL, calloc, free);
-  if (last4tab == NULL)
-    goto alloc_failure;
+        s1 -> "d"
+        s2 -> "bcd"
+        s3 -> "abcd"
  
-  /* Now insert the strings into hash tables (strings with last 4 characters
-     and strings with last character equal), look for longer strings which
-     we're suffix of.  */
-  for (a = array, end = array + size; a < end; a++)
-    {
-      register hashval_t hash;
-      unsigned int c;
-      unsigned int j;
-      const unsigned char *s;
-      void **p;
-
-      e = *a;
-      if (e->len > 4)
-       {
-         s = e->root.string + e->len - 1;
-         hash = 0;
-         for (j = 0; j < 4; j++)
-           {
-             c = *--s;
-             hash += c + (c << 17);
-             hash ^= hash >> 2;
-           }
-         p = htab_find_slot_with_hash (last4tab, e, hash, INSERT);
-         if (p == NULL)
-           goto alloc_failure;
-         if (*p)
-           {
-             struct elf_strtab_hash_entry *ent;
+        to end up as
  
-             ent = *p;
-             e->u.suffix = ent;
-             e->len = 0;
-             continue;
-           }
-         else
-           *p = e;
-       }
-      else
-       {
-         struct elf_strtab_hash_entry *tem;
+        s3 -> "abcd"
+        s2 _____^
+        s1 _______^
  
-         c = e->root.string[e->len - 2] & 0xff;
+        ie. we don't want s1 pointing into the old s2.  */
+      e = *--a;
+      e->len += 1;
+      while (--a >= array)
+       {
+         struct elf_strtab_hash_entry *cmp = *a;
  
-         for (tem = last[c]; tem; tem = tem->u.next)
-           if (tem->len > e->len
-               && memcmp (tem->root.string + (tem->len - e->len),
-                          e->root.string, e->len - 1) == 0)
-             break;
-         if (tem)
+         cmp->len += 1;
+         if (is_suffix (e, cmp))
             {
-             e->u.suffix = tem;
-             e->len = 0;
-             continue;
+             cmp->u.suffix = e;
+             cmp->len = -cmp->len;
             }
+         else
+           e = cmp;
         }
-
-      c = e->root.string[e->len - 2] & 0xff;
-      /* Put longest strings first.  */
-      *last_ptr[c] = e;
-      last_ptr[c] = &e->u.next;
-      e->u.next = NULL;
      }
  
  alloc_failure:
    if (array)
      free (array);
-  if (last4tab)
-    htab_delete (last4tab);
  
-  /* Now assign positions to the strings we want to keep.  */
+  /* Assign positions to the strings we want to keep.  */
    size = 1;
    for (i = 1; i < tab->size; ++i)
      {
        e = tab->array[i];
-      if (e->refcount && e->len)
+      if (e->refcount && e->len > 0)
         {
           e->u.index = size;
           size += e->len;
@@ -413,12 +379,11 @@ alloc_failure:
  
    tab->sec_size = size;
  
-  /* And now adjust the rest.  */
+  /* Adjust the rest.  */
    for (i = 1; i < tab->size; ++i)
      {
        e = tab->array[i];
-      if (e->refcount && ! e->len)
-       e->u.index = e->u.suffix->u.index
-                    + (e->u.suffix->len - strlen (e->root.string) - 1);
+      if (e->refcount && e->len < 0)
+       e->u.index = e->u.suffix->u.index + (e->u.suffix->len + e->len);
      }
  }
diff --git a/bfd/merge.c b/bfd/merge.c

index 0371bd0f4a0014030c4e0038d542b8a4c9589736..89f45cd521a2f64fa2bbf314bc324a1b0572b811 100644 (file)
--- a/bfd/merge.c
+++ b/bfd/merge.c
@@ -34,7 +34,7 @@ struct sec_merge_sec_info;
  struct sec_merge_hash_entry
  {
    struct bfd_hash_entry root;
-  /* Length of this entry.  */
+  /* Length of this entry.  This includes the zero terminator.  */
    unsigned int len;
    /* Start of this string needs to be aligned to
       alignment octets (not 1 << align).  */
@@ -43,8 +43,6 @@ struct sec_merge_hash_entry
    {
      /* Index within the merged section.  */
      bfd_size_type index;
-    /* Entity size (if present in suffix hash tables).  */
-    unsigned int entsize;
      /* Entry this is a suffix of (if alignment is 0).  */
      struct sec_merge_hash_entry *suffix;
    } u;
@@ -205,9 +203,12 @@ sec_merge_hash_lookup (struct sec_merge_hash *table, const char *string,
              alignment, we need to insert another copy.  */
           if (hashp->alignment < alignment)
             {
-             /*  Mark the less aligned copy as deleted.  */
-             hashp->len = 0;
-             hashp->alignment = 0;
+             if (create)
+               {
+                 /*  Mark the less aligned copy as deleted.  */
+                 hashp->len = 0;
+                 hashp->alignment = 0;
+               }
               break;
             }
           return hashp;
@@ -287,7 +288,7 @@ sec_merge_add (struct sec_merge_hash *tab, const char *str,
  }
  
  static bfd_boolean
-sec_merge_emit (register bfd *abfd, struct sec_merge_hash_entry *entry)
+sec_merge_emit (bfd *abfd, struct sec_merge_hash_entry *entry)
  {
    struct sec_merge_sec_info *secinfo = entry->secinfo;
    asection *sec = secinfo->sec;
@@ -420,79 +421,6 @@ _bfd_merge_section (bfd *abfd, void **psinfo, asection *sec, void **psecinfo)
    return FALSE;
  }
  
-/* Compare two sec_merge_hash_entry structures.  This is called via qsort.  */
-
-static int
-cmplengthentry (const void *a, const void *b)
-{
-  struct sec_merge_hash_entry * A = *(struct sec_merge_hash_entry **) a;
-  struct sec_merge_hash_entry * B = *(struct sec_merge_hash_entry **) b;
-
-  if (A->len < B->len)
-    return 1;
-  else if (A->len > B->len)
-    return -1;
-
-  return memcmp (A->root.string, B->root.string, A->len);
-}
-
-static int
-last4_eq (const void *a, const void *b)
-{
-  struct sec_merge_hash_entry * A = (struct sec_merge_hash_entry *) a;
-  struct sec_merge_hash_entry * B = (struct sec_merge_hash_entry *) b;
-
-  if (memcmp (A->root.string + A->len - 5 * A->u.entsize,
-             B->root.string + B->len - 5 * A->u.entsize,
-             4 * A->u.entsize) != 0)
-    /* This was a hashtable collision.  */
-    return 0;
-
-  if (A->len <= B->len)
-    /* B cannot be a suffix of A unless A is equal to B, which is guaranteed
-       not to be equal by the hash table.  */
-    return 0;
-
-  if (A->alignment < B->alignment
-      || ((A->len - B->len) & (B->alignment - 1)))
-    /* The suffix is not sufficiently aligned.  */
-    return 0;
-
-  return memcmp (A->root.string + (A->len - B->len),
-                B->root.string, B->len - 5 * A->u.entsize) == 0;
-}
-
-static int
-last_eq (const void *a, const void *b)
-{
-  struct sec_merge_hash_entry * A = (struct sec_merge_hash_entry *) a;
-  struct sec_merge_hash_entry * B = (struct sec_merge_hash_entry *) b;
-
-  if (B->len >= 5 * A->u.entsize)
-    /* Longer strings are just pushed into the hash table,
-       they'll be used when looking up for very short strings.  */
-    return 0;
-
-  if (memcmp (A->root.string + A->len - 2 * A->u.entsize,
-             B->root.string + B->len - 2 * A->u.entsize,
-             A->u.entsize) != 0)
-    /* This was a hashtable collision.  */
-    return 0;
-
-  if (A->len <= B->len)
-    /* B cannot be a suffix of A unless A is equal to B, which is guaranteed
-       not to be equal by the hash table.  */
-    return 0;
-
-  if (A->alignment < B->alignment
-      || ((A->len - B->len) & (B->alignment - 1)))
-    /* The suffix is not sufficiently aligned.  */
-    return 0;
-
-  return memcmp (A->root.string + (A->len - B->len),
-                B->root.string, B->len - 2 * A->u.entsize) == 0;
-}
-
  /* Record one section into the hash table.  */
  static bfd_boolean
  record_section (struct sec_merge_info *sinfo,
@@ -534,7 +462,7 @@ record_section (struct sec_merge_info *sinfo,
                         goto error_return;
                     }
                   p++;
-               }
+               }
             }
           else
             {
@@ -576,18 +504,81 @@ error_return:
    return FALSE;
  }
  
+static int
+strrevcmp (const void *a, const void *b)
+{
+  struct sec_merge_hash_entry *A = *(struct sec_merge_hash_entry **) a;
+  struct sec_merge_hash_entry *B = *(struct sec_merge_hash_entry **) b;
+  unsigned int lenA = A->len;
+  unsigned int lenB = B->len;
+  const unsigned char *s = A->root.string + lenA - 1;
+  const unsigned char *t = B->root.string + lenB - 1;
+  int l = lenA < lenB ? lenA : lenB;
+
+  while (l)
+    {
+      if (*s != *t)
+       return (int) *s - (int) *t;
+      s--;
+      t--;
+      l--;
+    }
+  return lenA - lenB;
+}
+
+/* Like strrevcmp, but for the case where all strings have the same
+   alignment > entsize.  */
+
+static int
+strrevcmp_align (const void *a, const void *b)
+{
+  struct sec_merge_hash_entry *A = *(struct sec_merge_hash_entry **) a;
+  struct sec_merge_hash_entry *B = *(struct sec_merge_hash_entry **) b;
+  unsigned int lenA = A->len;
+  unsigned int lenB = B->len;
+  const unsigned char *s = A->root.string + lenA - 1;
+  const unsigned char *t = B->root.string + lenB - 1;
+  int l = lenA < lenB ? lenA : lenB;
+  int tail_align = (lenA & (A->alignment - 1)) - (lenB & (A->alignment - 1));
+
+  if (tail_align != 0)
+    return tail_align;
+
+  while (l)
+    {
+      if (*s != *t)
+       return (int) *s - (int) *t;
+      s--;
+      t--;
+      l--;
+    }
+  return lenA - lenB;
+}
+
+static inline int
+is_suffix (const struct sec_merge_hash_entry *A,
+          const struct sec_merge_hash_entry *B)
+{
+  if (A->len <= B->len)
+    /* B cannot be a suffix of A unless A is equal to B, which is guaranteed
+       not to be equal by the hash table.  */
+    return 0;
+
+  return memcmp (A->root.string + (A->len - B->len),
+                B->root.string, B->len) == 0;
+}
+
  /* This is a helper function for _bfd_merge_sections.  It attempts to
     merge strings matching suffixes of longer strings.  */
  static void
  merge_strings (struct sec_merge_info *sinfo)
  {
-  struct sec_merge_hash_entry **array, **a, **end, *e;
+  struct sec_merge_hash_entry **array, **a, *e;
    struct sec_merge_sec_info *secinfo;
-  htab_t lasttab = NULL, last4tab = NULL;
    bfd_size_type size, amt;
+  unsigned int alignment = 0;
  
-  /* Now sort the strings by length, longest first.  */
-  array = NULL;
+  /* Now sort the strings */
    amt = sinfo->htab->size * sizeof (struct sec_merge_hash_entry *);
    array = (struct sec_merge_hash_entry **) bfd_malloc (amt);
    if (array == NULL)
@@ -595,90 +586,50 @@ merge_strings (struct sec_merge_info *sinfo)
  
    for (e = sinfo->htab->first, a = array; e; e = e->next)
      if (e->alignment)
-      *a++ = e;
+      {
+       *a++ = e;
+       /* Adjust the length to not include the zero terminator.  */
+       e->len -= sinfo->htab->entsize;
+       if (alignment != e->alignment)
+         {
+           if (alignment == 0)
+             alignment = e->alignment;
+           else
+             alignment = (unsigned) -1;
+         }
+      }
  
    sinfo->htab->size = a - array;
-
-  qsort (array, (size_t) sinfo->htab->size,
-        sizeof (struct sec_merge_hash_entry *), cmplengthentry);
-
-  last4tab = htab_create_alloc ((size_t) sinfo->htab->size * 4,
-                               NULL, last4_eq, NULL, calloc, free);
-  lasttab = htab_create_alloc ((size_t) sinfo->htab->size * 4,
-                              NULL, last_eq, NULL, calloc, free);
-  if (lasttab == NULL || last4tab == NULL)
-    goto alloc_failure;
-
-  /* Now insert the strings into hash tables (strings with last 4 characters
-     and strings with last character equal), look for longer strings which
-     we're suffix of.  */
-  for (a = array, end = array + sinfo->htab->size; a < end; a++)
+  if (sinfo->htab->size != 0)
      {
-      register hashval_t hash;
-      unsigned int c;
-      unsigned int i;
-      const unsigned char *s;
-      void **p;
-
-      e = *a;
-      e->u.entsize = sinfo->htab->entsize;
-      if (e->len <= e->u.entsize)
-       break;
-      if (e->len > 4 * e->u.entsize)
+      qsort (array, (size_t) sinfo->htab->size,
+            sizeof (struct sec_merge_hash_entry *),
+            (alignment != (unsigned) -1 && alignment > sinfo->htab->entsize
+             ? strrevcmp_align : strrevcmp));
+
+      /* Loop over the sorted array and merge suffixes */
+      e = *--a;
+      e->len += sinfo->htab->entsize;
+      while (--a >= array)
         {
-         s = (const unsigned char *) (e->root.string + e->len - e->u.entsize);
-         hash = 0;
-         for (i = 0; i < 4 * e->u.entsize; i++)
-           {
-             c = *--s;
-             hash += c + (c << 17);
-             hash ^= hash >> 2;
-           }
-         p = htab_find_slot_with_hash (last4tab, e, hash, INSERT);
-         if (p == NULL)
-           goto alloc_failure;
-         if (*p)
-           {
-             struct sec_merge_hash_entry *ent;
+         struct sec_merge_hash_entry *cmp = *a;
  
-             ent = (struct sec_merge_hash_entry *) *p;
-             e->u.suffix = ent;
-             e->alignment = 0;
-             continue;
+         cmp->len += sinfo->htab->entsize;
+         if (e->alignment >= cmp->alignment
+             && !((e->len - cmp->len) & (cmp->alignment - 1))
+             && is_suffix (e, cmp))
+           {
+             cmp->u.suffix = e;
+             cmp->alignment = 0;
             }
           else
-           *p = e;
-       }
-      s = (const unsigned char *) (e->root.string + e->len - e->u.entsize);
-      hash = 0;
-      for (i = 0; i < e->u.entsize; i++)
-       {
-         c = *--s;
-         hash += c + (c << 17);
-         hash ^= hash >> 2;
+           e = cmp;
         }
-      p = htab_find_slot_with_hash (lasttab, e, hash, INSERT);
-      if (p == NULL)
-       goto alloc_failure;
-      if (*p)
-       {
-         struct sec_merge_hash_entry *ent;
-
-         ent = (struct sec_merge_hash_entry *) *p;
-         e->u.suffix = ent;
-         e->alignment = 0;
-       }
-      else
-       *p = e;
      }
  
  alloc_failure:
    if (array)
      free (array);
-  if (lasttab)
-    htab_delete (lasttab);
-  if (last4tab)
-    htab_delete (last4tab);
  
    /* Now assign positions to the strings we want to keep.  */
    size = 0;
author	Alan Modra <amodra@gmail.com>
	Wed, 29 Oct 2003 22:59:37 +0000 (22:59 +0000)
committer	Alan Modra <amodra@gmail.com>
	Wed, 29 Oct 2003 22:59:37 +0000 (22:59 +0000)
bfd/ChangeLog		patch | blob | history
bfd/elf-strtab.c		patch | blob | history
bfd/merge.c		patch | blob | history