/* String pool for GCC.
- Copyright (C) 2000 Free Software Foundation, Inc.
+ Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2007, 2008, 2009, 2010
+ Free Software Foundation, Inc.
-This file is part of GNU CC.
+This file is part of GCC.
-GNU CC is free software; you can redistribute it and/or modify it
-under the terms of the GNU General Public License as published by the
-Free Software Foundation; either version 2, or (at your option) any
-later version.
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
-GNU CC is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
You should have received a copy of the GNU General Public License
-along with GNU CC; see the file COPYING. If not, write to the Free
-Software Foundation, 59 Temple Place - Suite 330, Boston, MA
-02111-1307, USA. */
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
-/* String pool allocator. All strings allocated by ggc_alloc_string are
- uniquified and stored in an obstack which is never shrunk. You can
- associate a tree with a string if you wish; this is used to implement
- get_identifier.
+/* String text, identifier text and identifier node allocator.
+ Identifiers are uniquely stored in a hash table.
- We have our own private hash table implementation which is similar
- to the one in cpphash.c (actually, it's a further refinement of
- that code). libiberty's hashtab.c is not used because it requires
- 100% average space overhead per string, which is unacceptable.
- Also, this algorithm is faster. */
+ We use cpplib's hash table implementation. libiberty's
+ hashtab.c is not used because it requires 100% average space
+ overhead per string, which is unacceptable. Also, this algorithm
+ is faster. */
#include "config.h"
#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
#include "ggc.h"
+#include "ggc-internal.h"
#include "tree.h"
-#include "obstack.h"
-#include "flags.h"
-#include "toplev.h"
+#include "symtab.h"
+#include "cpplib.h"
/* The "" allocated string. */
const char empty_string[] = "";
/* Character strings, each containing a single decimal digit.
Written this way to save space. */
-const char digit_vector[] = {
+static const char digit_vector[] = {
'0', 0, '1', 0, '2', 0, '3', 0, '4', 0,
'5', 0, '6', 0, '7', 0, '8', 0, '9', 0
};
-static struct obstack string_stack;
+#define digit_string(d) (digit_vector + ((d) * 2))
-/* This is the hash entry associated with each string. It lives in
- the hash table; only the string lives in the obstack. Note that
- the string is not necessarily NUL terminated. */
+struct ht *ident_hash;
-struct str_header
-{
- const char *ptr;
- tree data; /* for get_identifier */
- unsigned int len;
-};
+static hashnode alloc_node (hash_table *);
+static int mark_ident (struct cpp_reader *, hashnode, const void *);
-/* This is the hash table structure. There's only one. */
-struct str_hash
+static void *
+stringpool_ggc_alloc (size_t x)
{
- struct str_header *entries;
- size_t nslots; /* total slots in the entries array */
- size_t nelements; /* number of live elements */
-
- /* table usage statistics */
- unsigned int searches;
- unsigned int collisions;
-};
-#define INITIAL_HASHSIZE (16*1024)
-
-static struct str_hash string_hash = { 0, INITIAL_HASHSIZE, 0, 0, 0 };
-
-enum insert_option { INSERT, NO_INSERT };
-
-static struct str_header *alloc_string PARAMS ((const char *, size_t,
- enum insert_option));
-static inline unsigned int calc_hash PARAMS ((const unsigned char *, size_t));
-static void mark_string_hash PARAMS ((void *));
-static struct str_header *expand_string_table PARAMS ((struct str_header *));
-
-/* Convenience macro for iterating over the hash table. E is set to
- each live entry in turn. */
-#define FORALL_STRINGS(E) \
-for (E = string_hash.entries; E < string_hash.entries+string_hash.nslots; E++) \
- if (E->ptr != NULL)
- /* block here */
-
-/* Likewise, but tests ->data instead of ->ptr (for cases where we only
- care about entries with ->data set) */
-#define FORALL_IDS(E) \
-for (E = string_hash.entries; E < string_hash.entries+string_hash.nslots; E++) \
- if (E->data != NULL)
-
-/* 0 while creating built-in identifiers. */
-static int do_identifier_warnings;
+ return ggc_alloc_atomic (x);
+}
/* Initialize the string pool. */
void
-init_stringpool ()
+init_stringpool (void)
{
- gcc_obstack_init (&string_stack);
- ggc_add_root (&string_hash, 1, sizeof string_hash, mark_string_hash);
-
- /* Strings need no alignment. */
- obstack_alignment_mask (&string_stack) = 0;
-
- string_hash.entries = (struct str_header *)
- xcalloc (string_hash.nslots, sizeof (struct str_header));
+ /* Create with 16K (2^14) entries. */
+ ident_hash = ht_create (14);
+ ident_hash->alloc_node = alloc_node;
+ ident_hash->alloc_subobject = stringpool_ggc_alloc;
}
-/* Enable warnings on similar identifiers (if requested).
- Done after the built-in identifiers are created. */
-void
-start_identifier_warnings ()
+/* Allocate a hash node. */
+static hashnode
+alloc_node (hash_table *table ATTRIBUTE_UNUSED)
{
- do_identifier_warnings = 1;
+ return GCC_IDENT_TO_HT_IDENT (make_node (IDENTIFIER_NODE));
}
-/* Record the size of an identifier node for the language in use.
- SIZE is the total size in bytes.
- This is called by the language-specific files. This must be
- called before allocating any identifiers. */
-void
-set_identifier_size (size)
- int size;
-{
- tree_code_length[(int) IDENTIFIER_NODE]
- = (size - sizeof (struct tree_common)) / sizeof (tree);
-}
+/* Allocate and return a string constant of length LENGTH, containing
+ CONTENTS. If LENGTH is -1, CONTENTS is assumed to be a
+ nul-terminated string, and the length is calculated using strlen. */
-/* Calculate the hash of the string STR, which is of length LEN. */
-static inline unsigned int
-calc_hash (str, len)
- const unsigned char *str;
- size_t len;
+const char *
+ggc_alloc_string_stat (const char *contents, int length MEM_STAT_DECL)
{
- size_t n = len;
- unsigned int r = 0;
-#define HASHSTEP(r, c) ((r) * 67 + (c - 113));
+ char *result;
+
+ if (length == -1)
+ length = strlen (contents);
- while (n--)
- r = HASHSTEP (r, *str++);
+ if (length == 0)
+ return empty_string;
+ if (length == 1 && ISDIGIT (contents[0]))
+ return digit_string (contents[0] - '0');
- return r + len;
-#undef HASHSTEP
+ result = (char *) ggc_alloc_atomic_stat (length + 1 PASS_MEM_STAT);
+ memcpy (result, contents, length);
+ result[length] = '\0';
+ return (const char *) result;
}
-/* Internal primitive: returns the header structure for the string of
- length LENGTH, containing CONTENTS. If that string already exists
- in the table, returns the existing entry. If the string hasn't
- been seen before and the last argument is INSERT, inserts and returns
- a new entry. Otherwise returns NULL. */
-static struct str_header *
-alloc_string (contents, length, insert)
- const char *contents;
- size_t length;
- enum insert_option insert;
+/* Return an IDENTIFIER_NODE whose name is TEXT (a null-terminated string).
+ If an identifier with that name has previously been referred to,
+ the same node is returned this time. */
+
+#undef get_identifier
+
+tree
+get_identifier (const char *text)
{
- unsigned int hash = calc_hash ((const unsigned char *)contents, length);
- unsigned int hash2;
- unsigned int index;
- size_t sizemask;
- struct str_header *entry;
- struct str_header *entries = string_hash.entries;
+ hashnode ht_node = ht_lookup (ident_hash,
+ (const unsigned char *) text,
+ strlen (text), HT_ALLOC);
- sizemask = string_hash.nslots - 1;
- index = hash & sizemask;
+ /* ht_node can't be NULL here. */
+ return HT_IDENT_TO_GCC_IDENT (ht_node);
+}
- /* hash2 must be odd, so we're guaranteed to visit every possible
- location in the table during rehashing. */
- hash2 = ((hash * 17) & sizemask) | 1;
- string_hash.searches++;
+/* Identical to get_identifier, except that the length is assumed
+ known. */
- for (;;)
- {
- entry = entries + index;
+tree
+get_identifier_with_length (const char *text, size_t length)
+{
+ hashnode ht_node = ht_lookup (ident_hash,
+ (const unsigned char *) text,
+ length, HT_ALLOC);
- if (entry->ptr == NULL)
- break;
+ /* ht_node can't be NULL here. */
+ return HT_IDENT_TO_GCC_IDENT (ht_node);
+}
- if (entry->len == length
- && !memcmp (entry->ptr, contents, length))
- return entry;
+/* If an identifier with the name TEXT (a null-terminated string) has
+ previously been referred to, return that node; otherwise return
+ NULL_TREE. */
- index = (index + hash2) & sizemask;
- string_hash.collisions++;
- }
+tree
+maybe_get_identifier (const char *text)
+{
+ hashnode ht_node;
- if (insert == NO_INSERT)
- return NULL;
+ ht_node = ht_lookup (ident_hash, (const unsigned char *) text,
+ strlen (text), HT_NO_INSERT);
+ if (ht_node)
+ return HT_IDENT_TO_GCC_IDENT (ht_node);
- obstack_grow0 (&string_stack, contents, length);
- entry->ptr = (const char *) obstack_finish (&string_stack);
- entry->len = length;
- entry->data = NULL;
+ return NULL_TREE;
+}
- if (++string_hash.nelements * 4 < string_hash.nslots * 3)
- return entry;
+/* Report some basic statistics about the string pool. */
- /* Must expand the string table. */
- return expand_string_table (entry);
+void
+stringpool_statistics (void)
+{
+ ht_dump_statistics (ident_hash);
}
+\f
+/* Mark an identifier for GC. */
-/* Subroutine of alloc_string which doubles the size of the hash table
- and rehashes all the strings into the new table. Returns the entry
- in the new table corresponding to ENTRY. */
-static struct str_header *
-expand_string_table (entry)
- struct str_header *entry;
+static int
+mark_ident (struct cpp_reader *pfile ATTRIBUTE_UNUSED, hashnode h,
+ const void *v ATTRIBUTE_UNUSED)
{
- struct str_header *nentries;
- struct str_header *e, *nentry = NULL;
- size_t size, sizemask;
-
- size = string_hash.nslots * 2;
- nentries = (struct str_header *) xcalloc (size, sizeof (struct str_header));
- sizemask = size - 1;
-
- FORALL_STRINGS (e)
- {
- unsigned int index, hash, hash2;
-
- hash = calc_hash ((const unsigned char *) e->ptr, e->len);
- hash2 = ((hash * 17) & sizemask) | 1;
- index = hash & sizemask;
-
- for (;;)
- {
- if (nentries[index].ptr == NULL)
- {
- nentries[index].ptr = e->ptr;
- nentries[index].len = e->len;
- nentries[index].data = e->data;
- if (e == entry)
- nentry = nentries + index;
- break;
- }
-
- index = (index + hash2) & sizemask;
- }
- }
-
- free (string_hash.entries);
- string_hash.entries = nentries;
- string_hash.nslots = size;
- return nentry;
+ gt_ggc_m_9tree_node (HT_IDENT_TO_GCC_IDENT (h));
+ return 1;
}
-/* Allocate and return a string constant of length LENGTH, containing
- CONTENTS. If LENGTH is -1, CONTENTS is assumed to be a
- nul-terminated string, and the length is calculated using strlen.
- If the same string constant has been allocated before, that copy is
- returned this time too. */
+/* Return true if an identifier should be removed from the table. */
-const char *
-ggc_alloc_string (contents, length)
- const char *contents;
- int length;
+static int
+maybe_delete_ident (struct cpp_reader *pfile ATTRIBUTE_UNUSED, hashnode h,
+ const void *v ATTRIBUTE_UNUSED)
{
- struct str_header *str;
+ return !ggc_marked_p (HT_IDENT_TO_GCC_IDENT (h));
+}
- if (length == -1)
- length = strlen (contents);
+/* Mark the trees hanging off the identifier node for GGC. These are
+ handled specially (not using gengtype) because identifiers are only
+ roots during one part of compilation. */
- if (length == 0)
- return empty_string;
- if (length == 1 && contents[0] >= '0' && contents[0] <= '9')
- return digit_string (contents[0] - '0');
-
- str = alloc_string (contents, length, INSERT);
- return str->ptr;
+void
+ggc_mark_stringpool (void)
+{
+ ht_forall (ident_hash, mark_ident, NULL);
}
-/* Return an IDENTIFIER_NODE whose name is TEXT (a null-terminated string).
- If an identifier with that name has previously been referred to,
- the same node is returned this time. */
-tree
-get_identifier (text)
- const char *text;
+/* Purge the identifier hash of identifiers which are no longer
+ referenced. */
+
+void
+ggc_purge_stringpool (void)
{
- tree idp;
- struct str_header *str;
- size_t length = strlen (text);
-
- str = alloc_string (text, length, INSERT);
- idp = str->data;
- if (idp == NULL)
- {
- if (TREE_CODE_LENGTH (IDENTIFIER_NODE) < 0)
- abort (); /* set_identifier_size hasn't been called. */
-
- /* If this identifier is longer than the clash-warning length,
- do a brute force search of the entire table for clashes. */
- if (warn_id_clash && do_identifier_warnings && length >= (size_t) id_clash_len)
- {
- struct str_header *e;
- FORALL_IDS (e)
- {
- if (e->len >= (size_t)id_clash_len
- && !strncmp (e->ptr, text, id_clash_len))
- {
- warning ("\"%s\" and \"%s\" identical in first %d characters",
- text, e->ptr, id_clash_len);
- break;
- }
- }
- }
-
- idp = make_node (IDENTIFIER_NODE);
- IDENTIFIER_LENGTH (idp) = length;
- IDENTIFIER_POINTER (idp) = str->ptr;
-#ifdef GATHER_STATISTICS
- id_string_size += length;
-#endif
- str->data = idp;
- }
- return idp;
+ ht_purge (ident_hash, maybe_delete_ident, NULL);
}
-/* If an identifier with the name TEXT (a null-terminated string) has
- previously been referred to, return that node; otherwise return
- NULL_TREE. */
+/* Pointer-walking routine for strings (not very interesting, since
+ strings don't contain pointers). */
-tree
-maybe_get_identifier (text)
- const char *text;
+void
+gt_pch_p_S (void *obj ATTRIBUTE_UNUSED, void *x ATTRIBUTE_UNUSED,
+ gt_pointer_operator op ATTRIBUTE_UNUSED,
+ void *cookie ATTRIBUTE_UNUSED)
{
- struct str_header *str;
- size_t length = strlen (text);
-
- str = alloc_string (text, length, NO_INSERT);
- if (str)
- return str->data; /* N.B. str->data might be null here, if the
- string has been used but not as an identifier. */
- return NULL_TREE;
}
-/* Look up an identifier with the name TEXT, replace its identifier
- node with NODE, and return the old identifier node. This is used
- by languages which need to enable and disable keywords based on
- context; e.g. see remember_protocol_qualifiers in objc/objc-act.c. */
-tree
-set_identifier (text, node)
- const char *text;
- tree node;
+/* PCH pointer-walking routine for strings. */
+
+void
+gt_pch_n_S (const void *x)
{
- struct str_header *str;
- tree old;
- size_t length = strlen (text);
-
- str = alloc_string (text, length, INSERT);
- old = str->data; /* might be null */
- str->data = node;
- return old;
+ gt_pch_note_object (CONST_CAST (void *, x), CONST_CAST (void *, x),
+ >_pch_p_S, gt_types_enum_last);
}
+\f
+/* Handle saving and restoring the string pool for PCH. */
+
+/* SPD is saved in the PCH file and holds the information needed
+ to restore the string pool. */
+
+struct GTY(()) string_pool_data {
+ ht_identifier_ptr *
+ GTY((length ("%h.nslots"),
+ nested_ptr (union tree_node, "%h ? GCC_IDENT_TO_HT_IDENT (%h) : NULL",
+ "%h ? HT_IDENT_TO_GCC_IDENT (%h) : NULL")))
+ entries;
+ unsigned int nslots;
+ unsigned int nelements;
+};
-/* Report some basic statistics about the string pool. */
+static GTY(()) struct string_pool_data * spd;
+
+/* Save the stringpool data in SPD. */
void
-stringpool_statistics ()
+gt_pch_save_stringpool (void)
{
- size_t nelts, nids, overhead, headers;
- size_t total_bytes, longest, sum_of_squares;
- double exp_len, exp_len2, exp2_len;
- struct str_header *e;
-#define SCALE(x) ((unsigned long) ((x) < 1024*10 \
- ? (x) \
- : ((x) < 1024*1024*10 \
- ? (x) / 1024 \
- : (x) / (1024*1024))))
-#define LABEL(x) ((x) < 1024*10 ? ' ' : ((x) < 1024*1024*10 ? 'k' : 'M'))
-
- total_bytes = longest = sum_of_squares = nids = 0;
- FORALL_STRINGS (e)
- {
- size_t n = e->len;
-
- total_bytes += n;
- sum_of_squares += n*n;
- if (n > longest)
- longest = n;
- if (e->data)
- nids++;
- }
-
- nelts = string_hash.nelements;
- overhead = obstack_memory_used (&string_stack) - total_bytes;
- headers = string_hash.nslots * sizeof (struct str_header);
-
- fprintf (stderr,
-"\nString pool\n\
-entries\t\t%lu\n\
-identifiers\t%lu (%.2f%%)\n\
-slots\t\t%lu\n\
-bytes\t\t%lu%c (%lu%c overhead)\n\
-table size\t%lu%c\n",
- (unsigned long) nelts,
- (unsigned long) nids, nids * 100.0 / nelts,
- (unsigned long) string_hash.nslots,
- SCALE (total_bytes), LABEL (total_bytes),
- SCALE (overhead), LABEL (overhead),
- SCALE (headers), LABEL (headers));
-
- exp_len = (double)total_bytes / (double)nelts;
- exp2_len = exp_len * exp_len;
- exp_len2 = (double)sum_of_squares / (double)nelts;
-
- fprintf (stderr,
-"coll/search\t%.4f\n\
-ins/search\t%.4f\n\
-avg. entry\t%.2f bytes (+/- %.2f)\n\
-longest entry\t%lu\n",
- (double) string_hash.collisions / (double) string_hash.searches,
- (double) nelts / (double) string_hash.searches,
- exp_len, approx_sqrt (exp_len2 - exp2_len),
- (unsigned long) longest);
-#undef SCALE
-#undef LABEL
+ spd = ggc_alloc_string_pool_data ();
+ spd->nslots = ident_hash->nslots;
+ spd->nelements = ident_hash->nelements;
+ spd->entries = ggc_alloc_vec_ht_identifier_ptr (spd->nslots);
+ memcpy (spd->entries, ident_hash->entries,
+ spd->nslots * sizeof (spd->entries[0]));
}
-/* Mark the string hash for GC. */
+/* Return the stringpool to its state before gt_pch_save_stringpool
+ was called. */
-static void
-mark_string_hash (arg)
- void *arg ATTRIBUTE_UNUSED;
+void
+gt_pch_fixup_stringpool (void)
{
- struct str_header *h;
+}
+
+/* A PCH file has been restored, which loaded SPD; fill the real hash table
+ from SPD. */
- FORALL_IDS (h)
- {
- ggc_mark_tree (h->data);
- }
+void
+gt_pch_restore_stringpool (void)
+{
+ ht_load (ident_hash, spd->entries, spd->nslots, spd->nelements, false);
+ spd = NULL;
}
+
+#include "gt-stringpool.h"