2007-08-03 Hans Boehm <Hans.Boehm@hp.com>
+
+ * alloc.c, backgraph.c, headers.c, include/private/gc_priv.h:
+ Maintain GC_our_memory and GC_n_memory.
+ * dbg_mlc.c (GC_print_smashed_obj): Improve message.
+ (GC_print_all_smashed_proc): Pass client object address instead of
+ base.
+ * dyn_load.c (sort_heap_sects): New. (GC_register_map_entries):
+ Register sections that are contiguous and merged with our heap.
+ * malloc.c, os_dep.c (GC_text_mapping): Check for just base name
+ of libraries.
+ * malloc.c (calloc): Check for special callers even with
+ USE_PROC_FOR_LIBRARIES. Move assertion. Add rudimentary
+ malloc/free tracing.
+ * misc.c: No longer call GC_init_lib_bounds explicitly.
+ * thread_local_alloc.c (GC_malloc, GC_malloc_atomic): Always
+ initialize on demand.
+ * tests/test.c: Call GC_INIT only when required.
+
+2007-08-03 Hans Boehm <Hans.Boehm@hp.com>
* Makefile.direct: Remove comment fragment.
* tests/tests.am: Add smashtest.
word GC_n_heap_sects = 0; /* Number of sections currently in heap. */
+#ifdef USE_PROC_FOR_LIBRARIES
+ word GC_n_memory = 0; /* Number of GET_MEM allocated memory */
+ /* sections. */
+#endif
+
+#ifdef USE_PROC_FOR_LIBRARIES
+  /* Add an HBLKSIZE-aligned, GET_MEM-generated block to GC_our_memory. */
+  /* Defined to do nothing if USE_PROC_FOR_LIBRARIES is not set.        */
+ void GC_add_to_our_memory(ptr_t p, size_t bytes)
+ {
+ if (0 == p) return;
+ GC_our_memory[GC_n_memory].hs_start = p;
+ GC_our_memory[GC_n_memory].hs_bytes = bytes;
+ GC_n_memory++;
+ }
+#endif
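GC_add_to_our_memory, as added above, relies on its callers staying below MAX_HEAP_SECTS. A minimal standalone sketch of the same bookkeeping pattern, with a hypothetical capacity assertion made explicit (all names here are illustrative, not the collector's):

    #include <assert.h>
    #include <stddef.h>

    #define MAX_SECTS 1024                  /* stand-in for MAX_HEAP_SECTS */

    struct sect { char *start; size_t bytes; };

    static struct sect our_memory[MAX_SECTS];
    static size_t n_memory = 0;

    /* Record a block obtained from the system allocator. */
    static void add_to_our_memory(char *p, size_t bytes)
    {
        if (p == NULL) return;              /* GET_MEM may return 0 */
        assert(n_memory < MAX_SECTS);       /* hypothetical overflow check */
        our_memory[n_memory].start = p;
        our_memory[n_memory].bytes = bytes;
        ++n_memory;
    }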
/*
* Use the chunk of memory starting at p of size bytes as part of the heap.
* Assumes p is HBLKSIZE aligned, and bytes is a multiple of HBLKSIZE.
return(FALSE);
}
space = GET_MEM(bytes);
+ GC_add_to_our_memory((ptr_t)space, bytes);
if( space == 0 ) {
if (GC_print_stats) {
GC_log_printf("Failed to expand heap by %ld bytes\n",
if (0 == back_edge_space) {
back_edge_space = (back_edges *)
GET_MEM(MAX_BACK_EDGE_STRUCTS*sizeof(back_edges));
+ GC_add_to_our_memory((ptr_t)back_edge_space,
+ MAX_BACK_EDGE_STRUCTS*sizeof(back_edges));
}
if (0 != avail_back_edges) {
back_edges * result = avail_back_edges;
if (in_progress_size == 0) {
in_progress_size = INITIAL_IN_PROGRESS;
in_progress_space = (ptr_t *)GET_MEM(in_progress_size * sizeof(ptr_t));
+ GC_add_to_our_memory((ptr_t)in_progress_space,
+ in_progress_size * sizeof(ptr_t));
} else {
ptr_t * new_in_progress_space;
in_progress_size *= 2;
new_in_progress_space = (ptr_t *)
GET_MEM(in_progress_size * sizeof(ptr_t));
+ GC_add_to_our_memory((ptr_t)new_in_progress_space,
+ in_progress_size * sizeof(ptr_t));
BCOPY(in_progress_space, new_in_progress_space,
n_in_progress * sizeof(ptr_t));
in_progress_space = new_in_progress_space;
}
#ifndef SHORT_DBG_HDRS
+/* Use GC_err_printf and friends to print a description of the object */
+/* whose client-visible address is p, and which was smashed at */
+/* clobbered_addr. */
void GC_print_smashed_obj(ptr_t p, ptr_t clobbered_addr)
{
register oh * ohdr = (oh *)GC_base(p);
GC_ASSERT(I_DONT_HOLD_LOCK());
- GC_err_printf("%p in object at %p(", clobbered_addr, p);
+ GC_err_printf("%p in or near object at %p(", clobbered_addr, p);
if (clobbered_addr <= (ptr_t)(&(ohdr -> oh_sz))
|| ohdr -> oh_string == 0) {
GC_err_printf("<smashed>, appr. sz = %ld)\n",
if (GC_n_smashed == 0) return;
GC_err_printf("GC_check_heap_block: found smashed heap objects:\n");
for (i = 0; i < GC_n_smashed; ++i) {
- GC_print_smashed_obj(GC_base(GC_smashed[i]), GC_smashed[i]);
+ GC_print_smashed_obj((ptr_t)GC_base(GC_smashed[i]) + sizeof(oh),
+ GC_smashed[i]);
GC_smashed[i] = 0;
}
GC_n_smashed = 0;
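The call above now passes GC_base(...) + sizeof(oh), the client-visible address, rather than the debug base. A schematic sketch of the layout that offset reflects (field names abridged; the real oh in dbg_mlc.h carries more state):

    #include <stddef.h>

    /* Schematic debug header preceding each client object. */
    typedef struct {
        const char *oh_string;   /* file name of the allocation site   */
        int oh_int;              /* line number of the allocation site */
        size_t oh_sz;            /* requested object size in bytes     */
    } oh_sketch;

    /* The client object starts immediately after the header. */
    #define CLIENT_ADDR(base) ((char *)(base) + sizeof(oh_sketch))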
char *GC_get_maps(void);
/* From os_dep.c */
+/* Sort an array of HeapSects by start address. */
+/* Unfortunately, at least some versions of Linux qsort end up        */
+/* calling malloc by way of sysconf, and hence can't be used in the   */
+/* collector.  Hence we roll our own.  Should be reasonably fast if   */
+/* the array is already mostly sorted, as we expect it to be.         */
+void sort_heap_sects(struct HeapSect *base, size_t number_of_elements)
+{
+ signed_word n = (signed_word)number_of_elements;
+ signed_word nsorted = 1;
+ signed_word i;
+
+ while (nsorted < n) {
+ while (nsorted < n &&
+ base[nsorted-1].hs_start < base[nsorted].hs_start)
+ ++nsorted;
+ if (nsorted == n) break;
+ GC_ASSERT(base[nsorted-1].hs_start > base[nsorted].hs_start);
+ i = nsorted - 1;
+ while (i >= 0 && base[i].hs_start > base[i+1].hs_start) {
+ struct HeapSect tmp = base[i];
+ base[i] = base[i+1];
+ base[i+1] = tmp;
+ --i;
+ }
+ GC_ASSERT(base[nsorted-1].hs_start < base[nsorted].hs_start);
+ ++nsorted;
+ }
+}
+
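The same nearly-sorted insertion strategy, shown standalone on plain ints: the outer scan walks the already-sorted prefix in linear time, and only out-of-place elements are bubbled backward, so sorted input costs O(n) and the worst case O(n^2). A sketch for illustration only:

    /* Illustrative counterpart of sort_heap_sects for int arrays. */
    static void sort_ints(int *a, int n)
    {
        int nsorted = 1;        /* a[0..nsorted-1] is known to be sorted */
        while (nsorted < n) {
            /* Extend the sorted prefix as far as it already reaches. */
            while (nsorted < n && a[nsorted-1] < a[nsorted]) ++nsorted;
            if (nsorted == n) break;
            /* Bubble the offending element back into position. */
            {
                int i = nsorted - 1;
                while (i >= 0 && a[i] > a[i+1]) {
                    int tmp = a[i]; a[i] = a[i+1]; a[i+1] = tmp;
                    --i;
                }
            }
            ++nsorted;
        }
    }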
word GC_register_map_entries(char *maps)
{
char *prot;
unsigned i;
ptr_t datastart = (ptr_t)(DATASTART);
- /* Compute heap bounds. FIXME: Should work if heap and roots are */
- /* interleaved? */
- least_ha = (ptr_t)(word)(-1);
- greatest_ha = 0;
- for (i = 0; i < GC_n_heap_sects; ++i) {
- ptr_t sect_start = GC_heap_sects[i].hs_start;
- ptr_t sect_end = sect_start + GC_heap_sects[i].hs_bytes;
- if (sect_start < least_ha) least_ha = sect_start;
- if (sect_end > greatest_ha) greatest_ha = sect_end;
- }
- if (greatest_ha < (ptr_t)GC_scratch_last_end_ptr)
- greatest_ha = (ptr_t)GC_scratch_last_end_ptr;
+ GC_ASSERT(I_HOLD_LOCK());
+ sort_heap_sects(GC_our_memory, GC_n_memory);
+ least_ha = GC_our_memory[0].hs_start;
+ greatest_ha = GC_our_memory[GC_n_memory-1].hs_start
+ + GC_our_memory[GC_n_memory-1].hs_bytes;
for (;;) {
buf_ptr = GC_parse_map_entry(buf_ptr, &start, &end, &prot, &maj_dev, 0);
/* That can fail because the stack may disappear while */
/* we're marking. Thus the marker is, and has to be */
/* prepared to recover from segmentation faults. */
+
if (GC_segment_is_thread_stack(start, end)) continue;
- /* FIXME: REDIRECT_MALLOC actually works with threads on */
- /* LINUX/IA64 if we omit this check. The problem is that */
+
+	  /* FIXME: NPTL squirrels away pointers in pieces of the     */
+	  /* stack segment that we don't scan.  We work around this   */
+	  /* by treating anything allocated by libpthread as          */
+	  /* uncollectable, as we do in some other cases.             */
+ /* A specifically identified problem is that */
/* thread stacks contain pointers to dynamic thread */
/* vectors, which may be reused due to thread caching. */
- /* Currently they may not be marked if the thread is */
- /* still live. */
- /* For dead threads, we trace the whole stack, which is */
+ /* They may not be marked if the thread is still live. */
+ /* This specific instance should be addressed by */
+ /* INCLUDE_LINUX_THREAD_DESCR, but that doesn't quite */
+ /* seem to suffice. */
+	  /* We currently trace entire thread stacks, if they         */
+	  /* are currently cached but unused.  This is                */
/* very suboptimal for performance reasons. */
# endif
/* We no longer exclude the main data segment. */
- if (start < least_ha && end > least_ha) {
- end = least_ha;
- }
- if (start < greatest_ha && end > greatest_ha) {
- start = greatest_ha;
+ if (end <= least_ha || start >= greatest_ha) {
+	    /* The easy case; just trace the entire segment. */
+ GC_add_roots_inner((char *)start, (char *)end, TRUE);
+ continue;
}
- if (start >= least_ha && end <= greatest_ha) continue;
- GC_add_roots_inner((char *)start, (char *)end, TRUE);
+	/* Add sections that don't belong to us. */
+ i = 0;
+ while (GC_our_memory[i].hs_start + GC_our_memory[i].hs_bytes
+ < start)
+ ++i;
+ GC_ASSERT(i < GC_n_memory);
+ if (GC_our_memory[i].hs_start <= start) {
+ start = GC_our_memory[i].hs_start
+ + GC_our_memory[i].hs_bytes;
+ ++i;
+ }
+ while (i < GC_n_memory && GC_our_memory[i].hs_start < end
+ && start < end) {
+ if ((char *)start < GC_our_memory[i].hs_start)
+ GC_add_roots_inner((char *)start,
+ GC_our_memory[i].hs_start, TRUE);
+ start = GC_our_memory[i].hs_start
+ + GC_our_memory[i].hs_bytes;
+ ++i;
+ }
+ if (start < end)
+ GC_add_roots_inner((char *)start, (char *)end, TRUE);
}
}
return 1;
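The loop above subtracts our own (sorted) sections from each /proc maps entry and registers only the gaps as roots. A self-contained sketch of that interval subtraction, with add_root standing in for GC_add_roots_inner and an explicit bounds check that the real code can omit because the mapping is known to overlap our memory:

    #include <stdio.h>
    #include <stddef.h>

    struct sect { char *start; size_t bytes; };

    static void add_root(char *s, char *e)
    { printf("root [%p,%p)\n", (void *)s, (void *)e); }

    /* Register the parts of [start,end) not covered by mine[0..n-1], */
    /* where mine is sorted by start address.                         */
    static void register_gaps(char *start, char *end,
                              struct sect *mine, size_t n)
    {
        size_t i = 0;
        /* Skip our sections that lie entirely below the mapping. */
        while (i < n && mine[i].start + mine[i].bytes <= start) ++i;
        /* If one of our sections covers start, begin past it. */
        if (i < n && mine[i].start <= start) {
            start = mine[i].start + mine[i].bytes;
            ++i;
        }
        /* Emit each gap between consecutive sections inside the mapping. */
        while (i < n && mine[i].start < end && start < end) {
            if (start < mine[i].start) add_root(start, mine[i].start);
            start = mine[i].start + mine[i].bytes;
            ++i;
        }
        if (start < end) add_root(start, end);
    }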
bytes_to_get &= ~(GC_page_size - 1);
# endif
result = (ptr_t)GET_MEM(bytes_to_get);
+ GC_add_to_our_memory(result, bytes_to_get);
scratch_free_ptr -= bytes;
GC_scratch_last_end_ptr = result + bytes;
return(result);
}
result = (ptr_t)GET_MEM(bytes_to_get);
+ GC_add_to_our_memory(result, bytes_to_get);
if (result == 0) {
if (GC_print_stats)
GC_printf("Out of memory - trying to allocate less\n");
bytes_to_get += GC_page_size - 1;
bytes_to_get &= ~(GC_page_size - 1);
# endif
- return((ptr_t)GET_MEM(bytes_to_get));
+ result = (ptr_t)GET_MEM(bytes_to_get);
+ GC_add_to_our_memory(result, bytes_to_get);
+ return result;
}
scratch_free_ptr = result;
GC_scratch_end_ptr = scratch_free_ptr + bytes_to_get;
# endif
struct HeapSect {
ptr_t hs_start; size_t hs_bytes;
- } _heap_sects[MAX_HEAP_SECTS];
+    } _heap_sects[MAX_HEAP_SECTS];	/* Heap segments potentially    */
+					/* containing client objects.   */
+# if defined(USE_PROC_FOR_LIBRARIES)
+ struct HeapSect _our_memory[MAX_HEAP_SECTS];
+ /* All GET_MEM allocated */
+ /* memory. Includes block */
+ /* headers and the like. */
+# endif
# if defined(MSWIN32) || defined(MSWINCE)
ptr_t _heap_bases[MAX_HEAP_SECTS];
/* Start address of memory regions obtained from kernel. */
# define GC_requested_heapsize GC_arrays._requested_heapsize
# define GC_bytes_allocd_before_gc GC_arrays._bytes_allocd_before_gc
# define GC_heap_sects GC_arrays._heap_sects
+# ifdef USE_PROC_FOR_LIBRARIES
+# define GC_our_memory GC_arrays._our_memory
+# endif
# define GC_last_stack GC_arrays._last_stack
#ifdef ENABLE_TRACE
#define GC_trace_addr GC_arrays._trace_addr
extern word GC_n_heap_sects; /* Number of separately added heap */
/* sections. */
+#ifdef USE_PROC_FOR_LIBRARIES
+ extern word GC_n_memory; /* Number of GET_MEM allocated memory */
+ /* sections. */
+#endif
+
extern word GC_page_size;
# if defined(MSWIN32) || defined(MSWINCE)
void GC_add_to_heap(struct hblk *p, size_t bytes);
/* Add a HBLKSIZE aligned chunk to the heap. */
+
+#ifdef USE_PROC_FOR_LIBRARIES
+ void GC_add_to_our_memory(ptr_t p, size_t bytes);
+ /* Add a chunk to GC_our_memory. */
+ /* If p == 0, do nothing. */
+#else
+# define GC_add_to_our_memory(p, bytes)
+#endif
void GC_print_obj(ptr_t p);
/* P points to somewhere inside an object with */
return((void *)REDIRECT_MALLOC(lb));
}
-#ifdef GC_LINUX_THREADS
+#if defined(GC_LINUX_THREADS) /* && !defined(USE_PROC_FOR_LIBRARIES) */
static ptr_t GC_libpthread_start = 0;
static ptr_t GC_libpthread_end = 0;
static ptr_t GC_libld_start = 0;
void GC_init_lib_bounds(void)
{
if (GC_libpthread_start != 0) return;
- if (!GC_text_mapping("/lib/tls/libpthread-",
- &GC_libpthread_start, &GC_libpthread_end)
- && !GC_text_mapping("/lib/libpthread-",
- &GC_libpthread_start, &GC_libpthread_end)) {
+ if (!GC_text_mapping("libpthread-",
+ &GC_libpthread_start, &GC_libpthread_end)) {
WARN("Failed to find libpthread.so text mapping: Expect crash\n", 0);
/* This might still work with some versions of libpthread, */
/* so we don't abort. Perhaps we should. */
/* Generate message only once: */
GC_libpthread_start = (ptr_t)1;
}
- if (!GC_text_mapping("/lib/ld-", &GC_libld_start, &GC_libld_end)) {
+ if (!GC_text_mapping("ld-", &GC_libld_start, &GC_libld_end)) {
WARN("Failed to find ld.so text mapping: Expect crash\n", 0);
}
}
void * calloc(size_t n, size_t lb)
{
-# if defined(GC_LINUX_THREADS) && !defined(USE_PROC_FOR_LIBRARIES)
+# if defined(GC_LINUX_THREADS) /* && !defined(USE_PROC_FOR_LIBRARIES) */
/* libpthread allocated some memory that is only pointed to by */
/* mmapped thread stacks. Make sure it's not collectable. */
{
if (p == 0) return;
/* Required by ANSI. It's not my fault ... */
+# ifdef LOG_ALLOCS
+ GC_err_printf("GC_free(%p): %d\n", p, GC_gc_no);
+# endif
h = HBLKPTR(p);
hhdr = HDR(h);
sz = hhdr -> hb_sz;
ngranules = BYTES_TO_GRANULES(sz);
- GC_ASSERT(GC_base(p) == p);
# if defined(REDIRECT_MALLOC) && \
(defined(GC_SOLARIS_THREADS) || defined(GC_LINUX_THREADS) \
|| defined(MSWIN32))
/* Don't try to deallocate that memory. */
if (0 == hhdr) return;
# endif
+ GC_ASSERT(GC_base(p) == p);
knd = hhdr -> hb_obj_kind;
ok = &GC_obj_kinds[knd];
if (EXPECT((ngranules <= MAXOBJGRANULES), 1)) {
# ifdef STUBBORN_ALLOC
GC_stubborn_init();
# endif
-# if defined(GC_LINUX_THREADS) && defined(REDIRECT_MALLOC)
- {
- extern void GC_init_lib_bounds(void);
- GC_init_lib_bounds();
- }
-# endif
/* Convince lint that some things are used */
# ifdef LINT
{
return FALSE;
}
-/* Find the text(code) mapping for the library whose name starts with nm. */
+#if defined(REDIRECT_MALLOC)
+/* Find the text (code) mapping for the library whose name, after    */
+/* stripping the directory part, starts with nm.                     */
GC_bool GC_text_mapping(char *nm, ptr_t *startp, ptr_t *endp)
{
size_t nm_len = strlen(nm);
&prot, &maj_dev, &map_path);
if (buf_ptr == NULL) return FALSE;
- if (prot[0] == 'r' && prot[1] == '-' && prot[2] == 'x' &&
- strncmp(nm, map_path, nm_len) == 0) {
+ if (prot[0] == 'r' && prot[1] == '-' && prot[2] == 'x') {
+ char *p = map_path;
+ /* Set p to point just past last slash, if any. */
+ while (*p != '\0' && *p != '\n' && *p != ' ' && *p != '\t') ++p;
+	while (p >= map_path && *p != '/') --p;
+ ++p;
+ if (strncmp(nm, p, nm_len) == 0) {
*startp = my_start;
*endp = my_end;
return TRUE;
+ }
}
}
return FALSE;
}
+#endif /* REDIRECT_MALLOC */
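For illustration, the same prefix-after-last-slash test written against an ordinary NUL-terminated path; the in-tree loop above walks characters manually because map_path is a slice of the /proc/self/maps buffer delimited by whitespace rather than a NUL:

    #include <string.h>

    /* Does the file name component of path start with nm? */
    static int basename_starts_with(const char *path, const char *nm)
    {
        const char *p = strrchr(path, '/');
        p = (p != NULL) ? p + 1 : path;
        return strncmp(nm, p, strlen(nm)) == 0;
    }

    /* basename_starts_with("/lib/tls/libpthread-2.3.6.so", "libpthread-") */
    /* and basename_starts_with("/lib/libpthread-2.3.6.so", "libpthread-") */
    /* both return 1, which is why the hard-wired paths could be dropped.  */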
#ifdef IA64
static ptr_t backing_store_base_from_proc(void)
# include <stdarg.h>
#endif
+/* Call GC_INIT only on platforms on which we think we really need it, */
+/* so that we can test automatic initialization on the rest. */
+#if defined(__CYGWIN32__) || defined(_AIX)
+# define GC_COND_INIT() GC_INIT()
+#else
+# define GC_COND_INIT()
+#endif
/* Allocation Statistics */
int stubborn_count = 0;
/* Cheat and let stdio initialize toolbox for us. */
printf("Testing GC Macintosh port.\n");
# endif
- GC_INIT(); /* Only needed on a few platforms. */
+ GC_COND_INIT();
(void) GC_set_warn_proc(warn_proc);
# if (defined(MPROTECT_VDB) || defined(PROC_VDB) || defined(GWW_VDB)) \
&& !defined(MAKE_BACK_GRAPH) && !defined(NO_INCREMENTAL)
GC_use_DllMain(); /* Test with implicit thread registration if possible. */
GC_printf("Using DllMain to track threads\n");
# endif
- GC_INIT();
+ GC_COND_INIT();
# ifndef NO_INCREMENTAL
GC_enable_incremental();
# endif
pthread_win32_process_attach_np ();
pthread_win32_thread_attach_np ();
# endif
- GC_INIT();
+ GC_COND_INIT();
pthread_attr_init(&attr);
# if defined(GC_IRIX_THREADS) || defined(GC_FREEBSD_THREADS) \
/* Each thread structure must be initialized. */
/* This call must be made from the new thread. */
-/* Caller holds allocation lock. */
void GC_init_thread_local(GC_tlfs p)
{
int i;
+ GC_ASSERT(I_HOLD_LOCK());
if (!keys_initialized) {
if (0 != GC_key_create(&GC_thread_key, 0)) {
ABORT("Failed to create key for local allocator");
void *result;
void **tiny_fl;
-# if defined(REDIRECT_MALLOC) && !defined(USE_PTHREAD_SPECIFIC)
+# if !defined(USE_PTHREAD_SPECIFIC) && !defined(USE_WIN32_SPECIFIC)
GC_key_t k = GC_thread_key;
if (EXPECT(0 == k, 0)) {
/* We haven't yet run GC_init_parallel. That means */
}
tsd = GC_getspecific(k);
# else
- GC_ASSERT(GC_is_initialized);
tsd = GC_getspecific(GC_thread_key);
# endif
-# if defined(REDIRECT_MALLOC) && defined(USE_PTHREAD_SPECIFIC)
- if (EXPECT(NULL == tsd, 0)) {
+# if defined(USE_PTHREAD_SPECIFIC) || defined(USE_WIN32_SPECIFIC)
+ if (EXPECT(0 == tsd, 0)) {
return GC_core_malloc(bytes);
}
# endif
+ GC_ASSERT(GC_is_initialized);
# ifdef GC_ASSERTIONS
/* We can't check tsd correctly, since we don't have access to */
/* the right declarations. But we can check that it's close. */
tiny_fl = ((GC_tlfs)tsd) -> normal_freelists;
GC_FAST_MALLOC_GRANS(result, granules, tiny_fl, DIRECT_GRANULES,
NORMAL, GC_core_malloc(bytes), obj_link(result)=0);
+# ifdef LOG_ALLOCS
+ GC_err_printf("GC_malloc(%d) = %p : %d\n", bytes, result, GC_gc_no);
+# endif
return result;
}
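GC_FAST_MALLOC_GRANS pops the head of the per-size thread-local free list and falls back to the slow path otherwise; its last argument, obj_link(result)=0, clears the link word so the returned object no longer looks like a free-list entry. A loose, self-contained sketch of that fast path (the real macro also handles granule limits, lazy list initialization, and GC bookkeeping):

    #include <stddef.h>

    #define GRANULE_BYTES_SKETCH 16     /* illustrative granule size      */
    #define MAX_GRANS_SKETCH 64         /* illustrative per-thread limit  */

    struct tlfs_sketch { void *normal[MAX_GRANS_SKETCH]; };

    /* Pop from the per-thread list for this size if possible; the   */
    /* first word of a free object links to the next free object.    */
    static void *tl_malloc_sketch(struct tlfs_sketch *tsd, size_t granules,
                                  void *(*core_malloc)(size_t))
    {
        void *result;
        if (granules >= MAX_GRANS_SKETCH)
            return core_malloc(granules * GRANULE_BYTES_SKETCH);
        result = tsd->normal[granules];
        if (result != NULL) {
            tsd->normal[granules] = *(void **)result;  /* unlink head     */
            *(void **)result = NULL;                   /* clear link word */
            return result;
        }
        return core_malloc(granules * GRANULE_BYTES_SKETCH);
    }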
void * GC_malloc_atomic(size_t bytes)
{
size_t granules = ROUNDED_UP_GRANULES(bytes);
+ void *tsd;
void *result;
void **tiny_fl;
+# if !defined(USE_PTHREAD_SPECIFIC) && !defined(USE_WIN32_SPECIFIC)
+ GC_key_t k = GC_thread_key;
+ if (EXPECT(0 == k, 0)) {
+ /* We haven't yet run GC_init_parallel. That means */
+ /* we also aren't locking, so this is fairly cheap. */
+ return GC_core_malloc(bytes);
+ }
+ tsd = GC_getspecific(k);
+# else
+ tsd = GC_getspecific(GC_thread_key);
+# endif
+# if defined(USE_PTHREAD_SPECIFIC) || defined(USE_WIN32_SPECIFIC)
+ if (EXPECT(0 == tsd, 0)) {
+ return GC_core_malloc(bytes);
+ }
+# endif
GC_ASSERT(GC_is_initialized);
- tiny_fl = ((GC_tlfs)GC_getspecific(GC_thread_key))
- -> ptrfree_freelists;
+ tiny_fl = ((GC_tlfs)tsd) -> ptrfree_freelists;
GC_FAST_MALLOC_GRANS(result, bytes, tiny_fl, DIRECT_GRANULES,
PTRFREE, GC_core_malloc_atomic(bytes), 0/* no init */);
return result;
/* are not necessarily free. And there may be cache fill order issues. */
/* For now, we punt with incremental GC. This probably means that */
/* incremental GC should be enabled before we fork a second thread. */
+/* Unlike the other thread-local allocation calls, we assume that the  */
+/* collector has been explicitly initialized.                          */
void * GC_gcj_malloc(size_t bytes,
void * ptr_to_struct_containing_descr)
{