2007-08-03 Hans Boehm <Hans.Boehm@hp.com>
authorhboehm <hboehm>
Sat, 4 Aug 2007 06:26:29 +0000 (06:26 +0000)
committerIvan Maidanski <ivmai@mail.ru>
Tue, 26 Jul 2011 17:06:41 +0000 (21:06 +0400)
* alloc.c, backgraph.c, headers.c, include/private/gc_priv.h:
Maintain GC_our_memory and GC_n_memory.
* dbg_mlc.c (GC_print_smashed_obj): Improve message.
(GC_print_all_smashed_proc): Pass client object address instead of
base.
* dyn_load.c (sort_heap_sects): New.  (GC_register_map_entries):
Register sections that are contiguous and merged with our heap.
* malloc.c, os_dep.c (GC_text_mapping): Check for just base name
of libraries.
* malloc.c (calloc): Check for special callers even with
USE_PROC_FOR_LIBRARIES. Move assertion.  Add rudimentary
malloc/free tracing.
* misc.c: No longer call GC_init_lib_bounds explicitly.
* thread_local_alloc.c (GC_malloc, GC_malloc_atomic): Always
initialize on demand.
* tests/test.c: Call GC_INIT only when required.

12 files changed:
ChangeLog
alloc.c
backgraph.c
dbg_mlc.c
dyn_load.c
headers.c
include/private/gc_priv.h
malloc.c
misc.c
os_dep.c
tests/test.c
thread_local_alloc.c

index 3c687a7..e37c008 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,4 +1,23 @@
 2007-08-03  Hans Boehm <Hans.Boehm@hp.com>
+       
+       * alloc.c, backgraph.c, headers.c, include/private/gc_priv.h:
+       Maintain GC_our_memory and GC_n_memory.
+       * dbg_mlc.c (GC_print_smashed_obj): Improve message.
+       (GC_print_all_smashed_proc): Pass client object address instead of
+       base.
+       * dyn_load.c (sort_heap_sects): New.  (GC_register_map_entries):
+       Register sections that are contiguous and merged with our heap.
+       * malloc.c, os_dep.c (GC_text_mapping): Check for just base name
+       of libraries.
+       * malloc.c (calloc): Check for special callers even with
+       USE_PROC_FOR_LIBRARIES. Move assertion.  Add rudimentary
+       malloc/free tracing.
+       * misc.c: No longer call GC_init_lib_bounds explicitly.
+       * thread_local_alloc.c (GC_malloc, GC_malloc_atomic): Always
+       initialize on demand.
+       * tests/test.c: Call GC_INIT only when required.
+
+2007-08-03  Hans Boehm <Hans.Boehm@hp.com>
 
        * Makefile.direct: Remove comment fragment.
        * tests/tests.am: Add smashtest.
diff --git a/alloc.c b/alloc.c
index ac5fa16..2a8b2cd 100644 (file)
--- a/alloc.c
+++ b/alloc.c
@@ -759,6 +759,22 @@ void GC_gcollect(void)
 
 word GC_n_heap_sects = 0;      /* Number of sections currently in heap. */
 
+#ifdef USE_PROC_FOR_LIBRARIES
+  word GC_n_memory = 0;                /* Number of GET_MEM allocated memory   */
+                               /* sections.                            */
+#endif
+
+#ifdef USE_PROC_FOR_LIBRARIES
+  /* Add HBLKSIZE aligned, GET_MEM-generated block to GC_our_memory. */
+  /* Defined to do nothing if USE_PROC_FOR_LIBRARIES not set.      */
+  void GC_add_to_our_memory(ptr_t p, size_t bytes)
+  {
+    if (0 == p) return;
+    GC_our_memory[GC_n_memory].hs_start = p;
+    GC_our_memory[GC_n_memory].hs_bytes = bytes;
+    GC_n_memory++;
+  }
+#endif
 /*
  * Use the chunk of memory starting at p of size bytes as part of the heap.
  * Assumes p is HBLKSIZE aligned, and bytes is a multiple of HBLKSIZE.
@@ -868,6 +884,7 @@ GC_bool GC_expand_hp_inner(word n)
         return(FALSE);
     }
     space = GET_MEM(bytes);
+    GC_add_to_our_memory((ptr_t)space, bytes);
     if( space == 0 ) {
        if (GC_print_stats) {
            GC_log_printf("Failed to expand heap by %ld bytes\n",
index 59c330f..92d09e0 100644 (file)
@@ -86,6 +86,8 @@ static back_edges * new_back_edges(void)
   if (0 == back_edge_space) {
     back_edge_space = (back_edges *)
                        GET_MEM(MAX_BACK_EDGE_STRUCTS*sizeof(back_edges));
+    GC_add_to_our_memory((ptr_t)back_edge_space,
+                        MAX_BACK_EDGE_STRUCTS*sizeof(back_edges));
   }
   if (0 != avail_back_edges) {
     back_edges * result = avail_back_edges;
@@ -125,11 +127,15 @@ static void push_in_progress(ptr_t p)
     if (in_progress_size == 0) {
       in_progress_size = INITIAL_IN_PROGRESS;
       in_progress_space = (ptr_t *)GET_MEM(in_progress_size * sizeof(ptr_t));
+      GC_add_to_our_memory((ptr_t)in_progress_space,
+                          in_progress_size * sizeof(ptr_t));
     } else {
       ptr_t * new_in_progress_space;
       in_progress_size *= 2;
       new_in_progress_space = (ptr_t *)
                                GET_MEM(in_progress_size * sizeof(ptr_t));
+      GC_add_to_our_memory((ptr_t)new_in_progress_space,
+                          in_progress_size * sizeof(ptr_t));
       BCOPY(in_progress_space, new_in_progress_space,
            n_in_progress * sizeof(ptr_t));
       in_progress_space = new_in_progress_space;
index 4b72e39..70a23ab 100644 (file)
--- a/dbg_mlc.c
+++ b/dbg_mlc.c
@@ -402,12 +402,15 @@ void GC_debug_print_heap_obj_proc(ptr_t p)
 }
 
 #ifndef SHORT_DBG_HDRS
+/* Use GC_err_printf and friends to print a description of the object  */
+/* whose client-visible address is p, and which was smashed at         */
+/* clobbered_addr.                                                     */
 void GC_print_smashed_obj(ptr_t p, ptr_t clobbered_addr)
 {
     register oh * ohdr = (oh *)GC_base(p);
     
     GC_ASSERT(I_DONT_HOLD_LOCK());
-    GC_err_printf("%p in object at %p(", clobbered_addr, p);
+    GC_err_printf("%p in or near object at %p(", clobbered_addr, p);
     if (clobbered_addr <= (ptr_t)(&(ohdr -> oh_sz))
         || ohdr -> oh_string == 0) {
         GC_err_printf("<smashed>, appr. sz = %ld)\n",
@@ -842,7 +845,8 @@ void GC_print_all_smashed_proc(void)
     if (GC_n_smashed == 0) return;
     GC_err_printf("GC_check_heap_block: found smashed heap objects:\n");
     for (i = 0; i < GC_n_smashed; ++i) {
-        GC_print_smashed_obj(GC_base(GC_smashed[i]), GC_smashed[i]);
+        GC_print_smashed_obj((ptr_t)GC_base(GC_smashed[i]) + sizeof(oh),
+                            GC_smashed[i]);
        GC_smashed[i] = 0;
     }
     GC_n_smashed = 0;
index 36968ba..befb1f5 100644 (file)
@@ -239,6 +239,36 @@ char *GC_parse_map_entry(char *buf_ptr, ptr_t *start, ptr_t *end,
 char *GC_get_maps(void);
        /* From os_dep.c        */
 
+/* Sort an array of HeapSects by start address.                                */
+/* Unfortunately at least some versions of                             */
+/* Linux qsort end up calling malloc by way of sysconf, and hence can't */
+/* be used in the collector.  Hence we roll our own.  Should be        */
+/* reasonably fast if the array is already mostly sorted, as we expect */
+/* it to be.                                                           */
+void sort_heap_sects(struct HeapSect *base, size_t number_of_elements)
+{
+    signed_word n = (signed_word)number_of_elements;
+    signed_word nsorted = 1;
+    signed_word i;
+
+    while (nsorted < n) {
+      while (nsorted < n &&
+            base[nsorted-1].hs_start < base[nsorted].hs_start)
+          ++nsorted;
+      if (nsorted == n) break;
+      GC_ASSERT(base[nsorted-1].hs_start > base[nsorted].hs_start);
+      i = nsorted - 1;
+      while (i >= 0 && base[i].hs_start > base[i+1].hs_start) {
+        struct HeapSect tmp = base[i];
+       base[i] = base[i+1];
+       base[i+1] = tmp;
+       --i;
+      }
+      GC_ASSERT(base[nsorted-1].hs_start < base[nsorted].hs_start);
+      ++nsorted;
+    }
+}
+
 word GC_register_map_entries(char *maps)
 {
     char *prot;
@@ -250,18 +280,11 @@ word GC_register_map_entries(char *maps)
     unsigned i;
     ptr_t datastart = (ptr_t)(DATASTART);
 
-    /* Compute heap bounds. FIXME: Should work if heap and roots are   */
-    /* interleaved?                                                    */
-       least_ha = (ptr_t)(word)(-1);
-       greatest_ha = 0;
-       for (i = 0; i < GC_n_heap_sects; ++i) {
-           ptr_t sect_start = GC_heap_sects[i].hs_start;
-           ptr_t sect_end = sect_start + GC_heap_sects[i].hs_bytes;
-           if (sect_start < least_ha) least_ha = sect_start;
-           if (sect_end > greatest_ha) greatest_ha = sect_end;
-        }
-       if (greatest_ha < (ptr_t)GC_scratch_last_end_ptr)
-           greatest_ha = (ptr_t)GC_scratch_last_end_ptr; 
+    GC_ASSERT(I_HOLD_LOCK());
+    sort_heap_sects(GC_our_memory, GC_n_memory);
+    least_ha = GC_our_memory[0].hs_start;
+    greatest_ha = GC_our_memory[GC_n_memory-1].hs_start
+                 + GC_our_memory[GC_n_memory-1].hs_bytes;
 
     for (;;) {
         buf_ptr = GC_parse_map_entry(buf_ptr, &start, &end, &prot, &maj_dev, 0);
@@ -280,25 +303,53 @@ word GC_register_map_entries(char *maps)
              /* That can fail because the stack may disappear while    */
              /* we're marking.  Thus the marker is, and has to be      */
              /* prepared to recover from segmentation faults.          */
+
              if (GC_segment_is_thread_stack(start, end)) continue;
-             /* FIXME: REDIRECT_MALLOC actually works with threads on  */
-             /* LINUX/IA64 if we omit this check.  The problem is that */
+
+             /* FIXME: NPTL squirrels                                  */
+             /* away pointers in pieces of the stack segment that we   */
+             /* don't scan.  We work around this                       */
+             /* by treating anything allocated by libpthread as        */
+             /* uncollectable, as we do in some other cases.           */
+             /* A specifically identified problem is that              */ 
              /* thread stacks contain pointers to dynamic thread       */
              /* vectors, which may be reused due to thread caching.    */
-             /* Currently they may not be marked if the thread is      */
-             /* still live.                                            */
-             /* For dead threads, we trace the whole stack, which is   */
+             /* They may not be marked if the thread is still live.    */
+             /* This specific instance should be addressed by          */
+             /* INCLUDE_LINUX_THREAD_DESCR, but that doesn't quite     */
+             /* seem to suffice.                                       */
+             /* We currently trace entire thread stacks, if they       */
+             /* are currently cached but unused.  This is              */
              /* very suboptimal for performance reasons.               */
 #          endif
            /* We no longer exclude the main data segment.              */
-           if (start < least_ha && end > least_ha) {
-               end = least_ha;
-           }
-           if (start < greatest_ha && end > greatest_ha) {
-               start = greatest_ha;
+           if (end <= least_ha || start >= greatest_ha) {
+             /* The easy case; just trace entire segment */
+             GC_add_roots_inner((char *)start, (char *)end, TRUE);
+             continue;
            }
-           if (start >= least_ha && end <= greatest_ha) continue;
-           GC_add_roots_inner((char *)start, (char *)end, TRUE);
+           /* Add sections that don't belong to us. */
+             i = 0;
+             while (GC_our_memory[i].hs_start + GC_our_memory[i].hs_bytes
+                    < start)
+                 ++i;
+             GC_ASSERT(i < GC_n_memory);
+             if (GC_our_memory[i].hs_start <= start) {
+                 start = GC_our_memory[i].hs_start
+                         + GC_our_memory[i].hs_bytes;
+                 ++i;
+             }
+             while (i < GC_n_memory && GC_our_memory[i].hs_start < end
+                    && start < end) {
+                 if ((char *)start < GC_our_memory[i].hs_start)
+                   GC_add_roots_inner((char *)start,
+                                      GC_our_memory[i].hs_start, TRUE);
+                 start = GC_our_memory[i].hs_start
+                         + GC_our_memory[i].hs_bytes;
+                 ++i;
+             }
+             if (start < end)
+                 GC_add_roots_inner((char *)start, (char *)end, TRUE);
        }
     }
     return 1;
index 6b4eb84..7aef710 100644 (file)
--- a/headers.c
+++ b/headers.c
@@ -132,11 +132,13 @@ ptr_t GC_scratch_alloc(size_t bytes)
                bytes_to_get &= ~(GC_page_size - 1);
 #          endif
            result = (ptr_t)GET_MEM(bytes_to_get);
+           GC_add_to_our_memory(result, bytes_to_get);
             scratch_free_ptr -= bytes;
            GC_scratch_last_end_ptr = result + bytes;
             return(result);
         }
         result = (ptr_t)GET_MEM(bytes_to_get);
+        GC_add_to_our_memory(result, bytes_to_get);
         if (result == 0) {
            if (GC_print_stats)
                 GC_printf("Out of memory - trying to allocate less\n");
@@ -146,7 +148,9 @@ ptr_t GC_scratch_alloc(size_t bytes)
                bytes_to_get += GC_page_size - 1;
                bytes_to_get &= ~(GC_page_size - 1);
 #          endif
-            return((ptr_t)GET_MEM(bytes_to_get));
+            result = (ptr_t)GET_MEM(bytes_to_get);
+            GC_add_to_our_memory(result, bytes_to_get);
+           return result;
         }
         scratch_free_ptr = result;
         GC_scratch_end_ptr = scratch_free_ptr + bytes_to_get;
index ec93ffe..1c67736 100644 (file)
@@ -972,7 +972,14 @@ struct _GC_arrays {
 # endif
   struct HeapSect {
       ptr_t hs_start; size_t hs_bytes;
-  } _heap_sects[MAX_HEAP_SECTS];
+  } _heap_sects[MAX_HEAP_SECTS];       /* Heap segments potentially    */
+                                       /* containing client objects.   */
+# if defined(USE_PROC_FOR_LIBRARIES)
+     struct HeapSect _our_memory[MAX_HEAP_SECTS];
+                                       /* All GET_MEM allocated        */
+                                       /* memory.  Includes block      */
+                                       /* headers and the like.        */
+# endif
 # if defined(MSWIN32) || defined(MSWINCE)
     ptr_t _heap_bases[MAX_HEAP_SECTS];
                /* Start address of memory regions obtained from kernel. */
@@ -1040,6 +1047,9 @@ GC_API GC_FAR struct _GC_arrays GC_arrays;
 # define GC_requested_heapsize GC_arrays._requested_heapsize
 # define GC_bytes_allocd_before_gc GC_arrays._bytes_allocd_before_gc
 # define GC_heap_sects GC_arrays._heap_sects
+# ifdef USE_PROC_FOR_LIBRARIES
+#   define GC_our_memory GC_arrays._our_memory
+# endif
 # define GC_last_stack GC_arrays._last_stack
 #ifdef ENABLE_TRACE
 #define GC_trace_addr GC_arrays._trace_addr
@@ -1139,6 +1149,11 @@ GC_API word GC_fo_entries;
 extern word GC_n_heap_sects;   /* Number of separately added heap      */
                                /* sections.                            */
 
+#ifdef USE_PROC_FOR_LIBRARIES
+  extern word GC_n_memory;     /* Number of GET_MEM allocated memory   */
+                               /* sections.                            */
+#endif
+
 extern word GC_page_size;
 
 # if defined(MSWIN32) || defined(MSWINCE)
@@ -1713,6 +1728,14 @@ GC_API void GC_debug_invoke_finalizer(void * obj, void * data);
                        
 void GC_add_to_heap(struct hblk *p, size_t bytes);
                        /* Add a HBLKSIZE aligned chunk to the heap.    */
+
+#ifdef USE_PROC_FOR_LIBRARIES
+  void GC_add_to_our_memory(ptr_t p, size_t bytes);
+                       /* Add a chunk to GC_our_memory.        */
+                       /* If p == 0, do nothing.               */
+#else
+# define GC_add_to_our_memory(p, bytes)
+#endif
   
 void GC_print_obj(ptr_t p);
                        /* P points to somewhere inside an object with  */
index 9403264..4795048 100644 (file)
--- a/malloc.c
+++ b/malloc.c
@@ -319,7 +319,7 @@ void * malloc(size_t lb)
     return((void *)REDIRECT_MALLOC(lb));
   }
 
-#ifdef GC_LINUX_THREADS
+#if defined(GC_LINUX_THREADS) /* && !defined(USE_PROC_FOR_LIBRARIES) */
   static ptr_t GC_libpthread_start = 0;
   static ptr_t GC_libpthread_end = 0;
   static ptr_t GC_libld_start = 0;
@@ -330,17 +330,15 @@ void * malloc(size_t lb)
   void GC_init_lib_bounds(void)
   {
     if (GC_libpthread_start != 0) return;
-    if (!GC_text_mapping("/lib/tls/libpthread-",
-                        &GC_libpthread_start, &GC_libpthread_end)
-       && !GC_text_mapping("/lib/libpthread-",
-                           &GC_libpthread_start, &GC_libpthread_end)) {
+    if (!GC_text_mapping("libpthread-",
+                        &GC_libpthread_start, &GC_libpthread_end)) {
        WARN("Failed to find libpthread.so text mapping: Expect crash\n", 0);
         /* This might still work with some versions of libpthread,     */
        /* so we don't abort.  Perhaps we should.                       */
         /* Generate message only once:                                 */
           GC_libpthread_start = (ptr_t)1;
     }
-    if (!GC_text_mapping("/lib/ld-", &GC_libld_start, &GC_libld_end)) {
+    if (!GC_text_mapping("ld-", &GC_libld_start, &GC_libld_end)) {
        WARN("Failed to find ld.so text mapping: Expect crash\n", 0);
     }
   }
@@ -348,7 +346,7 @@ void * malloc(size_t lb)
 
 void * calloc(size_t n, size_t lb)
 {
-#   if defined(GC_LINUX_THREADS) && !defined(USE_PROC_FOR_LIBRARIES)
+#   if defined(GC_LINUX_THREADS) /* && !defined(USE_PROC_FOR_LIBRARIES) */
        /* libpthread allocated some memory that is only pointed to by  */
        /* mmapped thread stacks.  Make sure it's not collectable.      */
        {
@@ -406,11 +404,13 @@ void GC_free(void * p)
 
     if (p == 0) return;
        /* Required by ANSI.  It's not my fault ...     */
+#   ifdef LOG_ALLOCS
+      GC_err_printf("GC_free(%p): %d\n", p, GC_gc_no);
+#   endif
     h = HBLKPTR(p);
     hhdr = HDR(h);
     sz = hhdr -> hb_sz;
     ngranules = BYTES_TO_GRANULES(sz);
-    GC_ASSERT(GC_base(p) == p);
 #   if defined(REDIRECT_MALLOC) && \
        (defined(GC_SOLARIS_THREADS) || defined(GC_LINUX_THREADS) \
         || defined(MSWIN32))
@@ -420,6 +420,7 @@ void GC_free(void * p)
        /* Don't try to deallocate that memory.                         */
        if (0 == hhdr) return;
 #   endif
+    GC_ASSERT(GC_base(p) == p);
     knd = hhdr -> hb_obj_kind;
     ok = &GC_obj_kinds[knd];
     if (EXPECT((ngranules <= MAXOBJGRANULES), 1)) {
diff --git a/misc.c b/misc.c
index d846e9d..bcaab3f 100644 (file)
--- a/misc.c
+++ b/misc.c
@@ -731,12 +731,6 @@ void GC_init_inner()
 #   ifdef STUBBORN_ALLOC
        GC_stubborn_init();
 #   endif
-#   if defined(GC_LINUX_THREADS) && defined(REDIRECT_MALLOC)
-       {
-         extern void GC_init_lib_bounds(void);
-         GC_init_lib_bounds();
-       }
-#   endif
     /* Convince lint that some things are used */
 #   ifdef LINT
       {
index 8f83825..cba653b 100644 (file)
--- a/os_dep.c
+++ b/os_dep.c
@@ -373,7 +373,9 @@ GC_bool GC_enclosing_mapping(ptr_t addr, ptr_t *startp, ptr_t *endp)
   return FALSE;
 }
 
-/* Find the text(code) mapping for the library whose name starts with nm. */
+#if defined(REDIRECT_MALLOC)
+/* Find the text(code) mapping for the library whose name, after       */
+/* stripping the directory part, starts with nm.                       */
 GC_bool GC_text_mapping(char *nm, ptr_t *startp, ptr_t *endp)
 {
   size_t nm_len = strlen(nm);
@@ -390,15 +392,22 @@ GC_bool GC_text_mapping(char *nm, ptr_t *startp, ptr_t *endp)
                                 &prot, &maj_dev, &map_path);
 
     if (buf_ptr == NULL) return FALSE;
-    if (prot[0] == 'r' && prot[1] == '-' && prot[2] == 'x' &&
-       strncmp(nm, map_path, nm_len) == 0) {
+    if (prot[0] == 'r' && prot[1] == '-' && prot[2] == 'x') {
+       char *p = map_path;
+       /* Set p to point just past last slash, if any. */
+         while (*p != '\0' && *p != '\n' && *p != ' ' && *p != '\t') ++p;
+         while (*p != '/' && p >= map_path) --p;
+         ++p;
+       if (strncmp(nm, p, nm_len) == 0) {
          *startp = my_start;
          *endp = my_end;
          return TRUE;
+       }
     }
   }
   return FALSE;
 }
+#endif /* REDIRECT_MALLOC */
 
 #ifdef IA64
 static ptr_t backing_store_base_from_proc(void)
index 47cf99b..a6db6ff 100644 (file)
 # include <stdarg.h>
 #endif
 
+/* Call GC_INIT only on platforms on which we think we really need it, */
+/* so that we can test automatic initialization on the rest.           */
+#if defined(__CYGWIN32__) || defined (_AIX)
+#  define GC_COND_INIT() GC_INIT()
+#else
+#  define GC_COND_INIT()
+#endif
 
 /* Allocation Statistics */
 int stubborn_count = 0;
@@ -1318,7 +1325,7 @@ void SetMinimumStack(long minSize)
        /* Cheat and let stdio initialize toolbox for us.       */
        printf("Testing GC Macintosh port.\n");
 #   endif
-    GC_INIT(); /* Only needed on a few platforms.      */
+    GC_COND_INIT();
     (void) GC_set_warn_proc(warn_proc);
 #   if (defined(MPROTECT_VDB) || defined(PROC_VDB) || defined(GWW_VDB)) \
           && !defined(MAKE_BACK_GRAPH) && !defined(NO_INCREMENTAL)
@@ -1465,7 +1472,7 @@ int APIENTRY WinMain(HINSTANCE instance, HINSTANCE prev, LPSTR cmd, int n)
     GC_use_DllMain();  /* Test with implicit thread registration if possible. */
     GC_printf("Using DllMain to track threads\n");
 # endif
-  GC_INIT();
+  GC_COND_INIT();
 # ifndef NO_INCREMENTAL
     GC_enable_incremental();
 # endif
@@ -1575,7 +1582,7 @@ int main()
        pthread_win32_process_attach_np ();
        pthread_win32_thread_attach_np ();
 #   endif
-    GC_INIT();
+    GC_COND_INIT();
 
     pthread_attr_init(&attr);
 #   if defined(GC_IRIX_THREADS) || defined(GC_FREEBSD_THREADS) \
index f747b35..fa1499e 100644 (file)
@@ -73,11 +73,11 @@ static void return_freelists(void **fl, void **gfl)
 
 /* Each thread structure must be initialized.  */
 /* This call must be made from the new thread. */
-/* Caller holds allocation lock.               */
 void GC_init_thread_local(GC_tlfs p)
 {
     int i;
 
+    GC_ASSERT(I_HOLD_LOCK());
     if (!keys_initialized) {
        if (0 != GC_key_create(&GC_thread_key, 0)) {
            ABORT("Failed to create key for local allocator");
@@ -141,7 +141,7 @@ void * GC_malloc(size_t bytes)
     void *result;
     void **tiny_fl;
 
-#   if defined(REDIRECT_MALLOC) && !defined(USE_PTHREAD_SPECIFIC)
+#   if !defined(USE_PTHREAD_SPECIFIC) && !defined(USE_WIN32_SPECIFIC)
       GC_key_t k = GC_thread_key;
       if (EXPECT(0 == k, 0)) {
        /* We haven't yet run GC_init_parallel.  That means     */
@@ -150,14 +150,14 @@ void * GC_malloc(size_t bytes)
       }
       tsd = GC_getspecific(k);
 #   else
-      GC_ASSERT(GC_is_initialized);
       tsd = GC_getspecific(GC_thread_key);
 #   endif
-#   if defined(REDIRECT_MALLOC) && defined(USE_PTHREAD_SPECIFIC)
-      if (EXPECT(NULL == tsd, 0)) {
+#   if defined(USE_PTHREAD_SPECIFIC) || defined(USE_WIN32_SPECIFIC)
+      if (EXPECT(0 == tsd, 0)) {
        return GC_core_malloc(bytes);
       }
 #   endif
+    GC_ASSERT(GC_is_initialized);
 #   ifdef GC_ASSERTIONS
       /* We can't check tsd correctly, since we don't have access to   */
       /* the right declarations.  But we can check that it's close.    */
@@ -175,18 +175,37 @@ void * GC_malloc(size_t bytes)
     tiny_fl = ((GC_tlfs)tsd) -> normal_freelists;
     GC_FAST_MALLOC_GRANS(result, granules, tiny_fl, DIRECT_GRANULES,
                         NORMAL, GC_core_malloc(bytes), obj_link(result)=0);
+#   ifdef LOG_ALLOCS
+      GC_err_printf("GC_malloc(%d) = %p : %d\n", bytes, result, GC_gc_no);
+#   endif
     return result;
 }
 
 void * GC_malloc_atomic(size_t bytes)
 {
     size_t granules = ROUNDED_UP_GRANULES(bytes);
+    void *tsd;
     void *result;
     void **tiny_fl;
 
+#   if !defined(USE_PTHREAD_SPECIFIC) && !defined(USE_WIN32_SPECIFIC)
+      GC_key_t k = GC_thread_key;
+      if (EXPECT(0 == k, 0)) {
+       /* We haven't yet run GC_init_parallel.  That means     */
+       /* we also aren't locking, so this is fairly cheap.     */
+       return GC_core_malloc(bytes);
+      }
+      tsd = GC_getspecific(k);
+#   else
+      tsd = GC_getspecific(GC_thread_key);
+#   endif
+#   if defined(USE_PTHREAD_SPECIFIC) || defined(USE_WIN32_SPECIFIC)
+      if (EXPECT(0 == tsd, 0)) {
+       return GC_core_malloc(bytes);
+      }
+#   endif
     GC_ASSERT(GC_is_initialized);
-    tiny_fl = ((GC_tlfs)GC_getspecific(GC_thread_key))
-                                       -> ptrfree_freelists;
+    tiny_fl = ((GC_tlfs)tsd) -> ptrfree_freelists;
     GC_FAST_MALLOC_GRANS(result, bytes, tiny_fl, DIRECT_GRANULES,
                         PTRFREE, GC_core_malloc_atomic(bytes), 0/* no init */);
     return result;
@@ -220,6 +239,8 @@ extern int GC_gcj_kind;
 /* are not necessarily free.  And there may be cache fill order issues.        */
 /* For now, we punt with incremental GC.  This probably means that     */
 /* incremental GC should be enabled before we fork a second thread.    */
+/* Unlike the other thread local allocation calls, we assume that the  */
+/* collector has been explicitly initialized.                          */
 void * GC_gcj_malloc(size_t bytes,
                     void * ptr_to_struct_containing_descr)
 {