gc7.0alpha5 tarball import

author Ivan Maidanski <ivmai@mail.ru>

Tue, 26 Jul 2011 16:30:36 +0000 (20:30 +0400)

committer Ivan Maidanski <ivmai@mail.ru>

Tue, 26 Jul 2011 16:30:36 +0000 (20:30 +0400)
author Ivan Maidanski <ivmai@mail.ru>
Tue, 26 Jul 2011 16:30:36 +0000 (20:30 +0400)
committer Ivan Maidanski <ivmai@mail.ru>
Tue, 26 Jul 2011 16:30:36 +0000 (20:30 +0400)
diff --git a/Makefile b/Makefile

index 24326c0..78e8159 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -32,7 +32,7 @@ VPATH= $(srcdir)
  
  # Atomic_ops installation directory.  If this doesn't exist, we create
  # it from the included libatomic_ops distribution.
-AO_VERSION=1.0
+AO_VERSION=1.1
  AO_SRC_DIR=$(srcdir)/libatomic_ops-$(AO_VERSION)
  AO_INSTALL_DIR=$(srcdir)/libatomic_ops-install
  
@@ -349,7 +349,7 @@ SRCS= $(CSRCS) mips_sgi_mach_dep.s rs6000_mach_dep.s alpha_mach_dep.S \
  
  DOC_FILES= README.QUICK doc/README.Mac doc/README.MacOSX doc/README.OS2 \
         doc/README.amiga doc/README.cords doc/debugging.html \
-       doc/porting.html \
+       doc/porting.html doc/overview.html \
         doc/README.dj doc/README.hp doc/README.linux doc/README.rs6000 \
         doc/README.sgi doc/README.solaris2 doc/README.uts \
         doc/README.win32 doc/barrett_diagram doc/README \
@@ -450,8 +450,9 @@ $(OBJS) tests/test.o dyn_load.o dyn_load_sunos53.o: \
  mark.o typd_mlc.o finalize.o ptr_chck.o: $(srcdir)/include/gc_mark.h \
                                          $(srcdir)/include/private/gc_pmark.h
  
-specific.o pthread_support.o: $(srcdir)/include/private/specific.h \
-                             $(srcdir)/include/gc_inline.h
+specific.o pthread_support.o thread_local_alloc.o win32_threads.o: \
+       $(srcdir)/include/private/specific.h $(srcdir)/include/gc_inline.h \
+       $(srcdir)/include/private/thread_local_alloc.h
  
  dbg_mlc.o gcj_mlc.o: $(srcdir)/include/private/dbg_mlc.h
  
@@ -465,6 +466,7 @@ tests:
  base_lib gc.a: $(OBJS) dyn_load.o $(UTILS)
         echo > base_lib
         rm -f dont_ar_1
+       cp $(AO_INSTALL_DIR)/lib/libatomic_ops.a gc.a
         ./if_mach SPARC SUNOS5 touch dont_ar_1
         ./if_mach SPARC SUNOS5 $(AR) rus gc.a $(OBJS) dyn_load.o
         ./if_mach M68K AMIGA touch dont_ar_1
@@ -513,7 +515,7 @@ dyn_load_sunos53.o: dyn_load.c
  
  # SunOS5 shared library version of the collector
  sunos5gc.so: $(OBJS) dyn_load_sunos53.o
-       $(CC) -G -o sunos5gc.so $(OBJS) dyn_load_sunos53.o -ldl
+       $(CC) -G -o sunos5gc.so $(OBJS) dyn_load_sunos53.o $(AO_INSTALL_DIR)/lib/libatomic_ops.a -ldl
         ln sunos5gc.so libgc.so
  
  # Alpha/OSF shared library version of the collector
@@ -556,14 +558,11 @@ mach_dep.o: $(srcdir)/mach_dep.c $(srcdir)/mips_sgi_mach_dep.s \
             $(srcdir)/ia64_save_regs_in_stack.s \
             $(srcdir)/sparc_netbsd_mach_dep.s $(UTILS)
         rm -f mach_dep.o
-       ./if_mach MIPS IRIX5 $(CC) -c -o mach_dep.o $(srcdir)/mips_sgi_mach_dep.s
-       ./if_mach MIPS RISCOS $(AS) -o mach_dep.o $(srcdir)/mips_ultrix_mach_dep.s
-       ./if_mach MIPS ULTRIX $(AS) -o mach_dep.o $(srcdir)/mips_ultrix_mach_dep.s
-       ./if_mach POWERPC DARWIN $(AS) -o mach_dep.o $(srcdir)/powerpc_darwin_mach_dep.s
-       ./if_mach ALPHA LINUX $(CC) -c -o mach_dep.o $(srcdir)/alpha_mach_dep.S
-       ./if_mach SPARC SUNOS5 $(CC) -c -o mach_dep.o $(srcdir)/sparc_mach_dep.S
-       ./if_mach SPARC OPENBSD $(AS) -o mach_dep.o $(srcdir)/sparc_sunos4_mach_dep.s
-       ./if_mach SPARC NETBSD $(AS) -o mach_dep.o $(srcdir)/sparc_netbsd_mach_dep.s
+       ./if_mach SPARC SUNOS5 $(CC) -c -o mach_dep2.o $(srcdir)/sparc_mach_dep.S
+       ./if_mach SPARC OPENBSD $(AS) -o mach_dep2.o $(srcdir)/sparc_sunos4_mach_dep.s
+       ./if_mach SPARC NETBSD $(AS) -o mach_dep2.o $(srcdir)/sparc_netbsd_mach_dep.s
+       ./if_mach SPARC "" $(CC) -c -o mach_dep1.o $(SPECIALCFLAGS) $(srcdir)/mach_dep.c
+       ./if_mach SPARC "" ld -r -o mach_dep.o mach_dep1.o mach_dep2.o
         ./if_mach IA64 "" as $(AS_ABI_FLAG) -o ia64_save_regs_in_stack.o $(srcdir)/ia64_save_regs_in_stack.s
         ./if_mach IA64 "" $(CC) -c -o mach_dep1.o $(SPECIALCFLAGS) $(srcdir)/mach_dep.c
         ./if_mach IA64 "" ld -r -o mach_dep.o mach_dep1.o ia64_save_regs_in_stack.o
@@ -638,7 +637,7 @@ gctest: tests/test.o gc.a $(UTILS)
  # If an optimized setjmp_test generates a segmentation fault,
  # odds are your compiler is broken.  Gctest may still work.
  # Try compiling setjmp_t.c unoptimized.
-setjmp_test: $(srcdir)/setjmp_t.c $(srcdir)/include/gc.h $(UTILS)
+setjmp_test: $(srcdir)/setjmp_t.c $(srcdir)/include/gc.h $(UTILS) $(AO_INSTALL_DIR)
         $(CC) $(CFLAGS) -o setjmp_test $(srcdir)/setjmp_t.c
  
  test:  KandRtest cord/cordtest
diff --git a/Makefile.direct b/Makefile.direct

index 24326c0..78e8159 100644 (file)
--- a/Makefile.direct
+++ b/Makefile.direct
@@ -32,7 +32,7 @@ VPATH= $(srcdir)
  
  # Atomic_ops installation directory.  If this doesn't exist, we create
  # it from the included libatomic_ops distribution.
-AO_VERSION=1.0
+AO_VERSION=1.1
  AO_SRC_DIR=$(srcdir)/libatomic_ops-$(AO_VERSION)
  AO_INSTALL_DIR=$(srcdir)/libatomic_ops-install
  
@@ -349,7 +349,7 @@ SRCS= $(CSRCS) mips_sgi_mach_dep.s rs6000_mach_dep.s alpha_mach_dep.S \
  
  DOC_FILES= README.QUICK doc/README.Mac doc/README.MacOSX doc/README.OS2 \
         doc/README.amiga doc/README.cords doc/debugging.html \
-       doc/porting.html \
+       doc/porting.html doc/overview.html \
         doc/README.dj doc/README.hp doc/README.linux doc/README.rs6000 \
         doc/README.sgi doc/README.solaris2 doc/README.uts \
         doc/README.win32 doc/barrett_diagram doc/README \
@@ -450,8 +450,9 @@ $(OBJS) tests/test.o dyn_load.o dyn_load_sunos53.o: \
  mark.o typd_mlc.o finalize.o ptr_chck.o: $(srcdir)/include/gc_mark.h \
                                          $(srcdir)/include/private/gc_pmark.h
  
-specific.o pthread_support.o: $(srcdir)/include/private/specific.h \
-                             $(srcdir)/include/gc_inline.h
+specific.o pthread_support.o thread_local_alloc.o win32_threads.o: \
+       $(srcdir)/include/private/specific.h $(srcdir)/include/gc_inline.h \
+       $(srcdir)/include/private/thread_local_alloc.h
  
  dbg_mlc.o gcj_mlc.o: $(srcdir)/include/private/dbg_mlc.h
  
@@ -465,6 +466,7 @@ tests:
  base_lib gc.a: $(OBJS) dyn_load.o $(UTILS)
         echo > base_lib
         rm -f dont_ar_1
+       cp $(AO_INSTALL_DIR)/lib/libatomic_ops.a gc.a
         ./if_mach SPARC SUNOS5 touch dont_ar_1
         ./if_mach SPARC SUNOS5 $(AR) rus gc.a $(OBJS) dyn_load.o
         ./if_mach M68K AMIGA touch dont_ar_1
@@ -513,7 +515,7 @@ dyn_load_sunos53.o: dyn_load.c
  
  # SunOS5 shared library version of the collector
  sunos5gc.so: $(OBJS) dyn_load_sunos53.o
-       $(CC) -G -o sunos5gc.so $(OBJS) dyn_load_sunos53.o -ldl
+       $(CC) -G -o sunos5gc.so $(OBJS) dyn_load_sunos53.o $(AO_INSTALL_DIR)/lib/libatomic_ops.a -ldl
         ln sunos5gc.so libgc.so
  
  # Alpha/OSF shared library version of the collector
@@ -556,14 +558,11 @@ mach_dep.o: $(srcdir)/mach_dep.c $(srcdir)/mips_sgi_mach_dep.s \
             $(srcdir)/ia64_save_regs_in_stack.s \
             $(srcdir)/sparc_netbsd_mach_dep.s $(UTILS)
         rm -f mach_dep.o
-       ./if_mach MIPS IRIX5 $(CC) -c -o mach_dep.o $(srcdir)/mips_sgi_mach_dep.s
-       ./if_mach MIPS RISCOS $(AS) -o mach_dep.o $(srcdir)/mips_ultrix_mach_dep.s
-       ./if_mach MIPS ULTRIX $(AS) -o mach_dep.o $(srcdir)/mips_ultrix_mach_dep.s
-       ./if_mach POWERPC DARWIN $(AS) -o mach_dep.o $(srcdir)/powerpc_darwin_mach_dep.s
-       ./if_mach ALPHA LINUX $(CC) -c -o mach_dep.o $(srcdir)/alpha_mach_dep.S
-       ./if_mach SPARC SUNOS5 $(CC) -c -o mach_dep.o $(srcdir)/sparc_mach_dep.S
-       ./if_mach SPARC OPENBSD $(AS) -o mach_dep.o $(srcdir)/sparc_sunos4_mach_dep.s
-       ./if_mach SPARC NETBSD $(AS) -o mach_dep.o $(srcdir)/sparc_netbsd_mach_dep.s
+       ./if_mach SPARC SUNOS5 $(CC) -c -o mach_dep2.o $(srcdir)/sparc_mach_dep.S
+       ./if_mach SPARC OPENBSD $(AS) -o mach_dep2.o $(srcdir)/sparc_sunos4_mach_dep.s
+       ./if_mach SPARC NETBSD $(AS) -o mach_dep2.o $(srcdir)/sparc_netbsd_mach_dep.s
+       ./if_mach SPARC "" $(CC) -c -o mach_dep1.o $(SPECIALCFLAGS) $(srcdir)/mach_dep.c
+       ./if_mach SPARC "" ld -r -o mach_dep.o mach_dep1.o mach_dep2.o
         ./if_mach IA64 "" as $(AS_ABI_FLAG) -o ia64_save_regs_in_stack.o $(srcdir)/ia64_save_regs_in_stack.s
         ./if_mach IA64 "" $(CC) -c -o mach_dep1.o $(SPECIALCFLAGS) $(srcdir)/mach_dep.c
         ./if_mach IA64 "" ld -r -o mach_dep.o mach_dep1.o ia64_save_regs_in_stack.o
@@ -638,7 +637,7 @@ gctest: tests/test.o gc.a $(UTILS)
  # If an optimized setjmp_test generates a segmentation fault,
  # odds are your compiler is broken.  Gctest may still work.
  # Try compiling setjmp_t.c unoptimized.
-setjmp_test: $(srcdir)/setjmp_t.c $(srcdir)/include/gc.h $(UTILS)
+setjmp_test: $(srcdir)/setjmp_t.c $(srcdir)/include/gc.h $(UTILS) $(AO_INSTALL_DIR)
         $(CC) $(CFLAGS) -o setjmp_test $(srcdir)/setjmp_t.c
  
  test:  KandRtest cord/cordtest
diff --git a/Makefile.in b/Makefile.in

index 0c881db..68708ef 100644 (file)
--- a/Makefile.in
+++ b/Makefile.in
@@ -527,6 +527,7 @@ dist_pkgdata_DATA = \
         doc/README.solaris2 \
         doc/README.uts \
         doc/README.win32 \
+       doc/overview.html \
         doc/tree.html \
         doc/leak.html \
         doc/gcinterface.html \
diff --git a/NT_STATIC_THREADS_MAKEFILE b/NT_STATIC_THREADS_MAKEFILE

index 91fb7f6..f37d6d1 100644 (file)
--- a/NT_STATIC_THREADS_MAKEFILE
+++ b/NT_STATIC_THREADS_MAKEFILE
@@ -10,8 +10,8 @@ CPU=$(MY_CPU)
  # should do, since we only need the headers.
  # We assume this was manually unpacked, since I'm not sure there is
  # a Windows standard command line tool to do this.
-AO_VERSION=0.6
-AO_SRC_DIR=$(srcdir)/atomic_ops-$(AO_VERSION)
+AO_VERSION=1.1
+AO_SRC_DIR=libatomic_ops-$(AO_VERSION)/src
  AO_INCLUDE_DIR=$(AO_SRC_DIR)
  
  OBJS= alloc.obj reclaim.obj allchblk.obj misc.obj mach_dep.obj os_dep.obj mark_rts.obj headers.obj mark.obj obj_map.obj blacklst.obj finalize.obj new_hblk.obj dbg_mlc.obj malloc.obj stubborn.obj dyn_load.obj typd_mlc.obj ptr_chck.obj gc_cpp.obj mallocx.obj win32_threads.obj
diff --git a/allchblk.c b/allchblk.c

index ad55bed..997580a 100644 (file)
--- a/allchblk.c
+++ b/allchblk.c
@@ -560,7 +560,7 @@ int index;  /* Index of free list */
                                 /* free blocks in GC_add_to_fl.         */
  #     endif
  #   ifdef USE_MUNMAP
-      hhdr -> hb_last_reclaimed = GC_gc_no;
+      hhdr -> hb_last_reclaimed = (unsigned short)GC_gc_no;
  #   endif
      hhdr -> hb_sz = h_size;
      GC_add_to_fl(h, hhdr);
@@ -568,7 +568,7 @@ int index;  /* Index of free list */
  }
         
  struct hblk *
-GC_allochblk_nth(word sz/* bytes */, int kind, unsigned char flags, int n);
+GC_allochblk_nth(size_t sz/* bytes */, int kind, unsigned char flags, int n);
  
  /*
   * Allocate (and return pointer to) a heap block
@@ -580,7 +580,7 @@ GC_allochblk_nth(word sz/* bytes */, int kind, unsigned char flags, int n);
   * The client is responsible for clearing the block, if necessary.
   */
  struct hblk *
-GC_allochblk(size_t sz, int kind, unsigned flags/* IGNORE_OFF_PAGE or 0 */)
+GC_allochblk(size_t sz, int kind, unsigned char flags/* IGNORE_OFF_PAGE or 0 */)
  {
      word blocks;
      int start_list;
@@ -603,7 +603,7 @@ GC_allochblk(size_t sz, int kind, unsigned flags/* IGNORE_OFF_PAGE or 0 */)
   * Unlike the above, sz is in bytes.
   */
  struct hblk *
-GC_allochblk_nth(word sz, int kind, unsigned char flags, int n)
+GC_allochblk_nth(size_t sz, int kind, unsigned char flags, int n)
  {
      struct hblk *hbp;
      hdr * hhdr;                /* Header corr. to hbp */
@@ -822,7 +822,7 @@ signed_word size;
      GC_remove_counts(hbp, (word)size);
      hhdr->hb_sz = size;
  #   ifdef USE_MUNMAP
-      hhdr -> hb_last_reclaimed = GC_gc_no;
+      hhdr -> hb_last_reclaimed = (unsigned short)GC_gc_no;
  #   endif
      
      /* Check for duplicate deallocation in the easy case */
@@ -849,7 +849,7 @@ signed_word size;
           GC_remove_from_fl(prevhdr, FL_UNKNOWN);
           prevhdr -> hb_sz += hhdr -> hb_sz;
  #        ifdef USE_MUNMAP
-           prevhdr -> hb_last_reclaimed = GC_gc_no;
+           prevhdr -> hb_last_reclaimed = (unsigned short)GC_gc_no;
  #        endif
           GC_remove_header(hbp);
           hbp = prev;
diff --git a/alloc.c b/alloc.c

index 1be4516..0e292f7 100644 (file)
--- a/alloc.c
+++ b/alloc.c
@@ -569,8 +569,16 @@ void GC_clear_fl_marks(ptr_t q)
         }
         bit_no = MARK_BIT_NO((ptr_t)p - (ptr_t)h, sz);
         if (mark_bit_from_hdr(hhdr, bit_no)) {
+         int n_marks = hhdr -> hb_n_marks - 1;
           clear_mark_bit_from_hdr(hhdr, bit_no);
-          --hhdr -> hb_n_marks;
+#        ifdef PARALLEL_MARK
+           /* Appr. count, don't decrement to zero! */
+           if (0 != n_marks) {
+              hhdr -> hb_n_marks = n_marks;
+           }
+#        else
+            hhdr -> hb_n_marks = n_marks;
+#        endif
          }
         GC_bytes_found -= sz;
     }
diff --git a/configure b/configure

index 36c8a6f..7e58904 100755 (executable)
--- a/configure
+++ b/configure
@@ -1,7 +1,7 @@
  #! /bin/sh
  # From configure.ac Revision: 1.2 .
  # Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.59 for gc 7.0alpha4.
+# Generated by GNU Autoconf 2.59 for gc 7.0alpha5.
  #
  # Report bugs to <Hans.Boehm@hp.com>.
  #
@@ -429,8 +429,8 @@ SHELL=${CONFIG_SHELL-/bin/sh}
  # Identity of this package.
  PACKAGE_NAME='gc'
  PACKAGE_TARNAME='gc'
-PACKAGE_VERSION='7.0alpha4'
-PACKAGE_STRING='gc 7.0alpha4'
+PACKAGE_VERSION='7.0alpha5'
+PACKAGE_STRING='gc 7.0alpha5'
  PACKAGE_BUGREPORT='Hans.Boehm@hp.com'
  
  ac_unique_file="gcj_mlc.c"
@@ -957,7 +957,7 @@ if test "$ac_init_help" = "long"; then
    # Omit some internal or obsolete options to make the list less imposing.
    # This message is too long to be a string in the A/UX 3.1 sh.
    cat <<_ACEOF
-\`configure' configures gc 7.0alpha4 to adapt to many kinds of systems.
+\`configure' configures gc 7.0alpha5 to adapt to many kinds of systems.
  
  Usage: $0 [OPTION]... [VAR=VALUE]...
  
@@ -1024,7 +1024,7 @@ fi
  
  if test -n "$ac_init_help"; then
    case $ac_init_help in
-     short | recursive ) echo "Configuration of gc 7.0alpha4:";;
+     short | recursive ) echo "Configuration of gc 7.0alpha5:";;
     esac
    cat <<\_ACEOF
  
@@ -1174,7 +1174,7 @@ fi
  test -n "$ac_init_help" && exit 0
  if $ac_init_version; then
    cat <<\_ACEOF
-gc configure 7.0alpha4
+gc configure 7.0alpha5
  generated by GNU Autoconf 2.59
  
  Copyright (C) 2003 Free Software Foundation, Inc.
@@ -1188,7 +1188,7 @@ cat >&5 <<_ACEOF
  This file contains any messages produced by compilers while
  running configure, to aid debugging if configure makes a mistake.
  
-It was created by gc $as_me 7.0alpha4, which was
+It was created by gc $as_me 7.0alpha5, which was
  generated by GNU Autoconf 2.59.  Invocation command line was
  
    $ $0 $@
@@ -1960,7 +1960,7 @@ fi
  
  # Define the identity of the package.
   PACKAGE='gc'
- VERSION='7.0alpha4'
+ VERSION='7.0alpha5'
  
  
  cat >>confdefs.h <<_ACEOF
@@ -4595,14 +4595,14 @@ echo $ECHO_N "checking which machine-dependent code should be used... $ECHO_C" >
  machdep=
  case "$host" in
   alpha-*-openbsd*)
-    machdep="alpha_mach_dep.lo"
+    machdep="mach_dep.lo"
      if test x"${ac_cv_lib_dl_dlopen}" != xyes ; then
         { echo "$as_me:$LINENO: WARNING: OpenBSD/Alpha without dlopen(). Shared library support is disabled" >&5
  echo "$as_me: WARNING: OpenBSD/Alpha without dlopen(). Shared library support is disabled" >&2;}
      fi
      ;;
   alpha*-*-linux*)
-    machdep="alpha_mach_dep.lo"
+    machdep="mach_dep.lo"
      ;;
   i?86-*-solaris2.[89] | i?86-*-solaris2.1?)
      cat >>confdefs.h <<\_ACEOF
@@ -4611,7 +4611,7 @@ _ACEOF
  
      ;;
   mipstx39-*-elf*)
-    machdep="mips_ultrix_mach_dep.lo"
+    machdep="mach_dep.lo"
      cat >>confdefs.h <<\_ACEOF
  #define STACKBASE __stackbase
  _ACEOF
@@ -4622,31 +4622,31 @@ _ACEOF
  
      ;;
   mips-dec-ultrix*)
-    machdep="mips_ultrix_mach-dep.lo"
+    machdep="mach-dep.lo"
      ;;
   mips-nec-sysv*|mips-unknown-sysv*)
      ;;
   mips*-*-linux*)
      ;;
   mips-*-*)
-    machdep="mips_sgi_mach_dep.lo"
+    machdep="mach_dep.lo"
      cat >>confdefs.h <<\_ACEOF
  #define NO_EXECUTE_PERMISSION 1
  _ACEOF
  
      ;;
   sparc-*-netbsd*)
-    machdep="sparc_netbsd_mach_dep.lo"
+    machdep="mach_dep.lo sparc_netbsd_mach_dep.lo"
      ;;
   sparc-sun-solaris2.3)
-    machdep="sparc_mach_dep.lo"
+    machdep="mach_dep.lo sparc_mach_dep.lo"
      cat >>confdefs.h <<\_ACEOF
  #define SUNOS53_SHARED_LIB 1
  _ACEOF
  
      ;;
   sparc*-sun-solaris2.*)
-    machdep="sparc_mach_dep.lo"
+    machdep="mach_dep.lo sparc_mach_dep.lo"
      ;;
   ia64-*-*)
      machdep="mach_dep.lo ia64_save_regs_in_stack.lo"
@@ -10714,7 +10714,7 @@ _ASBOX
  } >&5
  cat >&5 <<_CSEOF
  
-This file was extended by gc $as_me 7.0alpha4, which was
+This file was extended by gc $as_me 7.0alpha5, which was
  generated by GNU Autoconf 2.59.  Invocation command line was
  
    CONFIG_FILES    = $CONFIG_FILES
@@ -10772,7 +10772,7 @@ _ACEOF
  
  cat >>$CONFIG_STATUS <<_ACEOF
  ac_cs_version="\\
-gc config.status 7.0alpha4
+gc config.status 7.0alpha5
  configured by $0, generated by GNU Autoconf 2.59,
    with options \\"`echo "$ac_configure_args" | sed 's/[\\""\`\$]/\\\\&/g'`\\"
  
diff --git a/configure.ac b/configure.ac

index 9becbad..3410804 100644 (file)
--- a/configure.ac
+++ b/configure.ac
@@ -17,7 +17,7 @@ dnl Process this file with autoconf to produce configure.
  # Initialization
  # ==============
  
-AC_INIT(gc,7.0alpha4,Hans.Boehm@hp.com) 
+AC_INIT(gc,7.0alpha5,Hans.Boehm@hp.com) 
      ## version must conform to [0-9]+[.][0-9]+(alpha[0-9]+)?
  AC_CONFIG_SRCDIR(gcj_mlc.c)
  AC_CANONICAL_TARGET 
@@ -276,42 +276,42 @@ AC_MSG_CHECKING(which machine-dependent code should be used)
  machdep=
  case "$host" in
   alpha-*-openbsd*)
-    machdep="alpha_mach_dep.lo"
+    machdep="mach_dep.lo"
      if test x"${ac_cv_lib_dl_dlopen}" != xyes ; then
         AC_MSG_WARN(OpenBSD/Alpha without dlopen(). Shared library support is disabled)
      fi
      ;;
   alpha*-*-linux*)
-    machdep="alpha_mach_dep.lo"
+    machdep="mach_dep.lo"
      ;;
   i?86-*-solaris2.[[89]] | i?86-*-solaris2.1?)
      AC_DEFINE(SOLARIS25_PROC_VDB_BUG_FIXED)
      ;;
   mipstx39-*-elf*)
-    machdep="mips_ultrix_mach_dep.lo"
+    machdep="mach_dep.lo"
      AC_DEFINE(STACKBASE, __stackbase)
      AC_DEFINE(DATASTART_IS_ETEXT)
      ;;
   mips-dec-ultrix*)
-    machdep="mips_ultrix_mach-dep.lo"
+    machdep="mach-dep.lo"
      ;;
   mips-nec-sysv*|mips-unknown-sysv*)
      ;;
   mips*-*-linux*) 
      ;; 
   mips-*-*)
-    machdep="mips_sgi_mach_dep.lo"
+    machdep="mach_dep.lo"
      AC_DEFINE(NO_EXECUTE_PERMISSION)
      ;;
   sparc-*-netbsd*)
-    machdep="sparc_netbsd_mach_dep.lo"
+    machdep="mach_dep.lo sparc_netbsd_mach_dep.lo"
      ;;
   sparc-sun-solaris2.3)
-    machdep="sparc_mach_dep.lo"
+    machdep="mach_dep.lo sparc_mach_dep.lo"
      AC_DEFINE(SUNOS53_SHARED_LIB)
      ;;
   sparc*-sun-solaris2.*)
-    machdep="sparc_mach_dep.lo"
+    machdep="mach_dep.lo sparc_mach_dep.lo"
      ;;
   ia64-*-*)
      machdep="mach_dep.lo ia64_save_regs_in_stack.lo"
diff --git a/doc/README b/doc/README

index 2230f41..159fa89 100644 (file)
--- a/doc/README
+++ b/doc/README
@@ -31,7 +31,7 @@ are GPL'ed, but with an exception that should cover all uses in the
  collector.  (If you are concerned about such things, I recommend you look
  at the notice in config.guess or ltmain.sh.)
  
-This is version 7.0alpha2 of a conservative garbage collector for C and C++.
+This is version 7.0alpha5 of a conservative garbage collector for C and C++.
  
  You might find a more recent version of this at
  
diff --git a/doc/README.changes b/doc/README.changes

index da79786..25b61c4 100644 (file)
--- a/doc/README.changes
+++ b/doc/README.changes
@@ -2236,8 +2236,21 @@ Since gc6.5:
     there.
   - More consistently define HBLKSIZE to 4096 on 64 bit architectures with
     4K pages.  (Thanks to Andrew Haley.)
-
-Since gc6.6:
+ - With win32 threads, GC_stop_world needs to acquire GC_write_cs.  (Thanks
+   to Ben Hutchings for the observation and patch.)
+ - Move up struct callinfo declaration to make gcc 4.0.2 happy.
+
+Since 6.6:
+ - Add "int" to Solaris "end" and "etext" declaration in gc.h.  Declared
+   the symbols with underscores and as arrays, since that's what's actually
+   used.  Perhaps this could all just be removed?  (Thanks to John Bowman.)
+ - Fixed ARM GC_test_and_set code.  (Thanks to Kazu Hirata and Paul Brook.)
+ - Added casts for assignments to hb_last_reclaimed, which truncate the
+   value.  Added a cast to GC_adj_words_allocd.  Use GetModuleHandleA
+   when retrieving a handle to kernel32.dll under win32.  (Thanks to the
+   Visual Prolog developers.)
+
+Since gc6.7:
   - Remove GC_PROTO, VOLATILE, GC_PTR, and GC_CONST.  Assume ANSI C compiler
     and use ANSI constructs unconditionally.
   - Introduce #elif and #error in some of the appropriate places.
@@ -2383,9 +2396,45 @@ Since gc7.0alpha3
   - Added GC_getattr_np-based GC_get_stack_base (untested).
   - Separated thread local allocation into a separate file and added the
     beginning of win32 support for that.
+
+Since gc7.0alpha4
+   (more 6.6, 6.7 changes)
+ - Some Solaris fixes, including some more general changes in how
+   the assembly pieces of mach_dep.c are handled.
+ - Removed a lot of SOLARIS_THREADS-specific code that was only
+   needed with the old implementation.  This included many (mostly no-op)
+   versions of GC_is_fresh.
+ - Don't use atomic_ops in gc_locks.h unless we need threads.
+ - Fixed USE_MARK_BITS, which is once again the default without PARALLEL_MARK.
+ - Removed Solaris GC_INIT hack.  It's a workaround for a long dead bug,
+   and it seemed to be wrong anyway.
+ - Changed win32_threads.c to require preprocessor-based interception
+   of thread routines by default.  A client call to GC_use_DllMain is
+   now required to get the old behavior in which DllMain is used to implicitly
+   register threads.  This was done for uniformity with other platforms, and
+   because the DllMain solution seemed to require very tricky code which,
+   at least in the past, imposed hard bounds on the number of threads.
+ - Many small changes to make thread support work again on Cygwin.
+ - Moved definition of allocator lock etc. to pthread_support.c and
+   win32_threads.c for those two cases.
+ - Got rid of the FASTLOCK() machinery.  It doesn't seem useful on modern
+   platforms.
+ - Cleaned up the uncollectable allocation routines, speeding up the
+   slower paths.  The code did enough unnecessary work off the critical path
+   that the underlying logic was getting hard to extract.
+ - No longer turn off THREAD_LOCAL_ALLOC with DBG_HDRS_ALL.  Indications
+   are it just works, and I think the reasons for it not working disappeared
+   a while ago.
+ - Fixed bugs in hb_n_marks calculation and assertion.
+ - Don't use __builtin_expect for pre-3.0 gcc.
+ - Define GWW_VDB only for recent Microsoft tool chains.
+ - Add overview.html to doc directory.
+ - Fix NT_STATIC_THREADS_MAKEFILE, various compiler warnings.
+ - Made thread local allocation sort of work with Cygwin.  The code should
+   be there to deal with other Windows variants, but non-Cygwin Windows
+   threads need more bug fixes.
    
  To do:
- - Fix USE_MARK_BITS.
   - REDIRECT_MALLOC and threads combination is getting closer, but currently
     usually fails because the DTV (dynamic thread vector) used to access
     thread-local storage is referenced only from the base of a thread stack,
@@ -2394,7 +2443,6 @@ To do:
     Typically large heap sections end up cleared.
   - Clone marker inner loop to support arch-dependent prefetching,
     and counting of objects marked for finalization.
- - function wrapping??
   - The USE_MUNMAP code should really use a separate data structure
     indexed by physical page to keep track of time since last use of
     a page.  Using hblk headers means we lose track of ages when
diff --git a/doc/README.linux b/doc/README.linux

index 1d0fd4c..99f4bbc 100644 (file)
--- a/doc/README.linux
+++ b/doc/README.linux
@@ -19,15 +19,15 @@ Linux threads.  These should not be touched by the client program.
  
  To use threads, you need to abide by the following requirements:
  
-1) You need to use LinuxThreads (which are included in libc6).
+1) You need to use LinuxThreads or NPTL (which are included in libc6).
  
     The collector relies on some implementation details of the LinuxThreads
-   package.  It is unlikely that this code will work on other
+   package.  This code may not work on other
     pthread implementations (in particular it will *not* work with
     MIT pthreads).
  
-2) You must compile the collector with -DGC_LINUX_THREADS and -D_REENTRANT
-   specified in the Makefile.
+2) You must compile the collector with -DGC_LINUX_THREADS (or
+   just -DGC_THREADS) and -D_REENTRANT specified in the Makefile.
  
  3a) Every file that makes thread calls should define GC_LINUX_THREADS and 
     _REENTRANT and then include gc.h.  Gc.h redefines some of the
diff --git a/doc/doc.am b/doc/doc.am

index a90e05d..d95fa17 100644 (file)
--- a/doc/doc.am
+++ b/doc/doc.am
@@ -43,6 +43,7 @@ dist_pkgdata_DATA = \
         doc/README.solaris2 \
         doc/README.uts \
         doc/README.win32 \
+       doc/overview.html \
         doc/tree.html \
         doc/leak.html \
         doc/gcinterface.html \
diff --git a/doc/gcdescr.html b/doc/gcdescr.html

index cab6bde..dc08470 100644 (file)
--- a/doc/gcdescr.html
+++ b/doc/gcdescr.html
@@ -1,7 +1,7 @@
  <HTML>
  <HEAD>
      <TITLE> Conservative GC Algorithmic Overview </TITLE>
-    <AUTHOR> Hans-J. Boehm, HP Labs (Much of this was written at SGI)</author>
+    <AUTHOR> Hans-J. Boehm, HP Labs (Some of this was written at SGI)</author>
  </HEAD>
  <BODY>
  <H1> <I>This is under construction, and may always be.</i> </h1>
@@ -549,6 +549,67 @@ by using ld's function call wrapping mechanism under Linux.
  Recent versions of the collector support several facilities to enhance
  the processor-scalability and thread performance of the collector.
  These are discussed in more detail <A HREF="scale.html">here</a>.
+We briefly outline the data approach to thread-local allocation in the
+next section.
+<H2>Thread-local allocation</h2>
+If thread-local allocation is enabled, the collector keeps separate
+arrays of free lists for each thread.  Thread-local allocation
+is currently only supported on a few platforms.
+<P>
+The free list arrays associated
+with each thread are only used to satisfy requests for objects that
+are  both very small, and belong to one of a small number of well-known
+kinds.  These currently include "normal" and pointer-free objects.
+Depending on the configuration, "gcj" objects may also be included.
+<P>
+Thread-local free list entries contain either a pointer to the first
+element of a free list, or they contain a counter of the number of
+allocation "granules" allocated so far.  Initially they contain the
+value one, i.e. a small counter value.
+<P>
+Thread-local allocation allocates directly through the global
+allocator, if the object is of a size or kind not covered by the
+local free lists.
+<P>
+If there is an appropriate local free list, the allocator checks whether it
+contains a sufficiently small counter value.  If so, the counter is simply
+incremented by the counter value, and the global allocator is used.
+In this way, the initial few allocations of a given size bypass the local
+allocator.  A thread that only allocates a handful of objects of a given
+size will not build up its own free list for that size.  This avoids
+wasting space for unpopular object sizes or kinds.
+<P>
+Once the counter passes a threshold, <TT>GC_malloc_many</tt> is called
+to allocate roughly <TT>HBLKSIZE</tt> space and put it on the corresponding
+local free list.  Further allocations of that size and kind then use
+this free list, and no longer need to acquire the allocation lock.
+The allocation procedure is otherwise similar to the global free lists.
+The local free lists are also linked using the first word in the object.
+In most cases this means they require considerably less time.
+<P>
+Local free lists are treated by most of the rest of the collector
+as though they were in-use reachable data.  This requires some care,
+since pointer-free objects are not normally traced, and hence a special
+tracing procedure is required to mark all objects on pointer-free and
+gcj local free lists.
+<P>
+On thread exit, any remaining thread-local free list entries are
+transferred back to the global free list.
+<P>
+Note that if the collector is configured for thread-local allocation,
+GC versions before 7 do not invoke the thread-local allocator by default.
+<TT>GC_malloc</tt> only uses thread-local allocation in version 7 and later.
+In earlier versions, <TT>GC_MALLOC</tt> (all caps) may be directed
+to use thread-local allocation by defining <TT>GC_REDIRECT_TO_LOCAL</tt>
+and then including <TT>gc_local_alloc.h</tt>.
+<P>
+For some more details see <A HREF="scale.html">here</a>, and the
+technical report entitled
+<A HREF="http://www.hpl.hp.com/techreports/2000/HPL-2000-165.html">
+``Fast Multiprocessor Memory Allocation and Garbage Collection''
+</a>
+<P>
+<HR>
  <P>
  Comments are appreciated.  Please send mail to
  <A HREF="mailto:boehm@acm.org"><TT>boehm@acm.org</tt></a> or
diff --git a/doc/overview.html b/doc/overview.html

new file mode 100644 (file)

index 0000000..d31f937
--- /dev/null
+++ b/doc/overview.html
@@ -0,0 +1,446 @@
+<!DOCTYPE HTML>
+<html><head><title>A garbage collector for C and C++</title></head>
+<body>
+<table bgcolor="#f0f0ff" cellpadding="10%">
+  <tbody><tr>
+  <td><a href="http://www.hpl.hp.com/personal/Hans_Boehm/gc/gcinterface.html">Interface Overview</a></td>
+  <td><a href="http://www.hpl.hp.com/personal/Hans_Boehm/gc/04tutorial.pdf">Tutorial Slides</a></td>
+  <td><a href="http://www.hpl.hp.com/personal/Hans_Boehm/gc/faq.html">FAQ</a></td>
+  <td><a href="http://www.hpl.hp.com/personal/Hans_Boehm/gc/simple_example.html">Example</a></td>
+  <td><a href="http://www.hpl.hp.com/personal/Hans_Boehm/gc/gc_source">Download</a></td>
+  <td><a href="http://www.hpl.hp.com/personal/Hans_Boehm/gc/license.txt">License</a></td>
+  </tr>
+</tbody></table>
+<h1>A garbage collector for C and C++</h1>
+<ul>
+<li><a href="#platforms">Platforms</a>
+</li><li><a href="#multiprocessors">Scalable multiprocessor versions</a>
+</li><li><a href="#details">Some collector details</a>
+</li><li><a href="#further">Further reading</a>
+</li><li><a href="#users">Current users</a>
+</li><li><a href="#collector">Local Links for this collector</a>
+</li><li><a href="#background">Local Background Links</a>
+</li><li><a href="#contacts">Contacts and Mailing List</a>
+</li></ul>
+[ This is an updated version of the page formerly at
+<tt>http://reality.sgi.com/boehm/gc.html</tt>
+and before that at
+<a href="ftp://parcftp.xerox.com/pub/gc/gc.html">
+<tt>ftp://parcftp.xerox.com/pub/gc/gc.html</tt></a>.]
+<p>
+The <a href="http://www.hpl.hp.com/personal/Hans_Boehm">Boehm</a>-<a href="http://www.cs.cornell.edu/annual_report/00-01/bios.htm#demers">Demers</a>-<a href="http://www-sul.stanford.edu/weiser/">Weiser</a>
+conservative garbage collector can
+be used as a garbage collecting
+replacement for C <tt>malloc</tt> or C++ <tt>new</tt>.
+It allows you to allocate memory basically as you normally would,
+without explicitly deallocating memory that is no longer useful.
+The collector automatically recycles memory when it determines
+that it can no longer be otherwise accessed.
+A simple example of such a use is given
+<a href="http://www.hpl.hp.com/personal/Hans_Boehm/gc/simple_example.html">here</a>.
+</p><p>
+The collector is also used by a number of programming language
+implementations that either use C as intermediate code, want
+to facilitate easier interoperation with C libraries, or
+just prefer the simple collector interface.
+For a more detailed description of the interface, see
+<a href="http://www.hpl.hp.com/personal/Hans_Boehm/gc/gcinterface.html">here</a>.
+</p><p>
+Alternatively, the garbage collector  may be used as
+a <a href="http://www.hpl.hp.com/personal/Hans_Boehm/gc/leak.html">leak detector</a>
+for C or C++ programs, though that is not its primary goal.
+</p><p>
+Typically several versions will be available.
+Usually you should first try to use
+<a href="http://www.hpl.hp.com/personal/Hans_Boehm/gc/gc_source/gc.tar.gz"><tt>gc_source/gc.tar.gz</tt></a>,
+which is normally an older, more stable version.
+</p><p>
+If that fails, try the latest explicitly numbered version
+in <a href="http://www.hpl.hp.com/personal/Hans_Boehm/gc/gc_source/">
+<tt>gc_source/</tt></a>.
+Later versions may contain additional features, platform support,
+or bug fixes, but are likely to be less well tested.
+Note that versions containing the letters <tt>alpha</tt> are even less
+well tested than others, especially on non-HP platforms.
+</p><p>
+A slightly older version of the garbage collector is now also
+included as part of the
+<a href="http://gcc.gnu.org/">GNU compiler</a>
+distribution.  The source
+code for that version is available for browsing
+<a href="http://gcc.gnu.org/cgi-bin/cvsweb.cgi/gcc/boehm-gc/">here</a>.
+</p><p>
+The arguments for and against conservative garbage collection
+in C and C++ are briefly
+discussed in
+<a href="http://www.hpl.hp.com/personal/Hans_Boehm/gc/issues.html">issues.html</a>.  The beginnings of
+a frequently-asked-questions list are <a href="http://www.hpl.hp.com/personal/Hans_Boehm/gc/faq.html">here</a>.
+</p><p>
+The garbage collector code is copyrighted by
+<a href="http://www.hpl.hp.com/personal/Hans_Boehm">Hans-J. Boehm</a>,
+Alan J. Demers,
+<a href="http://www.xerox.com/">Xerox Corporation</a>,
+<a href="http://www.sgi.com/">Silicon Graphics</a>,
+and
+<a href="http://www.hp.com/">Hewlett-Packard Company</a>.
+It may be used and copied without payment of a fee under minimal restrictions.
+See the README file in the distribution  or the
+<a href="http://www.hpl.hp.com/personal/Hans_Boehm/gc/license.txt">license</a> for more details.
+<b>IT IS PROVIDED AS IS,
+WITH ABSOLUTELY NO WARRANTY EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK</b>.
+</p><p>
+Empirically, this collector works with most unmodified C programs,
+simply by replacing
+<tt>malloc</tt> with <tt>GC_malloc</tt> calls,
+replacing <tt>realloc</tt> with <tt>GC_realloc</tt> calls, and removing
+free calls.  Exceptions are discussed
+in <a href="http://www.hpl.hp.com/personal/Hans_Boehm/gc/issues.html">issues.html</a>. 
+</p><h2><a name="platforms">Platforms</a></h2>
+The collector is not completely portable, but the distribution
+includes ports to most standard PC and UNIX/Linux platforms.
+The collector should work on Linux, *BSD, recent Windows versions,
+MacOS X, HP/UX, Solaris,
+Tru64, Irix and a few other operating systems.
+Some ports are more polished than others.
+<p>
+Irix pthreads, Linux threads, Win32 threads, Solaris threads
+(old style and pthreads),
+HP/UX 11 pthreads, Tru64 pthreads, and MacOS X threads are supported
+in recent versions.
+</p><h3>Separately distributed ports</h3>
+For MacOS 9/Classic use, Patrick Beard's latest port is available from
+<a href="http://homepage.mac.com/pcbeard/gc/">
+<tt>http://homepage.mac.com/pcbeard/gc/</tt></a>.
+(Unfortunately, that's now quite dated.
+I'm not in a position to test under MacOS.  Although I try to
+incorporate changes, it is impossible for
+me to update the project file.)
+<p>
+Precompiled versions of the collector for NetBSD are available
+<a href="ftp://ftp.netbsd.org/pub/NetBSD/packages/pkgsrc/devel/boehm-gc/README.html">here</a>
+or
+<a href="http://www.netbsd.org/packages/devel/boehm-gc/README.html">here</a>.
+</p><p>
+<a href="http://www.debian.org/">Debian Linux</a> includes prepackaged
+versions of the collector.
+</p><h2><a name="multiprocessors">Scalable multiprocessor versions</a></h2>
+Kenjiro Taura, Toshio Endo, and Akinori Yonezawa have made available
+a <a href="http://www.yl.is.s.u-tokyo.ac.jp/gc/">parallel collector</a>
+based on this one.  Their collector takes advantage of multiple processors
+during a collection.  Starting with collector version 6.0alpha1
+we also do this, though with more modest processor scalability goals.
+Our approach is discussed briefly in
+<a href="http://www.hpl.hp.com/personal/Hans_Boehm/gc/scale.html"><tt>scale.html</tt></a>.
+<h2><a name="details">Some Collector Details</a></h2>
+The collector uses a <a href="http://www.hpl.hp.com/personal/Hans_Boehm/gc/complexity.html">mark-sweep</a> algorithm.
+It provides incremental and generational
+collection under operating systems which provide the right kind of
+virtual memory support.  (Currently this includes SunOS[45], IRIX,
+OSF/1, Linux, and Windows, with varying restrictions.)
+It allows <a href="http://www.hpl.hp.com/personal/Hans_Boehm/gc/finalization.html"><i>finalization</i></a> code
+to be invoked when an object is collected.
+It can take advantage of type information to locate pointers if such
+information is provided, but it is usually used without such information.
+See the README and
+<tt>gc.h</tt> files in the distribution for more details.
+<p>
+For an overview of the implementation, see <a href="http://www.hpl.hp.com/personal/Hans_Boehm/gc/gcdescr.html">here</a>.
+</p><p>
+The garbage collector distribution includes a C string
+(<a href="http://www.hpl.hp.com/personal/Hans_Boehm/gc/gc_source/cordh.txt"><i>cord</i></a>) package that provides
+for fast concatenation and substring operations on long strings.
+A simple curses- and win32-based editor that represents the entire file
+as a cord is included as a
+sample application.
+</p><p>
+Performance of the nonincremental collector is typically competitive
+with malloc/free implementations.  Both space and time overhead are
+likely to be only slightly higher
+for programs written for malloc/free
+(see Detlefs, Dosser and Zorn's
+<a href="ftp://ftp.cs.colorado.edu/pub/techreports/zorn/CU-CS-665-93.ps.Z">Memory Allocation Costs in Large C and C++ Programs</a>.)
+For programs allocating primarily very small objects, the collector
+may be faster; for programs allocating primarily large objects it will
+be slower.  If the collector is used in a multithreaded environment
+and configured for thread-local allocation, it may in some cases
+significantly outperform malloc/free allocation in time.
+</p><p>
+We also expect that in many cases any additional overhead
+will be more than compensated for by decreased copying etc.
+if programs are written
+and tuned for garbage collection.
+</p><h1><a name="further">Further Reading:</a></h1>
+<b>The beginnings of a frequently asked questions list for this
+collector are <a href="http://www.hpl.hp.com/personal/Hans_Boehm/gc/faq.html">here</a></b>.
+<p>
+<b>The following provide information on garbage collection in general</b>:
+</p><p>
+Paul Wilson's <a href="ftp://ftp.cs.utexas.edu/pub/garbage">garbage collection ftp archive</a> and <a href="ftp://ftp.cs.utexas.edu/pub/garbage/gcsurvey.ps">GC survey</a>.
+</p><p>
+The Ravenbrook <a href="http://www.memorymanagement.org/">
+Memory Management Reference</a>.
+</p><p>
+David Chase's
+<a href="http://www.iecc.com/gclist/GC-faq.html">GC FAQ</a>.
+</p><p>
+Richard Jones'
+<a href="http://www.ukc.ac.uk/computer_science/Html/Jones/gc.html">
+GC page</a> and
+<a href="http://www.cs.kent.ac.uk/people/staff/rej/gcbook/gcbook.html">
+his book</a>.
+</p><p>
+<b>The following papers describe the collector algorithms we use
+and the underlying design decisions at
+a higher level.</b>
+</p><p>
+(Some of the lower level details can be found
+<a href="http://www.hpl.hp.com/personal/Hans_Boehm/gc/gcdescr.html">here</a>.)
+</p><p>
+The first one is not available
+electronically due to copyright considerations.  Most of the others are
+subject to ACM copyright.
+</p><p>
+Boehm, H., "Dynamic Memory Allocation and Garbage Collection", <i>Computers in Physics
+9</i>, 3, May/June 1995, pp. 297-303.  This is directed at an otherwise sophisticated
+audience unfamiliar with memory allocation issues.  The algorithmic details differ
+from those in the implementation.  There is a related letter to the editor and a minor
+correction in the next issue.
+</p><p>
+Boehm, H., and <a href="http://www.ubiq.com/hypertext/weiser/weiser.html">M. Weiser</a>,
+<a href="http://www.hpl.hp.com/personal/Hans_Boehm/spe_gc_paper">"Garbage Collection in an Uncooperative Environment"</a>,
+<i>Software Practice &amp; Experience</i>, September 1988, pp. 807-820.
+</p><p>
+Boehm, H., A. Demers, and S. Shenker, <a href="http://www.hpl.hp.com/personal/Hans_Boehm/gc/papers/pldi91.ps.Z">"Mostly Parallel Garbage Collection"</a>, Proceedings
+of the ACM SIGPLAN '91 Conference on Programming Language Design and Implementation,
+<i>SIGPLAN Notices 26</i>, 6 (June 1991), pp. 157-164.
+</p><p>
+Boehm, H., <a href="http://www.hpl.hp.com/personal/Hans_Boehm/gc/papers/pldi93.ps.Z">"Space Efficient Conservative Garbage Collection"</a>, Proceedings of the ACM
+SIGPLAN '93 Conference on Programming Language Design and Implementation, <i>SIGPLAN
+Notices 28</i>, 6 (June 1993), pp. 197-206.
+</p><p>
+Boehm, H., "Reducing Garbage Collector Cache Misses",
+<i> Proceedings of the 2000 International Symposium on Memory Management </i>.
+<a href="http://portal.acm.org/citation.cfm?doid=362422.362438">
+Official version.</a>
+<a href="http://www.hpl.hp.com/techreports/2000/HPL-2000-99.html">
+Technical report version.</a>  Describes the prefetch strategy
+incorporated into the collector for some platforms.  Explains why
+the sweep phase of a "mark-sweep" collector should not really be
+a distinct phase.
+</p><p>
+M. Serrano, H. Boehm,
+"Understanding Memory Allocation of Scheme Programs",
+<i>Proceedings of the Fifth ACM SIGPLAN International Conference on
+Functional Programming</i>, 2000, Montreal, Canada, pp. 245-256.
+<a href="http://www.acm.org/pubs/citations/proceedings/fp/351240/p245-serrano/">
+Official version.</a>
+<a href="http://www.hpl.hp.com/techreports/2000/HPL-2000-62.html">
+Earlier Technical Report version.</a>  Includes some discussion of the
+collector debugging facilities for identifying causes of memory retention.
+</p><p>
+Boehm, H.,
+"Fast Multiprocessor Memory Allocation and Garbage Collection",
+<a href="http://www.hpl.hp.com/techreports/2000/HPL-2000-165.html">
+HP Labs Technical Report HPL 2000-165</a>.  Discusses the parallel
+collection algorithms, and presents some performance results.
+</p><p>
+Boehm, H., "Bounding Space Usage of Conservative Garbage Collectors",
+<i>Proceedings of the 2002 ACM SIGPLAN-SIGACT Symposium on Principles of
+Programming Languages</i>, Jan. 2002, pp. 93-100.
+<a href="http://portal.acm.org/citation.cfm?doid=503272.503282">
+Official version.</a>
+<a href="http://www.hpl.hp.com/techreports/2001/HPL-2001-251.html">
+Technical report version.</a>
+Includes a discussion of a collector facility to much more reliably test for
+the potential of unbounded heap growth.
+</p><p>
+<b>The following papers discuss language and compiler restrictions necessary to guarantee
+safety of conservative garbage collection.</b>
+</p><p>
+We thank John Levine and JCLT for allowing
+us to make the second paper available electronically, and providing PostScript for the final
+version.
+</p><p>
+Boehm, H., <a href="http://www.hpl.hp.com/personal/Hans_Boehm/gc/papers/pldi96.ps.gz">``Simple
+Garbage-Collector-Safety''</a>, Proceedings
+of the ACM SIGPLAN '96 Conference on Programming Language Design
+and Implementation.
+</p><p>
+Boehm, H., and D. Chase,  <a href="http://www.hpl.hp.com/personal/Hans_Boehm/gc/papers/boecha.ps.gz">
+``A Proposal for Garbage-Collector-Safe C Compilation''</a>,
+<i>Journal of C  Language Translation 4</i>, 2 (December 1992), pp. 126-141.
+</p><p>
+<b>Other related information: </b>
+</p><p>
+Detlefs, Dosser and Zorn's <a href="ftp://ftp.cs.colorado.edu/pub/techreports/zorn/CU-CS-665-93.ps.Z">Memory Allocation Costs in Large C and C++ Programs</a>.
+ This is a performance comparison of the Boehm-Demers-Weiser collector to malloc/free,
+using programs written for malloc/free.
+</p><p>
+Joel Bartlett's <a href="ftp://ftp.digital.com/pub/DEC/CCgc">mostly copying conservative garbage collector for C++</a>.
+</p><p>
+John Ellis and David Detlefs' <a href="ftp://parcftp.xerox.com/pub/ellis/gc/gc.ps">Safe Efficient Garbage Collection for C++</a> proposal.
+</p><p>
+Henry Baker's <a href="http://home.pipeline.com/%7Ehbaker1/">paper collection</a>.
+</p><p>
+Slides for Hans Boehm's <a href="http://www.hpl.hp.com/personal/Hans_Boehm/gc/myths.ps">Allocation and GC Myths</a> talk.
+</p><h1><a name="users">Current users:</a></h1>
+Known current users of some variant of this collector include:
+<p>
+The runtime system for <a href="http://gcc.gnu.org/java">GCJ</a>,
+the static GNU java compiler.
+</p><p>
+<a href="http://w3m.sourceforge.net/">W3m</a>, a text-based web browser.
+</p><p>
+Some versions of the Xerox DocuPrint printer software.
+</p><p>
+The <a href="http://www.mozilla.org/">Mozilla</a> project, as leak
+detector.
+</p><p>
+The <a href="http://www.go-mono.com/">Mono</a> project,
+an open source implementation of the .NET development framework.
+</p><p>
+The <a href="http://www.gnu.org/projects/dotgnu/">DotGNU Portable.NET
+project</a>, another open source .NET implementation.
+</p><p>
+The <a href="http://irssi.org/">Irssi IRC client</a>.
+</p><p>
+<a href="http://titanium.cs.berkeley.edu/">The Berkeley Titanium project</a>.
+</p><p>
+<a href="http://www.nag.co.uk/nagware_fortran_compilers.asp">The NAGWare f90 Fortran 90 compiler</a>.
+</p><p>
+Elwood Corporation's <a href="http://www.elwood.com/eclipse-info/index.htm">
+Eclipse</a> Common Lisp system, C library, and translator.
+</p><p>
+The <a href="http://www-sop.inria.fr/mimosa/fp/Bigloo/">Bigloo
+Scheme</a>
+and <a href="http://kaolin.unice.fr/%7Eserrano/camloo.html">Camloo ML
+compilers</a>
+written by Manuel Serrano and others.
+</p><p>
+Brent Benson's <a href="http://ftp.cs.indiana.edu/pub/scheme-repository/imp/">libscheme</a>.
+</p><p>
+The <a href="http://www.cs.rice.edu/CS/PLT/packages/mzscheme/index.html">MzScheme</a> scheme implementation.
+</p><p>
+The <a href="http://www.cs.washington.edu/research/projects/cecil/www/cecil-home.html">University of Washington Cecil Implementation</a>.
+</p><p>
+<a href="http://www.icsi.berkeley.edu/Sather/">The Berkeley Sather implementation</a>.
+</p><p>
+<a href="http://www.cs.berkeley.edu/%7Eharmonia/">The Berkeley Harmonia Project</a>.
+</p><p>
+The <a href="http://www.cs.arizona.edu/sumatra/toba/">Toba</a> Java Virtual
+Machine to C translator.
+</p><p>
+The <a href="http://www.gwydiondylan.org/">Gwydion Dylan compiler</a>.
+</p><p>
+The <a href="http://gcc.gnu.org/onlinedocs/gcc/Objective-C.html">
+GNU Objective C runtime</a>.
+</p><p>
+<a href="http://www.math.uiuc.edu/Macaulay2">Macaulay 2</a>, a system to support
+research in algebraic geometry and commutative algebra.
+</p><p>
+The <a href="http://www.vestasys.org/">Vesta</a> configuration management
+system.
+</p><p>
+<a href="http://www.visual-prolog.com/vip6">Visual Prolog 6</a>.
+</p><p>
+<a href="http://asymptote.sf.net/">Asymptote LaTeX-compatible
+vector graphics language.</a>
+
+</p><h1><a name="collector">More collector information at this site</a></h1>
+<a href="http://www.hpl.hp.com/personal/Hans_Boehm/gc/simple_example.html">A simple illustration of how to build and
+use the collector.</a>
+<p>
+<a href="http://www.hpl.hp.com/personal/Hans_Boehm/gc/gcinterface.html">Description of alternate interfaces to the
+garbage collector.</a>
+</p><p>
+<a href="http://www.hpl.hp.com/personal/Hans_Boehm/gc/04tutorial.pdf">Slides from an ISMM 2004  tutorial about the GC.</a>
+</p><p>
+<a href="http://www.hpl.hp.com/personal/Hans_Boehm/gc/faq.html">A FAQ (frequently asked questions) list.</a>
+</p><p>
+<a href="http://www.hpl.hp.com/personal/Hans_Boehm/gc/leak.html">How to use the garbage collector as a leak detector.</a>
+</p><p>
+<a href="http://www.hpl.hp.com/personal/Hans_Boehm/gc/debugging.html">Some hints on debugging garbage collected
+applications.</a>
+</p><p>
+<a href="http://www.hpl.hp.com/personal/Hans_Boehm/gc/gcdescr.html">An overview of the implementation of the
+garbage collector.</a>
+</p><p>
+<a href="http://www.hpl.hp.com/personal/Hans_Boehm/gc/tree.html">The data structure used for fast pointer lookups.</a>
+</p><p>
+<a href="http://www.hpl.hp.com/personal/Hans_Boehm/gc/scale.html">Scalability of the collector to multiprocessors.</a>
+</p><p>
+<a href="http://www.hpl.hp.com/personal/Hans_Boehm/gc/gc_source">Directory containing garbage collector source.</a>
+
+</p><h1><a name="background">More background information at this site</a></h1>
+<a href="http://www.hpl.hp.com/personal/Hans_Boehm/gc/bounds.html">An attempt to establish a bound on space usage of
+conservative garbage collectors.</a>
+<p>
+<a href="http://www.hpl.hp.com/personal/Hans_Boehm/gc/complexity.html">Mark-sweep versus copying garbage collectors
+and their complexity.</a>
+</p><p>
+<a href="http://www.hpl.hp.com/personal/Hans_Boehm/gc/conservative.html">Pros and cons of conservative garbage collectors,
+in comparison to other collectors.
+</a>
+</p><p>
+<a href="http://www.hpl.hp.com/personal/Hans_Boehm/gc/issues.html">Issues related to garbage collection vs.
+manual memory management in C/C++.</a>
+</p><p>
+<a href="http://www.hpl.hp.com/personal/Hans_Boehm/gc/example.html">An example of a case in which garbage collection
+results in a much faster implementation as a result of reduced
+synchronization.</a>
+</p><p>
+<a href="http://www.hpl.hp.com/personal/Hans_Boehm/gc/nonmoving">Slide set discussing performance of nonmoving
+garbage collectors.</a>
+</p><p>
+<a href="http://www.hpl.hp.com/personal/Hans_Boehm/popl03/web">
+Slide set discussing <i>Destructors, Finalizers, and Synchronization</i>
+(POPL 2003).</a>
+</p><p>
+<a href="http://portal.acm.org/citation.cfm?doid=604131.604153">
+Paper corresponding to above slide set.</a>
+(<a href="http://www.hpl.hp.com/techreports/2002/HPL-2002-335.html">
+Technical Report version</a>.)
+</p><p>
+<a href="http://www.hpl.hp.com/personal/Hans_Boehm/gc/gc_bench.html">A Java/Scheme/C/C++ garbage collection benchmark.</a>
+</p><p>
+<a href="http://www.hpl.hp.com/personal/Hans_Boehm/gc/myths.ps">Slides for talk on memory allocation myths.</a>
+</p><p>
+<a href="http://www.hpl.hp.com/personal/Hans_Boehm/gc/gctalk.ps">Slides for OOPSLA 98 garbage collection talk.</a>
+</p><p>
+<a href="http://www.hpl.hp.com/personal/Hans_Boehm/gc/papers">Related papers.</a>
+</p><h1><a name="contacts">Contacts and Mailing List</a><a></a></h1>
+<a>We have recently set up two mailing lists for collector announcements
+and discussions:
+</a><ul>
+<li><a href="mailto:gc-announce@linux.hpl.hp.com">gc-announce@linux.hpl.hp.com</a>
+is used for announcements of new versions.  Postings are restricted.
+We expect this to always remain a very low volume list.
+</li><li><a href="mailto:gc@linux.hpl.hp.com">gc@linux.hpl.hp.com</a> is used for
+discussions, bug reports, and the like.  Subscribers may post.
+On-topic posts by nonsubscribers will usually also be accepted, but
+it may take some time to review them.
+</li></ul>
+To subscribe to these lists, send a mail message containing the
+word "subscribe" to
+<a href="mailto:gc-announce-request@linux.hpl.hp.com?subject=subscribe">gc-announce-request@linux.hpl.hp.com</a>
+or to
+<a href="mailto:gc-request@linux.hpl.hp.com?subject=subscribe">gc-request@linux.hpl.hp.com</a>.
+(Please ignore the instructions about web-based subscription.
+The listed web site is behind the HP firewall.)
+<p>
+The archives for these lists appear
+<a href="http://www.hpl.hp.com/hosted/linux/mail-archives">here</a>.
+The gc list archive may also be read at
+<a href="http://dir.gmane.org/gmane.comp.programming.garbage-collection.boehmgc">gmane.org</a>.
+</p><p>
+Some prior discussion of the collector has taken place on the gcc
+java mailing list, whose archives appear
+<a href="http://gcc.gnu.org/ml/java/">here</a>, and also on
+<a href="http://lists.tunes.org/mailman/listinfo/gclist">gclist@iecc.com</a>.
+</p><p>
+Comments and bug reports may also be sent to
+(<a href="mailto:Hans_Boehm@hp.com">Hans.Boehm@hp.com</a>) or
+(<a href="mailto:boehm@acm.org">boehm@acm.org</a>), but the gc
+mailing list is usually preferred.
+ 
+</p></body></html>
diff --git a/headers.c b/headers.c

index 8b14b4b..1a0ce88 100644 (file)
--- a/headers.c
+++ b/headers.c
@@ -254,7 +254,7 @@ struct hblkhdr * GC_install_header(struct hblk *h)
      result = alloc_hdr();
      SET_HDR(h, result);
  #   ifdef USE_MUNMAP
-       result -> hb_last_reclaimed = GC_gc_no;
+       result -> hb_last_reclaimed = (unsigned short)GC_gc_no;
  #   endif
      return(result);
  }
diff --git a/include/gc.h b/include/gc.h

index a47dc4a..5f049c5 100644 (file)
--- a/include/gc.h
+++ b/include/gc.h
@@ -962,17 +962,7 @@ extern void GC_thr_init(void);     /* Needed for Solaris/X86       */
    * A GC_INIT call is required if the collector is built with THREAD_LOCAL_ALLOC
    * defined and the initial allocation call is not to GC_malloc().
    */
-#if (defined(sparc) || defined(__sparc)) && defined(sun)
-    /*
-     * If you are planning on putting
-     * the collector in a SunOS 5 dynamic library, you need to call GC_INIT()
-     * from the statically loaded program section.
-     * This circumvents a Solaris 2.X (X<=4) linker bug.
-     */
-#   define GC_INIT() { extern end, etext; \
-                      GC_noop(&end, &etext); \
-                      GC_init();}
-#elif defined(__CYGWIN32__) && defined(GC_DLL) || defined (_AIX)
+#if defined(__CYGWIN32__) || defined (_AIX)
      /*
       * Similarly gnu-win32 DLLs need explicit initialization from
       * the main program, as does AIX.
@@ -984,15 +974,22 @@ extern void GC_thr_init(void);    /* Needed for Solaris/X86       */
        extern int _bss_end__[];
  #     define GC_MAX(x,y) ((x) > (y) ? (x) : (y))
  #     define GC_MIN(x,y) ((x) < (y) ? (x) : (y))
-#     define GC_DATASTART ((GC_PTR) GC_MIN(_data_start__, _bss_start__))
-#     define GC_DATAEND         ((GC_PTR) GC_MAX(_data_end__, _bss_end__))
+#     define GC_DATASTART ((void *) GC_MIN(_data_start__, _bss_start__))
+#     define GC_DATAEND         ((void *) GC_MAX(_data_end__, _bss_end__))
+#     if defined(GC_DLL)
+#       define GC_INIT() { GC_add_roots(GC_DATASTART, GC_DATAEND); GC_init(); }
+#     else
+       /* Main program init not required, but other defines needed for */
+       /* uniformity.                                                  */
+#       define GC_INIT() { GC_init(); }
+#     endif
  #   endif
  #   if defined(_AIX)
        extern int _data[], _end[];
-#     define GC_DATASTART ((GC_PTR)((ulong)_data))
-#     define GC_DATAEND ((GC_PTR)((ulong)_end))
+#     define GC_DATASTART ((void *)((ulong)_data))
+#     define GC_DATAEND ((void *)((ulong)_end))
+#     define GC_INIT() { GC_add_roots(GC_DATASTART, GC_DATAEND); GC_init(); }
  #   endif
-#   define GC_INIT() { GC_add_roots(GC_DATASTART, GC_DATAEND); GC_init(); }
  #else
  #   define GC_INIT() { GC_init(); }
  #endif
diff --git a/include/gc_config_macros.h b/include/gc_config_macros.h

index 2cfa6c2..f3b5ef4 100644 (file)
--- a/include/gc_config_macros.h
+++ b/include/gc_config_macros.h
@@ -5,12 +5,12 @@
   * Some tests for old macros.  These violate our namespace rules and will
   * disappear shortly.  Use the GC_ names.
   */
-#if defined(SOLARIS_THREADS) || defined(_SOLARIS_THREADS)
+#if defined(SOLARIS_THREADS) || defined(_SOLARIS_THREADS) \
+    || defined(_SOLARIS_PTHREADS) || defined(GC_SOLARIS_PTHREADS)
+  /* We no longer support old style Solaris threads.           */
+  /* GC_SOLARIS_THREADS now means pthreads.                    */
  # define GC_SOLARIS_THREADS
  #endif
-#if defined(_SOLARIS_PTHREADS)
-# define GC_SOLARIS_PTHREADS
-#endif
  #if defined(IRIX_THREADS)
  # define GC_IRIX_THREADS
  #endif
@@ -39,7 +39,6 @@
  #endif
  
  #if !defined(_REENTRANT) && (defined(GC_SOLARIS_THREADS) \
-                            || defined(GC_SOLARIS_PTHREADS) \
                              || defined(GC_HPUX_THREADS) \
                              || defined(GC_AIX_THREADS) \
                              || defined(GC_LINUX_THREADS))
@@ -52,7 +51,7 @@
  # define _POSIX4A_DRAFT10_SOURCE 1
  #endif
  
-# if defined(GC_SOLARIS_PTHREADS) || defined(GC_FREEBSD_THREADS) || \
+# if defined(GC_SOLARIS_THREADS) || defined(GC_FREEBSD_THREADS) || \
         defined(GC_IRIX_THREADS) || defined(GC_LINUX_THREADS) || \
         defined(GC_HPUX_THREADS) || defined(GC_OSF1_THREADS) || \
         defined(GC_DGUX386_THREADS) || defined(GC_DARWIN_THREADS) || \
@@ -79,10 +78,12 @@
  #   define GC_IRIX_THREADS
  #   define GC_PTHREADS
  # endif
-# if defined(__sparc) && !defined(__linux__)
-#   define GC_SOLARIS_PTHREADS
+# if defined(__sparc) && !defined(__linux__) \
+     || defined(sun) && (defined(i386) || defined(__i386__))
+#   define GC_SOLARIS_THREADS
  #   define GC_PTHREADS
  # endif
+
  # if defined(__APPLE__) && defined(__MACH__) && defined(__ppc__)
  #   define GC_DARWIN_THREADS
  #   define GC_PTHREADS
@@ -111,10 +112,6 @@
  # endif
  #endif
  
-#if defined(GC_SOLARIS_PTHREADS) && !defined(GC_SOLARIS_THREADS)
-#   define GC_SOLARIS_THREADS
-#endif
-
  # define __GC
  # ifndef _WIN32_WCE
  #   include <stddef.h>
diff --git a/include/gc_inline.h b/include/gc_inline.h

index d2008cf..5f6b6bb 100644 (file)
--- a/include/gc_inline.h
+++ b/include/gc_inline.h
@@ -26,9 +26,12 @@
  #include "gc.h"
  #include "gc_tiny_fl.h"
  
-#ifndef __GNUC__
-#  define __builtin_expect(x, y) (x)
-#endif
+#if __GNUC__ >= 3
+# define GC_EXPECT(expr, outcome) __builtin_expect(expr,outcome)
+  /* Equivalent to (expr), but predict that usually (expr)==outcome. */
+#else
+# define GC_EXPECT(expr, outcome) (expr)
+#endif /* __GNUC__ */
  
  /* The ultimately general inline allocation macro.  Allocate an object */
  /* of size bytes, putting the resulting pointer in result.  Tiny_fl is */
@@ -49,14 +52,14 @@
  # define GC_FAST_MALLOC_GRANS(result,granules,tiny_fl,num_direct,\
                               kind,default_expr,init) \
  { \
-    if (__builtin_expect(granules >= GC_TINY_FREELISTS,0)) { \
+    if (GC_EXPECT(granules >= GC_TINY_FREELISTS,0)) { \
          result = default_expr; \
      } else { \
         void **my_fl = tiny_fl + granules; \
          void *my_entry=*my_fl; \
         void *next; \
   \
-       while (__builtin_expect((word)my_entry \
+       while (GC_EXPECT((word)my_entry \
                                 <= num_direct + GC_TINY_FREELISTS + 1, 0)) { \
             /* Entry contains counter or NULL */ \
             if ((word)my_entry - 1 < num_direct) { \
@@ -81,7 +84,7 @@
         init; \
          PREFETCH_FOR_WRITE(next); \
          GC_ASSERT(GC_size(result) >= bytes + EXTRA_BYTES); \
-        GC_ASSERT(((word *)result)[1] == 0); \
+        GC_ASSERT((kind) == PTRFREE || ((word *)result)[1] == 0); \
        out: ; \
     } \
  }
diff --git a/include/private/gc_locks.h b/include/private/gc_locks.h

index 4dcba2b..5eecc50 100644 (file)
--- a/include/private/gc_locks.h
+++ b/include/private/gc_locks.h
@@ -18,22 +18,10 @@
  #ifndef GC_LOCKS_H
  #define GC_LOCKS_H
  
-#include <atomic_ops.h>
-
  /*
   * Mutual exclusion between allocator/collector routines.
   * Needed if there is more than one allocator thread.
- * FASTLOCK() is assumed to try to acquire the lock in a cheap and
- * dirty way that is acceptable for a few instructions, e.g. by
- * inhibiting preemption.  This is assumed to have succeeded only
- * if a subsequent call to FASTLOCK_SUCCEEDED() returns TRUE.
- * FASTUNLOCK() is called whether or not FASTLOCK_SUCCEEDED().
- * If signals cannot be tolerated with the FASTLOCK held, then
- * FASTLOCK should disable signals.  The code executed under
- * FASTLOCK is otherwise immune to interruption, provided it is
- * not restarted.
- * DCL_LOCK_STATE declares any local variables needed by LOCK and UNLOCK
- * and/or FASTLOCK.
+ * DCL_LOCK_STATE declares any local variables needed by LOCK and UNLOCK.
   *
   * In the PARALLEL_MARK case, we also need to define a number of
   * other inline finctions here:
@@ -44,21 +32,9 @@
   *   
   */  
  # ifdef THREADS
+#  include <atomic_ops.h>
+
     void GC_noop1(word);
-#  ifdef PCR_OBSOLETE  /* Faster, but broken with multiple lwp's       */
-#    include  "th/PCR_Th.h"
-#    include  "th/PCR_ThCrSec.h"
-     extern struct PCR_Th_MLRep GC_allocate_ml;
-#    define DCL_LOCK_STATE  PCR_sigset_t GC_old_sig_mask
-#    define LOCK() PCR_Th_ML_Acquire(&GC_allocate_ml) 
-#    define UNLOCK() PCR_Th_ML_Release(&GC_allocate_ml)
-#    define UNLOCK() PCR_Th_ML_Release(&GC_allocate_ml)
-#    define FASTLOCK() PCR_ThCrSec_EnterSys()
-     /* Here we cheat (a lot): */
-#        define FASTLOCK_SUCCEEDED() (*(int *)(&GC_allocate_ml) == 0)
-               /* TRUE if nobody currently holds the lock */
-#    define FASTUNLOCK() PCR_ThCrSec_ExitSys()
-#  endif
  #  ifdef PCR
  #    include <base/PCR_Base.h>
  #    include <th/PCR_Th.h>
@@ -67,18 +43,37 @@
          PCR_ERes GC_fastLockRes; PCR_sigset_t GC_old_sig_mask
  #    define LOCK() PCR_Th_ML_Acquire(&GC_allocate_ml)
  #    define UNLOCK() PCR_Th_ML_Release(&GC_allocate_ml)
-#    define FASTLOCK() (GC_fastLockRes = PCR_Th_ML_Try(&GC_allocate_ml))
-#    define FASTLOCK_SUCCEEDED() (GC_fastLockRes == PCR_ERes_okay)
-#    define FASTUNLOCK()  {\
-        if( FASTLOCK_SUCCEEDED() ) PCR_Th_ML_Release(&GC_allocate_ml); }
  #  endif
  
  #  if !defined(AO_have_test_and_set_acquire)
  #    define USE_PTHREAD_LOCKS
  #  endif
  
+#  if defined(GC_WIN32_THREADS) && defined(GC_PTHREADS)
+#    define USE_PTHREAD_LOCKS
+#  endif
  
-#  if defined(GC_PTHREADS) && !defined(GC_WIN32_THREADS)
+#  if defined(GC_WIN32_THREADS) && !defined(USE_PTHREAD_LOCKS)
+#    include <windows.h>
+#    define NO_THREAD (DWORD)(-1)
+     extern DWORD GC_lock_holder;
+     extern CRITICAL_SECTION GC_allocate_ml;
+#    ifdef GC_ASSERTIONS
+#        define UNCOND_LOCK() \
+               { EnterCriticalSection(&GC_allocate_ml); \
+                 SET_LOCK_HOLDER(); }
+#        define UNCOND_UNLOCK() \
+               { GC_ASSERT(I_HOLD_LOCK()); UNSET_LOCK_HOLDER(); \
+                 LeaveCriticalSection(&GC_allocate_ml); }
+#    else
+#      define UNCOND_LOCK() EnterCriticalSection(&GC_allocate_ml);
+#      define UNCOND_UNLOCK() LeaveCriticalSection(&GC_allocate_ml);
+#    endif /* !GC_ASSERTIONS */
+#    define SET_LOCK_HOLDER() GC_lock_holder = GetCurrentThreadId()
+#    define UNSET_LOCK_HOLDER() GC_lock_holder = NO_THREAD
+#    define I_HOLD_LOCK() (!GC_need_to_lock \
+                          || GC_lock_holder == GetCurrentThreadId())
+#  elif defined(GC_PTHREADS)
  #    define NO_THREAD (pthread_t)(-1)
  #    include <pthread.h>
  
@@ -144,29 +139,16 @@
  #    endif
  #  endif /* GC_PTHREADS with linux_threads.c implementation */
  
-#  if defined(GC_WIN32_THREADS)
-#    if defined(GC_PTHREADS)
-#      include <pthread.h>
-       extern pthread_mutex_t GC_allocate_ml;
-#      define UNCOND_LOCK()   pthread_mutex_lock(&GC_allocate_ml)
-#      define UNCOND_UNLOCK() pthread_mutex_unlock(&GC_allocate_ml)
-#    else
-#      include <windows.h>
-       GC_API CRITICAL_SECTION GC_allocate_ml;
-#      define UNCOND_LOCK() EnterCriticalSection(&GC_allocate_ml);
-#      define UNCOND_UNLOCK() LeaveCriticalSection(&GC_allocate_ml);
-#    endif
-#  endif
-#  ifndef SET_LOCK_HOLDER
-#      define SET_LOCK_HOLDER()
-#      define UNSET_LOCK_HOLDER()
-#      define I_HOLD_LOCK() FALSE
-               /* Used on platforms were locks can be reacquired,      */
-               /* so it doesn't matter if we lie.                      */
-#  endif
+
  # else /* !THREADS */
-#    define LOCK()
-#    define UNLOCK()
+#   define LOCK()
+#   define UNLOCK()
+#   define SET_LOCK_HOLDER()
+#   define UNSET_LOCK_HOLDER()
+#   define I_HOLD_LOCK() TRUE
+                       /* Used only in positive assertions or to test whether  */
+                       /* we still need to acquire the lock.  TRUE works in    */
+                       /* either case.                                         */
  # endif /* !THREADS */
  
  #if defined(UNCOND_LOCK) && !defined(LOCK) 
@@ -176,14 +158,6 @@
  #    define UNLOCK() if (GC_need_to_lock) { UNCOND_UNLOCK(); }
  #endif
  
-# ifndef SET_LOCK_HOLDER
-#   define SET_LOCK_HOLDER()
-#   define UNSET_LOCK_HOLDER()
-#   define I_HOLD_LOCK() FALSE
-               /* Used on platforms were locks can be reacquired,      */
-               /* so it doesn't matter if we lie.                      */
-# endif
-
  # ifndef ENTER_GC
  #   define ENTER_GC()
  #   define EXIT_GC()
@@ -193,10 +167,4 @@
  #   define DCL_LOCK_STATE
  # endif
  
-# ifndef FASTLOCK
-#   define FASTLOCK() LOCK()
-#   define FASTLOCK_SUCCEEDED() TRUE
-#   define FASTUNLOCK() UNLOCK()
-# endif
-
  #endif /* GC_LOCKS_H */
diff --git a/include/private/gc_pmark.h b/include/private/gc_pmark.h

index 8a79b9d..1e96f18 100644 (file)
--- a/include/private/gc_pmark.h
+++ b/include/private/gc_pmark.h
@@ -167,23 +167,26 @@ exit_label: ; \
  /* Set mark bit, exit if it was already set.   */
  
  # ifdef USE_MARK_BITS
-/* FIXME: untested */
-#   if defined(THREADS)
-      /* Introduces a benign race as in the byte case. */
-#     define OR_WORD_EXIT_IF_SET(addr, mask, label) \
-       if (!(*(addr) & (mask))) { \
-         AO_or((AO_t *)(addr), (mask); \
-       } else { \
-         goto label; \
-       }
-#   else /* !THREADS */
-#     define OR_WORD_EXIT_IF_SET(addr, mask, label) \
-       if (!(*(addr) & (mask))) { \
-         *(addr) |= (mask); \
-       } else { \
-         goto label; \
-       }
-#   endif
+#   ifdef PARALLEL_MARK
+      /* The following may fail to exit even if the bit was already set.    */
+      /* For our uses, that's benign:                                       */
+#     define OR_WORD_EXIT_IF_SET(addr, bits, exit_label) \
+        { \
+          if (!(*(addr) & (bits))) { \
+            AO_or((volatile AO_t *)(addr), (AO_t)(bits)); \
+          } else { \
+            goto exit_label; \
+          } \
+        }
+#   else
+#     define OR_WORD_EXIT_IF_SET(addr, bits, exit_label) \
+        { \
+           word old = *(addr); \
+           word my_bits = (bits); \
+           if (old & my_bits) goto exit_label; \
+           *(addr) = (old | my_bits); \
+         }
+#   endif /* !PARALLEL_MARK */
  #   define SET_MARK_BIT_EXIT_IF_SET(hhdr,bit_no,exit_label) \
      { \
          word * mark_word_addr = hhdr -> hb_marks + divWORDSZ(bit_no); \
@@ -194,18 +197,19 @@ exit_label: ; \
  # endif
  
  
-#if defined(I386) && defined(__GNUC__)
+#ifdef USE_MARK_BYTES
+# if defined(I386) && defined(__GNUC__)
  #  define LONG_MULT(hprod, lprod, x, y) { \
         asm("mull %2" : "=a"(lprod), "=d"(hprod) : "g"(y), "0"(x)); \
     }
-#else /* No in-line X86 assembly code */
+# else /* No in-line X86 assembly code */
  #  define LONG_MULT(hprod, lprod, x, y) { \
         unsigned long long prod = (unsigned long long)x \
                                   * (unsigned long long)y; \
         hprod = prod >> 32;  \
         lprod = (unsigned32)prod;  \
     }
-#endif
+# endif
  
    /* There is a race here, and we may set                              */
    /* the bit twice in the concurrent case.  This can result in the     */
@@ -218,6 +222,7 @@ exit_label: ; \
         if (mark_byte) goto exit_label; \
         *mark_byte_addr = 1;  \
      } 
+#endif /* USE_MARK_BYTES */
  
  #ifdef PARALLEL_MARK
  # define INCR_MARKS(hhdr) \
diff --git a/include/private/gc_priv.h b/include/private/gc_priv.h

index d65a393..b55a673 100644 (file)
--- a/include/private/gc_priv.h
+++ b/include/private/gc_priv.h
@@ -63,9 +63,6 @@ typedef char * ptr_t; /* A generic pointer to which we can add        */
  
  # ifndef GCCONFIG_H
  #   include "gcconfig.h"
-#   ifndef USE_MARK_BYTES
-#     define USE_MARK_BYTES
-#   endif
  # endif
  
  # ifndef HEADERS_H
@@ -74,8 +71,8 @@ typedef char * ptr_t; /* A generic pointer to which we can add        */
  
  #if __GNUC__ >= 3
  # define EXPECT(expr, outcome) __builtin_expect(expr,outcome)
-# define INLINE inline
    /* Equivalent to (expr), but predict that usually (expr)==outcome. */
+# define INLINE inline
  #else
  # define EXPECT(expr, outcome) (expr)
  # define INLINE
@@ -192,17 +189,6 @@ typedef char * ptr_t;      /* A generic pointer to which we can add        */
  /*                               */
  /*********************************/
  
-#ifdef SAVE_CALL_CHAIN
-
-/* Fill in the pc and argument information for up to NFRAMES of my     */
-/* callers.  Ignore my frame and my callers frame.                     */
-struct callinfo;
-void GC_save_callers(struct callinfo info[NFRAMES]);
-  
-void GC_print_callers(struct callinfo info[NFRAMES]);
-
-#endif
-
  #ifdef NEED_CALLINFO
      struct callinfo {
         word ci_pc;     /* Caller, not callee, pc       */
@@ -216,6 +202,16 @@ void GC_print_callers(struct callinfo info[NFRAMES]);
      };
  #endif
  
+#ifdef SAVE_CALL_CHAIN
+
+/* Fill in the pc and argument information for up to NFRAMES of my     */
+/* callers.  Ignore my frame and my callers frame.                     */
+void GC_save_callers(struct callinfo info[NFRAMES]);
+  
+void GC_print_callers(struct callinfo info[NFRAMES]);
+
+#endif
+
  
  /*********************************/
  /*                               */
@@ -331,10 +327,10 @@ void GC_print_callers(struct callinfo info[NFRAMES]);
  #   define ABORT(s) PCR_Base_Panic(s)
  # else
  #   ifdef SMALL_CONFIG
-#      define ABORT(msg) abort();
+#      define ABORT(msg) abort()
  #   else
         GC_API void GC_abort(const char * msg);
-#       define ABORT(msg) GC_abort(msg);
+#       define ABORT(msg) GC_abort(msg)
  #   endif
  # endif
  
@@ -660,10 +656,20 @@ struct hblkhdr {
      counter_t hb_n_marks;      /* Number of set mark bits, excluding   */
                                 /* the one always set at the end.       */
                                 /* Currently it is concurrently         */
-                               /* updated and hence only a lower bound.*/
-                               /* But a zero value does gurantee that  */
+                               /* updated and hence only approximate.  */
+                               /* But a zero value does guarantee that */
                                 /* the block contains no marked         */
                                 /* objects.                             */
+                               /* Ensuring this property means that we */
+                               /* never decrement it to zero during a  */
+                               /* collection, and hence the count may  */
+                               /* be one too high.  Due to concurrent  */
+                               /* updates, an arbitrary number of      */
+                               /* increments, but not all of them (!)  */
+                               /* may be lost, hence it may in theory  */
+                               /* be much too low.                     */
+                               /* Without parallel marking, the count  */
+                               /* is accurate.                         */
  #   ifdef USE_MARK_BYTES
        union {
          char _hb_marks[MARK_BITS_SZ];
@@ -676,12 +682,13 @@ struct hblkhdr {
         word dummy;     /* Force word alignment of mark bytes. */
        } _mark_byte_union;
  #     define hb_marks _mark_byte_union._hb_marks
-#     define ANY_INDEX 23      /* Random mark bit index for assertions */
  #   else
        word hb_marks[MARK_BITS_SZ];
  #   endif /* !USE_MARK_BYTES */
  };
  
+# define ANY_INDEX 23  /* "Random" mark bit index for assertions */
+
  /*  heap block body */
  
  # define HBLK_WORDS (HBLKSIZE/sizeof(word))
@@ -1156,28 +1163,9 @@ extern long GC_large_alloc_warn_suppressed;
  /* accessed.                                                           */
  #ifdef PARALLEL_MARK
  # define OR_WORD(addr, bits) \
-       { word old; \
-         do { \
-           old = *((volatile word *)addr); \
-         } while (!GC_compare_and_exchange((addr), old, old | (bits))); \
-       }
-# define OR_WORD_EXIT_IF_SET(addr, bits, exit_label) \
-       { word old; \
-         word my_bits = (bits); \
-         do { \
-           old = *((volatile word *)addr); \
-           if (old & my_bits) goto exit_label; \
-         } while (!GC_compare_and_exchange((addr), old, old | my_bits)); \
-       }
+       { AO_or((volatile AO_t *)(addr), (AO_t)bits); }
  #else
  # define OR_WORD(addr, bits) *(addr) |= (bits)
-# define OR_WORD_EXIT_IF_SET(addr, bits, exit_label) \
-       { \
-         word old = *(addr); \
-         word my_bits = (bits); \
-         if (old & my_bits) goto exit_label; \
-         *(addr) = (old | my_bits); \
-       }
  #endif
  
  /* Mark bit operations */
@@ -1338,7 +1326,7 @@ void GC_with_callee_saves_pushed(void (*fn)(ptr_t, void *),
  # if defined(SPARC) || defined(IA64)
    /* Cause all stacked registers to be saved in memory.  Return a      */
    /* pointer to the top of the corresponding memory stack.             */
-  word GC_save_regs_in_stack(void);
+  ptr_t GC_save_regs_in_stack(void);
  # endif
                         /* Push register contents onto mark stack.      */
                         /* If NURSERY is defined, the default push      */
@@ -1504,7 +1492,8 @@ ptr_t GC_build_fl(struct hblk *h, size_t words, GC_bool clear, ptr_t list);
                                 /* called by GC_new_hblk, but also      */
                                 /* called explicitly without GC lock.   */
  
-struct hblk * GC_allochblk (size_t size_in_bytes, int kind, unsigned flags);
+struct hblk * GC_allochblk (size_t size_in_bytes, int kind,
+                           unsigned char flags);
                                 /* Allocate a heap block, inform        */
                                 /* the marker that block is valid       */
                                 /* for objects of indicated size.       */
@@ -1766,9 +1755,6 @@ GC_bool GC_page_was_dirty(struct hblk *h);
                         /* Read retrieved dirty bits.   */
  GC_bool GC_page_was_ever_dirty(struct hblk *h);
                         /* Could the page contain valid heap pointers?  */
-void GC_is_fresh(struct hblk *h, word n);
-                       /* Assert the region currently contains no      */
-                       /* valid pointers.                              */
  void GC_remove_protection(struct hblk *h, word nblocks,
                           GC_bool pointerfree);
                         /* h is about to be writteni or allocated.  Ensure  */
@@ -1896,7 +1882,7 @@ void GC_err_puts(const char *s);
                 /* some other reason.                                   */
  # endif /* PARALLEL_MARK */
  
-# if defined(GC_PTHREADS) && !defined(GC_SOLARIS_THREADS)
+# if defined(GC_PTHREADS)
    /* We define the thread suspension signal here, so that we can refer */
    /* to it in the dirty bit implementation, if necessary.  Ideally we  */
    /* would allocate a (real-time ?) signal using the standard mechanism.*/
diff --git a/include/private/gcconfig.h b/include/private/gcconfig.h

index 9fe0419..9b80cbe 100644 (file)
--- a/include/private/gcconfig.h
+++ b/include/private/gcconfig.h
@@ -854,6 +854,7 @@
  #     define ALIGNMENT 4       /* Required by hardware */
  #     define CPP_WORDSZ 32
  #   endif
+#   define USE_ASM_PUSH_REGS
  #   ifdef SUNOS5
  #      define OS_TYPE "SUNOS5"
         extern int _etext[];
@@ -1146,7 +1147,11 @@
  #       if !defined(__WATCOMC__) && !defined(GC_WIN32_THREADS)
  #        define MPROTECT_VDB
  #      endif
-#       define GWW_VDB
+#      if defined(_MSC_VER) && _MSC_VER >= 1300 /* .NET, i.e. > VS 6 */
+#         define GWW_VDB
+#      elif !defined(__WATCOMC__) && !defined(GC_WIN32_THREADS)
+#        define MPROTECT_VDB  /* Same guard as the definition above.   */
+#      endif
  #       define DATAEND  /* not needed */
  #   endif
  #   ifdef MSWINCE
@@ -1490,13 +1495,6 @@
  
  # ifdef IA64
  #   define MACH_TYPE "IA64"
-       /* We need to get preserved registers in addition to register   */
-       /* windows.   That's easiest to do with setjmp.                 */
-#   ifdef PARALLEL_MARK
-#      define USE_MARK_BYTES
-           /* Compare-and-exchange is too expensive to use for         */
-           /* setting mark bits.                                       */
-#   endif
  #   ifdef HPUX
  #      ifdef _ILP32
  #        define CPP_WORDSZ 32
@@ -1992,6 +1990,14 @@
  #   define THREADS
  # endif
  
+# if !defined(USE_MARK_BITS) && !defined(USE_MARK_BYTES)
+#   if defined(THREADS) && defined(PARALLEL_MARK)
+#     define USE_MARK_BYTES
+#   else
+#     define USE_MARK_BITS
+#   endif
+# endif
+
  # if defined(MSWINCE)
  #   define NO_GETENV
  # endif
diff --git a/include/private/pthread_support.h b/include/private/pthread_support.h

index b2ef68e..77f1ad1 100644 (file)
--- a/include/private/pthread_support.h
+++ b/include/private/pthread_support.h
@@ -3,8 +3,7 @@
  
  # include "private/gc_priv.h"
  
-# if defined(GC_PTHREADS) && !defined(GC_SOLARIS_THREADS) \
-     && !defined(GC_WIN32_THREADS)
+# if defined(GC_PTHREADS) && !defined(GC_WIN32_THREADS)
       
  #if defined(GC_DARWIN_THREADS)
  # include "private/darwin_stop_world.h"
@@ -67,7 +66,7 @@ typedef struct GC_Thread_Rep {
  #   endif
  } * GC_thread;
  
-# define THREAD_TABLE_SZ 128   /* Must be power of 2   */
+# define THREAD_TABLE_SZ 256   /* Must be power of 2   */
  extern volatile GC_thread GC_threads[THREAD_TABLE_SZ];
  
  extern GC_bool GC_thr_initialized;
diff --git a/include/private/thread_local_alloc.h b/include/private/thread_local_alloc.h

index 32cbb08..3416931 100644 (file)
--- a/include/private/thread_local_alloc.h
+++ b/include/private/thread_local_alloc.h
@@ -19,6 +19,45 @@
  /* implementation also exports GC_malloc and friends, which    */
  /* are declared in gc.h.                                       */
  
+#include "private/gc_priv.h"
+
+#if defined(THREAD_LOCAL_ALLOC)
+
+#include "gc_inline.h"
+
+
+# if defined USE_HPUX_TLS
+#   error USE_HPUX_TLS macro was replaced by USE_COMPILER_TLS
+# endif
+
+# if !defined(USE_PTHREAD_SPECIFIC) && !defined(USE_WIN32_SPECIFIC) && \
+     !defined(USE_WIN32_COMPILER_TLS) && !defined(USE_COMPILER_TLS) && \
+     !defined(USE_CUSTOM_SPECIFIC)
+#   if defined(MSWIN32) || defined(MSWINCE) || defined(CYGWIN32)
+#     if defined(__GNUC__)  /* Fixed for versions past 2.95? */
+#       define USE_WIN32_SPECIFIC
+#     else
+#       define USE_WIN32_COMPILER_TLS
+#     endif /* !GNU */
+#   elif defined(LINUX) && defined(__GNUC__)
+#     define USE_COMPILER_TLS
+#   elif (defined(GC_DGUX386_THREADS) || defined(GC_OSF1_THREADS) || \
+         defined(GC_DARWIN_THREADS) || defined(GC_AIX_THREADS))
+#     define USE_PTHREAD_SPECIFIC
+#   elif defined(GC_HPUX_THREADS)
+#     ifdef __GNUC__
+#      define USE_PTHREAD_SPECIFIC
+         /* Empirically, as of gcc 3.3, USE_COMPILER_TLS doesn't work. */
+#     else
+#      define USE_COMPILER_TLS
+#     endif
+#   else
+#     define USE_CUSTOM_SPECIFIC  /* Use our own.      */
+#   endif
+# endif
+
+# include <stdlib.h>
+
  /* One of these should be declared as the tlfs field in the    */
  /* structure pointed to by a GC_thread.                                */
  typedef struct thread_local_freelists {
@@ -52,22 +91,27 @@ typedef struct thread_local_freelists {
  #   define GC_key_create pthread_key_create
  #   define GC_remove_specific()  /* No need for cleanup on exit. */
      typedef pthread_key_t GC_key_t;
-# elif defined(USE_COMPILER_TLS)
+# elif defined(USE_COMPILER_TLS) || defined(USE_WIN32_COMPILER_TLS)
  #   define GC_getspecific(x) (x)
  #   define GC_setspecific(key, v) ((key) = (v), 0)
  #   define GC_key_create(key, d) 0
  #   define GC_remove_specific()  /* No need for cleanup on exit. */
      typedef void * GC_key_t;
  # elif defined(USE_WIN32_SPECIFIC)
+#   include <windows.h>
  #   define GC_getspecific TlsGetValue
-#   define GC_setspecific TlsSetValue
+#   define GC_setspecific(key, v) !TlsSetValue(key, v)
+       /* We assume 0 == success, msft does the opposite.      */
  #   define GC_key_create(key, d)  \
         ((d) != 0? (ABORT("Destructor unsupported by TlsAlloc"),0) \
                  : (*(key) = TlsAlloc(), 0))
  #   define GC_remove_specific()  /* No need for cleanup on thread exit. */
         /* Need TlsFree on process exit/detach ? */
-# else
+    typedef DWORD GC_key_t;
+# elif defined(USE_CUSTOM_SPECIFIC)
  #   include "private/specific.h"
+# else
+#   error implement me
  # endif
  
  
@@ -86,14 +130,18 @@ void GC_destroy_thread_local(GC_tlfs p);
  /* we take care of an individual thread freelist structure.    */
  void GC_mark_thread_local_fls_for(GC_tlfs p);
  
-#ifdef USE_COMPILER_TLS
+extern
+#if defined(USE_COMPILER_TLS)
    __thread
+#elif defined(USE_WIN32_COMPILER_TLS)
+  __declspec(thread)
  #endif
  GC_key_t GC_thread_key;
+
  /* This is set up by the thread_local_alloc implementation.  But the   */
  /* thread support layer calls GC_remove_specific(GC_thread_key)                */
  /* before a thread exits.                                              */
  /* And the thread support layer makes sure that GC_thread_key is traced,*/
  /* if necessary.                                                       */
  
-
+#endif /* THREAD_LOCAL_ALLOC */
diff --git a/mach_dep.c b/mach_dep.c

index 50b5665..ca1ace1 100644 (file)
--- a/mach_dep.c
+++ b/mach_dep.c
@@ -65,7 +65,7 @@ asm static void PushMacRegisters()
  # if defined(SPARC) || defined(IA64)
      /* Value returned from register flushing routine; either sp (SPARC) */
      /* or ar.bsp (IA64)                                                        */
-    word GC_save_regs_ret_val;
+    ptr_t GC_save_regs_ret_val;
  # endif
  
  /* Routine to mark from registers that are preserved by the C compiler. */
@@ -265,88 +265,12 @@ ptr_t cold_gc_frame;
      GC_with_callee_saves_pushed(GC_push_current_stack, cold_gc_frame);
  }
  
-/* On register window machines, we need a way to force registers into  */
-/* the stack.  Return sp.                                              */
-# ifdef SPARC
-    asm("      .seg    \"text\"");
-#   if defined(SVR4) || defined(NETBSD) || defined(FREEBSD)
-      asm("    .globl  GC_save_regs_in_stack");
-      asm("GC_save_regs_in_stack:");
-      asm("    .type GC_save_regs_in_stack,#function");
-#   else
-      asm("    .globl  _GC_save_regs_in_stack");
-      asm("_GC_save_regs_in_stack:");
-#   endif
-#   if defined(__arch64__) || defined(__sparcv9)
-      asm("    save    %sp,-128,%sp");
-      asm("    flushw");
-      asm("    ret");
-      asm("    restore %sp,2047+128,%o0");
-#   else
-      asm("    ta      0x3   ! ST_FLUSH_WINDOWS");
-      asm("    retl");
-      asm("    mov     %sp,%o0");
-#   endif
-#   ifdef SVR4
-      asm("    .GC_save_regs_in_stack_end:");
-      asm("    .size GC_save_regs_in_stack,.GC_save_regs_in_stack_end-GC_save_regs_in_stack");
-#   endif
-#   ifdef LINT
-       word GC_save_regs_in_stack() { return(0 /* sp really */);}
-#   endif
-# endif
-
-/* GC_clear_stack_inner(arg, limit) clears stack area up to limit and  */
-/* returns arg.  Stack clearing is crucial on SPARC, so we supply      */
-/* an assembly version that's more careful.  Assumes limit is hotter   */
-/* than sp, and limit is 8 byte aligned.                               */
  #if defined(ASM_CLEAR_CODE)
-#ifndef SPARC
-       --> fix it
-#endif
-  asm(".globl GC_clear_stack_inner");
-  asm("GC_clear_stack_inner:");
-  asm(".type GC_save_regs_in_stack,#function");
-#if defined(__arch64__) || defined(__sparcv9)
-  asm("mov %sp,%o2");          /* Save sp                      */
-  asm("add %sp,2047-8,%o3");   /* p = sp+bias-8                */
-  asm("add %o1,-2047-192,%sp");        /* Move sp out of the way,      */
-                               /* so that traps still work.    */
-                               /* Includes some extra words    */
-                               /* so we can be sloppy below.   */
-  asm("loop:");
-  asm("stx %g0,[%o3]");                /* *(long *)p = 0               */
-  asm("cmp %o3,%o1");
-  asm("bgu,pt %xcc, loop");    /* if (p > limit) goto loop     */
-    asm("add %o3,-8,%o3");     /* p -= 8 (delay slot) */
-  asm("retl");
-    asm("mov %o2,%sp");                /* Restore sp., delay slot      */
-#else
-  asm("mov %sp,%o2");          /* Save sp      */
-  asm("add %sp,-8,%o3");       /* p = sp-8     */
-  asm("clr %g1");              /* [g0,g1] = 0  */
-  asm("add %o1,-0x60,%sp");    /* Move sp out of the way,      */
-                               /* so that traps still work.    */
-                               /* Includes some extra words    */
-                               /* so we can be sloppy below.   */
-  asm("loop:");
-  asm("std %g0,[%o3]");                /* *(long long *)p = 0  */
-  asm("cmp %o3,%o1");
-  asm("bgu loop        ");             /* if (p > limit) goto loop     */
-    asm("add %o3,-8,%o3");     /* p -= 8 (delay slot) */
-  asm("retl");
-    asm("mov %o2,%sp");                /* Restore sp., delay slot      */
-#endif /* old SPARC */
-  /* First argument = %o0 = return value */
-#   ifdef SVR4
-      asm("    .GC_clear_stack_inner_end:");
-      asm("    .size GC_clear_stack_inner,.GC_clear_stack_inner_end-GC_clear_stack_inner");
-#   endif
-  
  # ifdef LINT
      /*ARGSUSED*/
      ptr_t GC_clear_stack_inner(arg, limit)
      ptr_t arg; word limit;
      { return(arg); }
+    /* The real version is in a .S file */
  # endif
  #endif /* ASM_CLEAR_CODE */ 
diff --git a/malloc.c b/malloc.c

index a36956a..1513735 100644 (file)
--- a/malloc.c
+++ b/malloc.c
@@ -215,14 +215,14 @@ void * GC_generic_malloc(size_t lb, int k)
      if(SMALL_OBJ(lb)) {
         lg = GC_size_map[lb];
         opp = &(GC_aobjfreelist[lg]);
-       FASTLOCK();
-        if( EXPECT(!FASTLOCK_SUCCEEDED() || (op = *opp) == 0, 0) ) {
-            FASTUNLOCK();
+       LOCK();
+        if( EXPECT((op = *opp) == 0, 0) ) {
+            UNLOCK();
              return(GENERAL_MALLOC((word)lb, PTRFREE));
          }
          *opp = obj_link(op);
          GC_bytes_allocd += GRANULES_TO_BYTES(lg);
-        FASTUNLOCK();
+        UNLOCK();
          return((void *) op);
     } else {
         return(GENERAL_MALLOC((word)lb, PTRFREE));
@@ -244,9 +244,9 @@ void * GC_generic_malloc(size_t lb, int k)
      if(SMALL_OBJ(lb)) {
         lg = GC_size_map[lb];
         opp = (void **)&(GC_objfreelist[lg]);
-       FASTLOCK();
-        if( EXPECT(!FASTLOCK_SUCCEEDED() || (op = *opp) == 0, 0) ) {
-            FASTUNLOCK();
+       LOCK();
+        if( EXPECT((op = *opp) == 0, 0) ) {
+            UNLOCK();
              return(GENERAL_MALLOC((word)lb, NORMAL));
          }
          /* See above comment on signals.       */
@@ -258,7 +258,7 @@ void * GC_generic_malloc(size_t lb, int k)
          *opp = obj_link(op);
          obj_link(op) = 0;
          GC_bytes_allocd += GRANULES_TO_BYTES(lg);
-        FASTUNLOCK();
+        UNLOCK();
          return op;
     } else {
         return(GENERAL_MALLOC(lb, NORMAL));
diff --git a/mallocx.c b/mallocx.c

index 761514d..91e41d5 100644 (file)
--- a/mallocx.c
+++ b/mallocx.c
@@ -451,8 +451,8 @@ void * GC_malloc_uncollectable(size_t lb)
                   /* collected anyway.                                  */
         lg = GC_size_map[lb];
         opp = &(GC_uobjfreelist[lg]);
-       FASTLOCK();
-        if( FASTLOCK_SUCCEEDED() && (op = *opp) != 0 ) {
+       LOCK();
+        if( (op = *opp) != 0 ) {
              /* See above comment on signals.   */
              *opp = obj_link(op);
              obj_link(op) = 0;
@@ -461,28 +461,31 @@ void * GC_malloc_uncollectable(size_t lb)
             /* cleared only temporarily during a collection, as a       */
             /* result of the normal free list mark bit clearing.        */
              GC_non_gc_bytes += GRANULES_TO_BYTES(lg);
-            FASTUNLOCK();
-            return((void *) op);
-        }
-        FASTUNLOCK();
-        op = (ptr_t)GC_generic_malloc((word)lb, UNCOLLECTABLE);
+            UNLOCK();
+        } else {
+            UNLOCK();
+            op = (ptr_t)GC_generic_malloc((word)lb, UNCOLLECTABLE);
+           /* For small objects, the free lists are completely marked. */
+       }
+       GC_ASSERT(0 == op || GC_is_marked(op));
+        return((void *) op);
      } else {
-       op = (ptr_t)GC_generic_malloc((word)lb, UNCOLLECTABLE);
-    }
-    if (0 == op) return(0);
-    /* We don't need the lock here, since we have an undisguised       */
-    /* pointer.  We do need to hold the lock while we adjust           */
-    /* mark bits.                                                      */
-    {
-       register struct hblk * h;
         size_t lb;
+       hdr * hhdr;
         
-       h = HBLKPTR(op);
-       lb = HDR(h) -> hb_sz;
+       op = (ptr_t)GC_generic_malloc((word)lb, UNCOLLECTABLE);
+        if (0 == op) return(0);
         
+       GC_ASSERT(((word)op & (HBLKSIZE - 1)) == 0); /* large block */
+       hhdr = HDR((struct hblk *)op);
+       /* We don't need the lock here, since we have an undisguised    */
+       /* pointer.  We do need to hold the lock while we adjust        */
+       /* mark bits.                                                   */
+       lb = hhdr -> hb_sz;
         LOCK();
-       GC_set_mark_bit(op);
-       GC_non_gc_bytes += lb;
+       set_mark_bit_from_hdr(hhdr, 0); /* Only object. */
+       GC_ASSERT(hhdr -> hb_n_marks == 0);
+       hhdr -> hb_n_marks = 1;
         UNLOCK();
         return((void *) op);
      }
@@ -538,36 +541,36 @@ void * GC_malloc_atomic_uncollectable(size_t lb)
                   /* collected anyway.                                  */
         lg = GC_size_map[lg];
         opp = &(GC_auobjfreelist[lg]);
-       FASTLOCK();
-        if( FASTLOCK_SUCCEEDED() && (op = *opp) != 0 ) {
+       LOCK();
+        if( (op = *opp) != 0 ) {
              /* See above comment on signals.   */
              *opp = obj_link(op);
              obj_link(op) = 0;
              GC_bytes_allocd += GRANULES_TO_BYTES(lg);
             /* Mark bit was already set while object was on free list. */
              GC_non_gc_bytes += GRANULES_TO_BYTES(lg);
-            FASTUNLOCK();
-            return((void *) op);
-        }
-        FASTUNLOCK();
-        op = (ptr_t)GC_generic_malloc(lb, AUNCOLLECTABLE);
+            UNLOCK();
+        } else {
+            UNLOCK();
+            op = (ptr_t)GC_generic_malloc(lb, AUNCOLLECTABLE);
+       }
+       GC_ASSERT(0 == op || GC_is_marked(op));
+        return((void *) op);
      } else {
-       op = (ptr_t)GC_generic_malloc(lb, AUNCOLLECTABLE);
-    }
-    if (0 == op) return(0);
-    /* We don't need the lock here, since we have an undisguised       */
-    /* pointer.  We do need to hold the lock while we adjust           */
-    /* mark bits.                                                      */
-    {
-       struct hblk * h;
         size_t lb;
+       hdr * hhdr;
         
-       h = HBLKPTR(op);
-       lb = HDR(h) -> hb_sz;
+       op = (ptr_t)GC_generic_malloc(lb, AUNCOLLECTABLE);
+        if (0 == op) return(0);
+
+       GC_ASSERT(((word)op & (HBLKSIZE - 1)) == 0);
+       hhdr = HDR((struct hblk *)op);
+       lb = hhdr -> hb_sz;
         
         LOCK();
-       GC_set_mark_bit(op);
-       GC_non_gc_bytes += lb;
+       set_mark_bit_from_hdr(hhdr, 0); /* Only object. */
+       GC_ASSERT(hhdr -> hb_n_marks == 0);
+       hhdr -> hb_n_marks = 1;
         UNLOCK();
         return((void *) op);
      }
diff --git a/mark.c b/mark.c

index 641f0d6..d46c1ac 100644 (file)
--- a/mark.c
+++ b/mark.c
@@ -156,7 +156,7 @@ void GC_clear_hdr_marks(hdr *hhdr)
  /* Set all mark bits in the header.  Used for uncollectable blocks. */
  void GC_set_hdr_marks(hdr *hhdr)
  {
-    int i;
+    unsigned i;
      size_t sz = hhdr -> hb_sz;
      int n_marks = FINAL_MARK_BIT(sz);
  
@@ -214,7 +214,7 @@ void GC_clear_mark_bit(ptr_t p)
        int n_marks;
        clear_mark_bit_from_hdr(hhdr, bit_no);
        n_marks = hhdr -> hb_n_marks - 1;
-#     ifdef THREADS
+#     ifdef PARALLEL_MARK
          if (n_marks != 0)
            hhdr -> hb_n_marks = n_marks; 
          /* Don't decrement to zero.  The counts are approximate due to */
@@ -1473,7 +1473,7 @@ void GC_push_all_eager(ptr_t bottom, ptr_t top)
      word * b = (word *)(((word) bottom + ALIGNMENT-1) & ~(ALIGNMENT-1));
      word * t = (word *)(((word) top) & ~(ALIGNMENT-1));
      register word *p;
-    register word q;
+    register ptr_t q;
      register word *lim;
      register ptr_t greatest_ha = GC_greatest_plausible_heap_addr;
      register ptr_t least_ha = GC_least_plausible_heap_addr;
@@ -1485,7 +1485,7 @@ void GC_push_all_eager(ptr_t bottom, ptr_t top)
      /* to be valid.                                            */
        lim = t - 1 /* longword */;
        for (p = b; p <= lim; p = (word *)(((ptr_t)p) + ALIGNMENT)) {
-       q = *p;
+       q = (ptr_t)(*p);
         GC_PUSH_ONE_STACK((ptr_t)q, p);
        }
  #   undef GC_greatest_plausible_heap_addr
@@ -1508,7 +1508,6 @@ void GC_push_all_stack_partially_eager(ptr_t bottom, ptr_t top,
                                        ptr_t cold_gc_frame)
  {
    if (!NEED_FIXUP_POINTER && GC_all_interior_pointers) {
-#   define EAGER_BYTES 1024
      /* Push the hot end of the stack eagerly, so that register values   */
      /* saved inside GC frames are marked before they disappear.                */
      /* The rest of the marking can be deferred until later.            */
@@ -1546,21 +1545,52 @@ void GC_push_all_stack(ptr_t bottom, ptr_t top)
  # endif
  }
  
-#if !defined(SMALL_CONFIG) && !defined(USE_MARK_BYTES)
+#if !defined(SMALL_CONFIG) && !defined(USE_MARK_BYTES) && \
+    defined(MARK_BIT_PER_GRANULE)
+# if GC_GRANULE_WORDS == 1
+#   define USE_PUSH_MARKED_ACCELERATORS
+#   define PUSH_GRANULE(q) \
+               { ptr_t qcontents = (ptr_t)((q)[0]); \
+                 GC_PUSH_ONE_HEAP(qcontents, (q)); }
+# elif GC_GRANULE_WORDS == 2
+#   define USE_PUSH_MARKED_ACCELERATORS
+#   define PUSH_GRANULE(q) \
+               { ptr_t qcontents = (ptr_t)((q)[0]); \
+                 GC_PUSH_ONE_HEAP(qcontents, (q)); \
+                 qcontents = (ptr_t)((q)[1]); \
+                 GC_PUSH_ONE_HEAP(qcontents, (q)+1); }
+# elif GC_GRANULE_WORDS == 4
+#   define USE_PUSH_MARKED_ACCELERATORS
+#   define PUSH_GRANULE(q) \
+               { ptr_t qcontents = (ptr_t)((q)[0]); \
+                 GC_PUSH_ONE_HEAP(qcontents, (q)); \
+                 qcontents = (ptr_t)((q)[1]); \
+                 GC_PUSH_ONE_HEAP(qcontents, (q)+1); \
+                 qcontents = (ptr_t)((q)[2]); \
+                 GC_PUSH_ONE_HEAP(qcontents, (q)+2); \
+                 qcontents = (ptr_t)((q)[3]); \
+                 GC_PUSH_ONE_HEAP(qcontents, (q)+3); }
+# endif
+#endif
+
+#ifdef USE_PUSH_MARKED_ACCELERATORS
  /* Push all objects reachable from marked objects in the given block */
-/* of size 1 objects.                                               */
+/* containing objects of size 1 granule.                            */
  void GC_push_marked1(struct hblk *h, hdr *hhdr)
  {
      word * mark_word_addr = &(hhdr->hb_marks[0]);
-    register word *p;
+    word *p;
      word *plim;
-    register int i;
-    register word q;
-    register word mark_word;
-    register ptr_t greatest_ha = GC_greatest_plausible_heap_addr;
-    register ptr_t least_ha = GC_least_plausible_heap_addr;
-    register mse * mark_stack_top = GC_mark_stack_top;
-    register mse * mark_stack_limit = GC_mark_stack_limit;
+    word *q;
+    word mark_word;
+
+    /* Allow registers to be used for some frequently accessed         */
+    /* global variables.  Otherwise aliasing issues are likely */
+    /* to prevent that.                                                */
+    ptr_t greatest_ha = GC_greatest_plausible_heap_addr;
+    ptr_t least_ha = GC_least_plausible_heap_addr;
+    mse * mark_stack_top = GC_mark_stack_top;
+    mse * mark_stack_limit = GC_mark_stack_limit;
  #   define GC_mark_stack_top mark_stack_top
  #   define GC_mark_stack_limit mark_stack_limit
  #   define GC_greatest_plausible_heap_addr greatest_ha
@@ -1572,21 +1602,22 @@ void GC_push_marked1(struct hblk *h, hdr *hhdr)
      /* go through all words in block */
         while( p < plim )  {
             mark_word = *mark_word_addr++;
-           i = 0;
+           q = p;
             while(mark_word != 0) {
               if (mark_word & 1) {
-                 q = p[i];
-                 GC_PUSH_ONE_HEAP(q, p + i);
+                 PUSH_GRANULE(q);
               }
-             i++;
+             q += GC_GRANULE_WORDS;
               mark_word >>= 1;
             }
-           p += WORDSZ;
+           p += WORDSZ*GC_GRANULE_WORDS;
         }
+
  #   undef GC_greatest_plausible_heap_addr
  #   undef GC_least_plausible_heap_addr        
  #   undef GC_mark_stack_top
  #   undef GC_mark_stack_limit
+
      GC_mark_stack_top = mark_stack_top;
  }
  
@@ -1594,19 +1625,20 @@ void GC_push_marked1(struct hblk *h, hdr *hhdr)
  #ifndef UNALIGNED
  
  /* Push all objects reachable from marked objects in the given block */
-/* of size 2 objects.                                               */
+/* of size 2 (granules) objects.                                    */
  void GC_push_marked2(struct hblk *h, hdr *hhdr)
  {
      word * mark_word_addr = &(hhdr->hb_marks[0]);
-    register word *p;
+    word *p;
      word *plim;
-    register int i;
-    register word q;
-    register word mark_word;
-    register ptr_t greatest_ha = GC_greatest_plausible_heap_addr;
-    register ptr_t least_ha = GC_least_plausible_heap_addr;
-    register mse * mark_stack_top = GC_mark_stack_top;
-    register mse * mark_stack_limit = GC_mark_stack_limit;
+    word *q;
+    word mark_word;
+
+    ptr_t greatest_ha = GC_greatest_plausible_heap_addr;
+    ptr_t least_ha = GC_least_plausible_heap_addr;
+    mse * mark_stack_top = GC_mark_stack_top;
+    mse * mark_stack_limit = GC_mark_stack_limit;
+
  #   define GC_mark_stack_top mark_stack_top
  #   define GC_mark_stack_limit mark_stack_limit
  #   define GC_greatest_plausible_heap_addr greatest_ha
@@ -1618,42 +1650,43 @@ void GC_push_marked2(struct hblk *h, hdr *hhdr)
      /* go through all words in block */
         while( p < plim )  {
             mark_word = *mark_word_addr++;
-           i = 0;
+           q = p;
             while(mark_word != 0) {
               if (mark_word & 1) {
-                 q = p[i];
-                 GC_PUSH_ONE_HEAP(q, p + i);
-                 q = p[i+1];
-                 GC_PUSH_ONE_HEAP(q, p + i);
+                 PUSH_GRANULE(q);
+                 PUSH_GRANULE(q + GC_GRANULE_WORDS);
               }
-             i += 2;
+             q += 2 * GC_GRANULE_WORDS;
               mark_word >>= 2;
             }
-           p += WORDSZ;
+           p += WORDSZ*GC_GRANULE_WORDS;
         }
+
  #   undef GC_greatest_plausible_heap_addr
  #   undef GC_least_plausible_heap_addr        
  #   undef GC_mark_stack_top
  #   undef GC_mark_stack_limit
+
      GC_mark_stack_top = mark_stack_top;
  }
  
+# if GC_GRANULE_WORDS < 4
  /* Push all objects reachable from marked objects in the given block */
-/* of size 4 objects.                                               */
+/* of size 4 (granules) objects.                                    */
  /* There is a risk of mark stack overflow here.  But we handle that. */
  /* And only unmarked objects get pushed, so it's not very likely.    */
  void GC_push_marked4(struct hblk *h, hdr *hhdr)
  {
      word * mark_word_addr = &(hhdr->hb_marks[0]);
-    register word *p;
+    word *p;
      word *plim;
-    register int i;
-    register word q;
-    register word mark_word;
-    register ptr_t greatest_ha = GC_greatest_plausible_heap_addr;
-    register ptr_t least_ha = GC_least_plausible_heap_addr;
-    register mse * mark_stack_top = GC_mark_stack_top;
-    register mse * mark_stack_limit = GC_mark_stack_limit;
+    word *q;
+    word mark_word;
+
+    ptr_t greatest_ha = GC_greatest_plausible_heap_addr;
+    ptr_t least_ha = GC_least_plausible_heap_addr;
+    mse * mark_stack_top = GC_mark_stack_top;
+    mse * mark_stack_limit = GC_mark_stack_limit;
  #   define GC_mark_stack_top mark_stack_top
  #   define GC_mark_stack_limit mark_stack_limit
  #   define GC_greatest_plausible_heap_addr greatest_ha
@@ -1665,22 +1698,18 @@ void GC_push_marked4(struct hblk *h, hdr *hhdr)
      /* go through all words in block */
         while( p < plim )  {
             mark_word = *mark_word_addr++;
-           i = 0;
+           q = p;
             while(mark_word != 0) {
               if (mark_word & 1) {
-                 q = p[i];
-                 GC_PUSH_ONE_HEAP(q, p + i);
-                 q = p[i+1];
-                 GC_PUSH_ONE_HEAP(q, p + i + 1);
-                 q = p[i+2];
-                 GC_PUSH_ONE_HEAP(q, p + i + 2);
-                 q = p[i+3];
-                 GC_PUSH_ONE_HEAP(q, p + i + 3);
+                 PUSH_GRANULE(q);
+                 PUSH_GRANULE(q + GC_GRANULE_WORDS);
+                 PUSH_GRANULE(q + 2*GC_GRANULE_WORDS);
+                 PUSH_GRANULE(q + 3*GC_GRANULE_WORDS);
               }
-             i += 4;
+             q += 4 * GC_GRANULE_WORDS;
               mark_word >>= 4;
             }
-           p += WORDSZ;
+           p += WORDSZ*GC_GRANULE_WORDS;
         }
  #   undef GC_greatest_plausible_heap_addr
  #   undef GC_least_plausible_heap_addr        
@@ -1689,9 +1718,11 @@ void GC_push_marked4(struct hblk *h, hdr *hhdr)
      GC_mark_stack_top = mark_stack_top;
  }
  
+#endif /* GC_GRANULE_WORDS < 4 */
+
  #endif /* UNALIGNED */
  
-#endif /* SMALL_CONFIG */
+#endif /* USE_PUSH_MARKED_ACCELERATORS */
  
  /* Push all objects reachable from marked objects in the given block */
  void GC_push_marked(struct hblk *h, hdr *hhdr)
@@ -1715,20 +1746,21 @@ void GC_push_marked(struct hblk *h, hdr *hhdr)
          lim = (h + 1)->hb_body - sz;
      }
      
-    switch(BYTES_TO_WORDS(sz)) {
-#   if !defined(SMALL_CONFIG) && !defined(USE_MARK_BYTES)   
+    switch(BYTES_TO_GRANULES(sz)) {
+#   if defined(USE_PUSH_MARKED_ACCELERATORS)
       case 1:
         GC_push_marked1(h, hhdr);
         break;
-#   endif
-#   if !defined(SMALL_CONFIG) && !defined(UNALIGNED) && \
-       !defined(USE_MARK_BYTES)
-     case 2:
-       GC_push_marked2(h, hhdr);
-       break;
-     case 4:
-       GC_push_marked4(h, hhdr);
-       break;
+#    if !defined(UNALIGNED)
+       case 2:
+         GC_push_marked2(h, hhdr);
+         break;
+#     if GC_GRANULE_WORDS < 4
+       case 4:
+         GC_push_marked4(h, hhdr);
+         break;
+#     endif
+#    endif
  #   endif       
       default:
        GC_mark_stack_top_reg = GC_mark_stack_top;
diff --git a/mark_rts.c b/mark_rts.c

index 19ea80a..bd97c6e 100644 (file)
--- a/mark_rts.c
+++ b/mark_rts.c
@@ -593,7 +593,7 @@ void GC_push_roots(GC_bool all, ptr_t cold_gc_frame)
       /* If the world is not stopped, this is unsafe.  It is    */
       /* also unnecessary, since we will do this again with the */
       /* world stopped.                                         */
-#      if defined(THREAD_LOCAL_ALLOC) && !defined(DBG_HDRS_ALL)
+#      if defined(THREAD_LOCAL_ALLOC)
           if (GC_world_stopped) GC_mark_thread_local_free_lists();
  #      endif
  
diff --git a/misc.c b/misc.c

index 70e37fb..10bf512 100644 (file)
--- a/misc.c
+++ b/misc.c
@@ -43,31 +43,12 @@
    int GC_log;  /* Forward decl, so we can set it.      */
  #endif
  
-# ifdef THREADS
-#   ifdef PCR
-#     include "il/PCR_IL.h"
-      PCR_Th_ML GC_allocate_ml;
-#   elif defined(GC_WIN32_THREADS) 
-#     if defined(GC_PTHREADS)
-       pthread_mutex_t GC_allocate_ml = PTHREAD_MUTEX_INITIALIZER;
-#     elif defined(GC_DLL)
-        __declspec(dllexport) CRITICAL_SECTION GC_allocate_ml;
-#     else
-        CRITICAL_SECTION GC_allocate_ml;
-#     endif
-#   elif defined(GC_PTHREADS)
-#     if defined(USE_SPIN_LOCK)
-        pthread_t GC_lock_holder = NO_THREAD;
-#     else
-       pthread_mutex_t GC_allocate_ml = PTHREAD_MUTEX_INITIALIZER;
-       pthread_t GC_lock_holder = NO_THREAD;
-               /* Used only for assertions, and to prevent      */
-               /* recursive reentry in the system call wrapper. */
-#     endif 
-#   else
-       --> declare allocator lock here
-#   endif
-# endif
+#if defined(THREADS) && defined(PCR)
+# include "il/PCR_IL.h"
+  PCR_Th_ML GC_allocate_ml;
+#endif
+/* For other platforms with threads, the lock and possibly             */
+/* GC_lock_holder variables are defined in the thread support code.    */
  
  #if defined(NOSYS) || defined(ECOS)
  #undef STACKBASE
@@ -157,7 +138,7 @@ void * GC_project2(void *arg1, void *arg2)
  /* quantization alogrithm (but we precompute it).                      */ 
  void GC_init_size_map(void)
  {
-    register unsigned i;
+    int i;
  
      /* Map size 0 to something bigger.                 */
      /* This avoids problems at lower levels.           */
@@ -423,7 +404,7 @@ void GC_init(void)
  #if defined(GC_WIN32_THREADS) && !defined(GC_PTHREADS)
      if (!GC_is_initialized) {
        BOOL (WINAPI *pfn) (LPCRITICAL_SECTION, DWORD) = NULL;
-      HMODULE hK32 = GetModuleHandle("kernel32.dll");
+      HMODULE hK32 = GetModuleHandleA("kernel32.dll");
        if (hK32)
            (FARPROC) pfn = GetProcAddress(hK32,
                           "InitializeCriticalSectionAndSpinCount");
diff --git a/os_dep.c b/os_dep.c

index d78f8e9..e43062f 100644 (file)
--- a/os_dep.c
+++ b/os_dep.c
@@ -80,10 +80,12 @@
  # undef GC_AMIGA_DEF
  #endif
  
-#if defined(MSWIN32) || defined(MSWINCE)
+#if defined(MSWIN32) || defined(MSWINCE) || defined(CYGWIN32)
  # define WIN32_LEAN_AND_MEAN
  # define NOSERVICE
  # include <windows.h>
+  /* It's not clear this is completely kosher under Cygwin.  But it    */
+  /* allows us to get a working GC_get_stack_base.                     */
  #endif
  
  #ifdef MACOS
@@ -468,7 +470,7 @@ void GC_enable_signals(void)
        && !defined(MACOS) && !defined(DJGPP) && !defined(DOS4GW) \
        && !defined(NOSYS) && !defined(ECOS)
  
-#   if defined(sigmask) && !defined(UTS4) && !defined(HURD)
+#   if 0
         /* Use the traditional BSD interface */
  #      define SIGSET_T int
  #      define SIG_DEL(set, signal) (set) &= ~(sigmask(signal))
@@ -477,14 +479,15 @@ void GC_enable_signals(void)
           /* longjmp implementations.  Most systems appear not to have  */
           /* a signal 32.                                               */
  #      define SIGSETMASK(old, new) (old) = sigsetmask(new)
-#   else
-       /* Use POSIX/SYSV interface     */
-#      define SIGSET_T sigset_t
-#      define SIG_DEL(set, signal) sigdelset(&(set), (signal))
-#      define SIG_FILL(set) sigfillset(&set)
-#      define SIGSETMASK(old, new) sigprocmask(SIG_SETMASK, &(new), &(old))
  #   endif
  
+    /* Use POSIX/SYSV interface        */
+#   define SIGSET_T sigset_t
+#   define SIG_DEL(set, signal) sigdelset(&(set), (signal))
+#   define SIG_FILL(set) sigfillset(&set)
+#   define SIGSETMASK(old, new) sigprocmask(SIG_SETMASK, &(new), &(old))
+
+
  static GC_bool mask_initialized = FALSE;
  
  static SIGSET_T new_mask;
@@ -578,7 +581,7 @@ word GC_page_size;
   * With threads, GC_mark_roots needs to know how to do this.
   * Called with allocator lock held.
   */
-# if defined(MSWIN32) || defined(MSWINCE)
+# if defined(MSWIN32) || defined(MSWINCE) || defined(CYGWIN32)
  # define is_writable(prot) ((prot) == PAGE_READWRITE \
                             || (prot) == PAGE_WRITECOPY \
                             || (prot) == PAGE_EXECUTE_READWRITE \
@@ -970,7 +973,8 @@ ptr_t GC_get_main_stack_base(void)
  #endif /* FREEBSD_STACKBOTTOM */
  
  #if !defined(BEOS) && !defined(AMIGA) && !defined(MSWIN32) \
-    && !defined(MSWINCE) && !defined(OS2) && !defined(NOSYS) && !defined(ECOS)
+    && !defined(MSWINCE) && !defined(OS2) && !defined(NOSYS) && !defined(ECOS) \
+    && !defined(CYGWIN32)
  
  ptr_t GC_get_main_stack_base(void)
  {
@@ -2021,8 +2025,7 @@ void GC_default_push_other_roots(void)
  # endif /* PCR */
  
  
-# if defined(GC_SOLARIS_THREADS) || defined(GC_PTHREADS) || \
-     defined(GC_WIN32_THREADS)
+# if defined(GC_PTHREADS) || defined(GC_WIN32_THREADS)
  
  extern void GC_push_all_stacks(void);
  
@@ -2031,7 +2034,7 @@ void GC_default_push_other_roots(void)
      GC_push_all_stacks();
  }
  
-# endif /* GC_SOLARIS_THREADS || GC_PTHREADS */
+# endif /* GC_WIN32_THREADS || GC_PTHREADS */
  
  void (*GC_push_other_roots)(void) = GC_default_push_other_roots;
  
@@ -2182,8 +2185,6 @@ void GC_or_pages(page_hash_table pht1, page_hash_table pht2)
    }
  
  # ifndef MPROTECT_VDB
-    void GC_is_fresh(struct hblk *h, word n)
-    {}
      void GC_remove_protection(struct hblk *h, word nblocks, GC_bool is_ptrfree)
      {}
  # endif
@@ -2235,11 +2236,6 @@ GC_bool GC_page_was_ever_dirty(struct hblk *h)
      return(TRUE);
  }
  
-/* Reset the n pages starting at h to "was never dirty" status.        */
-void GC_is_fresh(struct hblk *h, word n)
-{
-}
-
  /* A call that:                                                */
  /* I) hints that [h, h+nblocks) is about to be written.        */
  /* II) guarantees that protection is removed.          */
@@ -2302,11 +2298,6 @@ void GC_dirty(ptr_t p)
      async_set_pht_entry_from_index(GC_dirty_pages, index);
  }
  
-/* Reset the n pages starting at h to "was never dirty" status.        */
-void GC_is_fresh(struct hblk *h, word n)
-{
-}
-
  /*ARGSUSED*/
  void GC_remove_protection(struct hblk *h, word nblocks, GC_bool is_ptrfree)
  {
@@ -2980,12 +2971,6 @@ GC_bool GC_page_was_ever_dirty(struct hblk *h)
      return(TRUE);
  }
  
-/* Reset the n pages starting at h to "was never dirty" status.        */
-/*ARGSUSED*/
-void GC_is_fresh(struct hblk *h, word n)
-{
-}
-
  # endif /* MPROTECT_VDB */
  
  # ifdef PROC_VDB
@@ -3013,23 +2998,6 @@ void GC_is_fresh(struct hblk *h, word n)
  word GC_proc_buf_size = INITIAL_BUF_SZ;
  char *GC_proc_buf;
  
-#ifdef GC_SOLARIS_THREADS
-/* We don't have exact sp values for threads.  So we count on  */
-/* occasionally declaring stack pages to be fresh.  Thus we    */
-/* need a real implementation of GC_is_fresh.  We can't clear  */
-/* entries in GC_written_pages, since that would declare all   */
-/* pages with the given hash address to be fresh.              */
-#   define MAX_FRESH_PAGES 8*1024      /* Must be power of 2 */
-    struct hblk ** GC_fresh_pages;     /* A direct mapped cache.       */
-                                       /* Collisions are dropped.      */
-
-#   define FRESH_PAGE_SLOT(h) (divHBLKSZ((word)(h)) & (MAX_FRESH_PAGES-1))
-#   define ADD_FRESH_PAGE(h) \
-       GC_fresh_pages[FRESH_PAGE_SLOT(h)] = (h)
-#   define PAGE_IS_FRESH(h) \
-       (GC_fresh_pages[FRESH_PAGE_SLOT(h)] == (h) && (h) != 0)
-#endif
-
  int GC_proc_fd;
  
  void GC_dirty_init(void)
@@ -3060,15 +3028,6 @@ void GC_dirty_init(void)
         ABORT("/proc ioctl failed");
      }
      GC_proc_buf = GC_scratch_alloc(GC_proc_buf_size);
-#   ifdef GC_SOLARIS_THREADS
-       GC_fresh_pages = (struct hblk **)
-         GC_scratch_alloc(MAX_FRESH_PAGES * sizeof (struct hblk *));
-       if (GC_fresh_pages == 0) {
-           GC_err_printf("No space for fresh pages\n");
-           EXIT();
-       }
-       BZERO(GC_fresh_pages, MAX_FRESH_PAGES * sizeof (struct hblk *));
-#   endif
  }
  
  /* Ignore write hints. They don't help us here.        */
@@ -3080,11 +3039,7 @@ GC_bool is_ptrfree;
  {
  }
  
-#ifdef GC_SOLARIS_THREADS
-#   define READ(fd,buf,nbytes) syscall(SYS_read, fd, buf, nbytes)
-#else
-#   define READ(fd,buf,nbytes) read(fd, buf, nbytes)
-#endif
+# define READ(fd,buf,nbytes) read(fd, buf, nbytes)
  
  void GC_read_dirty(void)
  {
@@ -3117,10 +3072,6 @@ void GC_read_dirty(void)
                  /* Punt:       */
                 memset(GC_grungy_pages, 0xff, sizeof (page_hash_table));
                 memset(GC_written_pages, 0xff, sizeof(page_hash_table));
-#              ifdef GC_SOLARIS_THREADS
-                   BZERO(GC_fresh_pages,
-                         MAX_FRESH_PAGES * sizeof (struct hblk *)); 
-#              endif
                 return;
              }
          }
@@ -3147,15 +3098,6 @@ void GC_read_dirty(void)
                         register word index = PHT_HASH(h);
                         
                         set_pht_entry_from_index(GC_grungy_pages, index);
-#                      ifdef GC_SOLARIS_THREADS
-                         {
-                           register int slot = FRESH_PAGE_SLOT(h);
-                           
-                           if (GC_fresh_pages[slot] == h) {
-                               GC_fresh_pages[slot] = 0;
-                           }
-                         }
-#                      endif
                         h++;
                     }
                 }
@@ -3165,30 +3107,16 @@ void GC_read_dirty(void)
         }
      /* Update GC_written_pages. */
          GC_or_pages(GC_written_pages, GC_grungy_pages);
-#   ifdef GC_SOLARIS_THREADS
-      /* Make sure that old stacks are considered completely clean     */
-      /* unless written again.                                         */
-       GC_old_stacks_are_fresh();
-#   endif
  }
  
  #undef READ
  
  GC_bool GC_page_was_dirty(struct hblk *h)
-struct hblk *h;
  {
      register word index = PHT_HASH(h);
      register GC_bool result;
      
      result = get_pht_entry_from_index(GC_grungy_pages, index);
-#   ifdef GC_SOLARIS_THREADS
-       if (result && PAGE_IS_FRESH(h)) result = FALSE;
-       /* This happens only if page was declared fresh since   */
-       /* the read_dirty call, e.g. because it's in an unused  */
-       /* thread stack.  It's OK to treat it as clean, in      */
-       /* that case.  And it's consistent with                 */
-       /* GC_page_was_ever_dirty.                              */
-#   endif
      return(result);
  }
  
@@ -3198,29 +3126,9 @@ GC_bool GC_page_was_ever_dirty(struct hblk *h)
      register GC_bool result;
      
      result = get_pht_entry_from_index(GC_written_pages, index);
-#   ifdef GC_SOLARIS_THREADS
-       if (result && PAGE_IS_FRESH(h)) result = FALSE;
-#   endif
      return(result);
  }
  
-/* Caller holds allocation lock.       */
-void GC_is_fresh(struct hblk *h, word n)
-{
-
-    register word index;
-    
-#   ifdef GC_SOLARIS_THREADS
-      register word i;
-      
-      if (GC_fresh_pages != 0) {
-        for (i = 0; i < n; i++) {
-          ADD_FRESH_PAGE(h + i);
-        }
-      }
-#   endif
-}
-
  # endif /* PROC_VDB */
  
  
diff --git a/pthread_stop_world.c b/pthread_stop_world.c

index bd1f67e..33cc9e0 100644 (file)
--- a/pthread_stop_world.c
+++ b/pthread_stop_world.c
@@ -1,7 +1,7 @@
  #include "private/pthread_support.h"
  
-#if defined(GC_PTHREADS) && !defined(GC_SOLARIS_THREADS) \
-     && !defined(GC_WIN32_THREADS) && !defined(GC_DARWIN_THREADS)
+#if defined(GC_PTHREADS) && !defined(GC_WIN32_THREADS) && \
+    !defined(GC_DARWIN_THREADS)
  
  #include <signal.h>
  #include <semaphore.h>
@@ -160,12 +160,12 @@ void GC_suspend_handler_inner(ptr_t sig_arg, void *context)
         return;
      }
  #   ifdef SPARC
-       me -> stop_info.stack_ptr = (ptr_t)GC_save_regs_in_stack();
+       me -> stop_info.stack_ptr = GC_save_regs_in_stack();
  #   else
         me -> stop_info.stack_ptr = (ptr_t)(&dummy);
  #   endif
  #   ifdef IA64
-       me -> backing_store_ptr = (ptr_t)GC_save_regs_in_stack();
+       me -> backing_store_ptr = GC_save_regs_in_stack();
  #   endif
  
      /* Tell the thread that wants to stop the world that this   */
@@ -282,6 +282,8 @@ void GC_push_all_stacks()
                       (unsigned)p -> id, bs_lo, bs_hi);
  #        endif
            if (pthread_equal(p -> id, me)) {
+           /* FIXME:  This may add an unbounded number of entries,     */
+           /* and hence overflow the mark stack, which is bad.         */
             GC_push_all_eager(bs_lo, bs_hi);
           } else {
             GC_push_all_stack(bs_lo, bs_hi);
@@ -340,13 +342,13 @@ int GC_suspend_all()
      return n_live_threads;
  }
  
-/* Caller holds allocation lock.       */
  void GC_stop_world()
  {
      int i;
      int n_live_threads;
      int code;
  
+    GC_ASSERT(I_HOLD_LOCK());
      #if DEBUG_THREADS
        GC_printf("Stopping the world from 0x%x\n", (unsigned)pthread_self());
      #endif
diff --git a/pthread_support.c b/pthread_support.c

index 5b25525..a8c3c6b 100644 (file)
--- a/pthread_support.c
+++ b/pthread_support.c
@@ -80,7 +80,15 @@
  # include <sys/sysctl.h>
  #endif /* GC_DARWIN_THREADS */
  
-
+/* Allocator lock definitions.         */
+#if defined(USE_SPIN_LOCK)
+  pthread_t GC_lock_holder = NO_THREAD;
+#else
+  pthread_mutex_t GC_allocate_ml = PTHREAD_MUTEX_INITIALIZER;
+  pthread_t GC_lock_holder = NO_THREAD;
+               /* Used only for assertions, and to prevent      */
+               /* recursive reentry in the system call wrapper. */
+#endif
  
  #if defined(GC_DGUX386_THREADS)
  # include <sys/dg_sys_info.h>
@@ -241,7 +249,7 @@ void GC_mark_thread_local_free_lists(void)
             GC_check_tls_for(&(p->tlfs));
           }
         }
-#       if !defined(USE_COMPILER_TLS) && !defined(USE_PTHREAD_SPECIFIC)
+#       if defined(USE_CUSTOM_SPECIFIC)
           if (GC_thread_key != 0)
             GC_check_tsd_marks(GC_thread_key);
  #      endif 
@@ -346,16 +354,15 @@ volatile GC_thread GC_threads[THREAD_TABLE_SZ];
  
  void GC_push_thread_structures(void)
  {
+    GC_ASSERT(I_HOLD_LOCK());
      GC_push_all((ptr_t)(GC_threads), (ptr_t)(GC_threads)+sizeof(GC_threads));
-#   if defined(THREAD_LOCAL_ALLOC) && !defined(DBG_HDRS_ALL)
+#   if defined(THREAD_LOCAL_ALLOC)
        GC_push_all((ptr_t)(&GC_thread_key),
           (ptr_t)(&GC_thread_key)+sizeof(&GC_thread_key));
  #   endif
  }
  
-#if defined(THREAD_LOCAL_ALLOC) && !defined(DBG_HDRS_ALL)
-#endif /* THREAD_LOCAL_ALLOC */
-
+/* It may not be safe to allocate when we register the first thread.   */
  static struct GC_Thread_Rep first_thread;
  
  /* Add a thread to GC_threads.  We assume it wasn't already there.     */
@@ -385,13 +392,13 @@ GC_thread GC_new_thread(pthread_t id)
  
  /* Delete a thread from GC_threads.  We assume it is there.    */
  /* (The code intentionally traps if it wasn't.)                        */
-/* Caller holds allocation lock.                               */
  void GC_delete_thread(pthread_t id)
  {
      int hv = ((word)id) % THREAD_TABLE_SZ;
      register GC_thread p = GC_threads[hv];
      register GC_thread prev = 0;
      
+    GC_ASSERT(I_HOLD_LOCK());
      while (!pthread_equal(p -> id, id)) {
          prev = p;
          p = p -> next;
@@ -408,12 +415,14 @@ void GC_delete_thread(pthread_t id)
  /* been notified, then there may be more than one thread       */
  /* in the table with the same pthread id.                      */
  /* This is OK, but we need a way to delete a specific one.     */
-void GC_delete_gc_thread(pthread_t id, GC_thread gc_id)
+void GC_delete_gc_thread(GC_thread gc_id)
  {
+    pthread_t id = gc_id -> id;
      int hv = ((word)id) % THREAD_TABLE_SZ;
      register GC_thread p = GC_threads[hv];
      register GC_thread prev = 0;
  
+    GC_ASSERT(I_HOLD_LOCK());
      while (p != gc_id) {
          prev = p;
          p = p -> next;
@@ -680,7 +689,8 @@ void GC_thr_init(void)
  #       if defined(GC_HPUX_THREADS)
           GC_nprocs = pthread_num_processors_np();
  #       endif
-#      if defined(GC_OSF1_THREADS) || defined(GC_AIX_THREADS)
+#      if defined(GC_OSF1_THREADS) || defined(GC_AIX_THREADS) \
+          || defined(GC_SOLARIS_THREADS)
           GC_nprocs = sysconf(_SC_NPROCESSORS_ONLN);
           if (GC_nprocs <= 0) GC_nprocs = 1;
  #      endif
@@ -742,7 +752,7 @@ void GC_thr_init(void)
  /* may require allocation.                             */
  /* Called without allocation lock.                     */
  /* Must be called before a second thread is created.   */
-/* Called without allocation lock.                     */
+/* Did we say it's called without the allocation lock? */
  void GC_init_parallel(void)
  {
      if (parallel_initialized) return;
@@ -751,7 +761,7 @@ void GC_init_parallel(void)
      /* GC_init() calls us back, so set flag first.     */
      if (!GC_is_initialized) GC_init();
      /* Initialize thread local free lists if used.     */
-#   if defined(THREAD_LOCAL_ALLOC) && !defined(DBG_HDRS_ALL)
+#   if defined(THREAD_LOCAL_ALLOC)
        LOCK();
        GC_init_thread_local(&(GC_lookup_thread(pthread_self())->tlfs));
        UNLOCK();
@@ -789,12 +799,12 @@ static void GC_do_blocking_inner(ptr_t data, void * context) {
      me = GC_lookup_thread(pthread_self());
      GC_ASSERT(!(me -> thread_blocked));
  #   ifdef SPARC
-       me -> stop_info.stack_ptr = (ptr_t)GC_save_regs_in_stack();
+       me -> stop_info.stack_ptr = GC_save_regs_in_stack();
  #   elif !defined(GC_DARWIN_THREADS)
-       me -> stop_info.stack_ptr = (ptr_t)GC_approx_sp();
+       me -> stop_info.stack_ptr = GC_approx_sp();
  #   endif
  #   ifdef IA64
-       me -> backing_store_ptr = (ptr_t)GC_save_regs_in_stack();
+       me -> backing_store_ptr = GC_save_regs_in_stack();
  #   endif
      me -> thread_blocked = TRUE;
      /* Save context here if we want to support precise stack marking */
@@ -830,13 +840,17 @@ int GC_unregister_my_thread(void)
      /* complete before we remove this thread.                  */
      GC_wait_for_gc_completion(FALSE);
      me = GC_lookup_thread(pthread_self());
-    GC_destroy_thread_local(&(me->tlfs));
+#   if defined(THREAD_LOCAL_ALLOC)
+      GC_destroy_thread_local(&(me->tlfs));
+#   endif
      if (me -> flags & DETACHED) {
         GC_delete_thread(pthread_self());
      } else {
         me -> flags |= FINISHED;
      }
-    GC_remove_specific(GC_thread_key);
+#   if defined(THREAD_LOCAL_ALLOC)
+      GC_remove_specific();
+#   endif
      UNLOCK();
      return GC_SUCCESS;
  }
@@ -877,7 +891,7 @@ int WRAP_FUNC(pthread_join)(pthread_t thread, void **retval)
      if (result == 0) {
          LOCK();
          /* Here the pthread thread id may have been recycled. */
-        GC_delete_gc_thread(thread, thread_gc_id);
+        GC_delete_gc_thread(thread_gc_id);
          UNLOCK();
      }
      return result;
@@ -899,7 +913,7 @@ WRAP_FUNC(pthread_detach)(pthread_t thread)
        thread_gc_id -> flags |= DETACHED;
        /* Here the pthread thread id may have been recycled. */
        if (thread_gc_id -> flags & FINISHED) {
-        GC_delete_gc_thread(thread, thread_gc_id);
+        GC_delete_gc_thread(thread_gc_id);
        }
        UNLOCK();
      }
@@ -913,7 +927,7 @@ GC_thread GC_register_my_thread_inner(struct GC_stack_base *sb,
  {
      GC_thread me;
  
-    GC_in_thread_creation = TRUE; /* OK to collect from unknow thread. */
+    GC_in_thread_creation = TRUE; /* OK to collect from unknown thread. */
      me = GC_new_thread(my_pthread);
      GC_in_thread_creation = FALSE;
  #   ifdef GC_DARWIN_THREADS
@@ -975,7 +989,7 @@ void * GC_inner_start_routine(struct GC_stack_base *sb, void * arg)
      sem_post(&(si -> registered));     /* Last action on si.   */
                                         /* OK to deallocate.    */
      pthread_cleanup_push(GC_thread_exit_proc, 0);
-#   if defined(THREAD_LOCAL_ALLOC) && !defined(DBG_HDRS_ALL)
+#   if defined(THREAD_LOCAL_ALLOC)
         LOCK();
          GC_init_thread_local(&(me->tlfs));
         UNLOCK();
diff --git a/reclaim.c b/reclaim.c

index 6cb8b47..1149f49 100644 (file)
--- a/reclaim.c
+++ b/reclaim.c
@@ -289,7 +289,12 @@ void GC_reclaim_block(struct hblk *hbp, word report_if_found)
         }
      } else {
          GC_bool empty = GC_block_empty(hhdr);
-       GC_ASSERT(sz * hhdr -> hb_n_marks <= HBLKSIZE);
+#      ifdef PARALLEL_MARK
+         /* Count can be low or one too high.  */
+         GC_ASSERT(hhdr -> hb_n_marks <= HBLKSIZE/sz + 1);
+#      else
+         GC_ASSERT(sz * hhdr -> hb_n_marks <= HBLKSIZE);
+#      endif
         if (hhdr -> hb_descr != 0) {
           GC_composite_in_use += sz * hhdr -> hb_n_marks;
         } else {
@@ -387,7 +392,7 @@ int GC_n_set_marks(hdr *hhdr)
  #endif /* !USE_MARK_BYTES  */
  
  /*ARGSUSED*/
-void GC_print_block_descr(struct hblk *h, word dummy)
+void GC_print_block_descr(struct hblk *h, word /* struct Print_stats * */ raw_ps)
  {
      hdr * hhdr = HDR(h);
      unsigned bytes = hhdr -> hb_sz;
@@ -405,7 +410,7 @@ void GC_print_block_descr(struct hblk *h, word dummy)
      bytes += HBLKSIZE-1;
      bytes &= ~(HBLKSIZE-1);
  
-    ps = (struct Print_stats *)dummy;
+    ps = (struct Print_stats *)raw_ps;
      ps->total_bytes += bytes;
      ps->number_of_blocks++;
  }
diff --git a/setjmp_t.c b/setjmp_t.c

index 648c712..9dc6bfc 100644 (file)
--- a/setjmp_t.c
+++ b/setjmp_t.c
@@ -24,7 +24,7 @@
  #include <stdio.h>
  #include <setjmp.h>
  #include <string.h>
-#include "private/gcconfig.h"
+#include "private/gc_priv.h"
  
  #ifdef OS2
  /* GETPAGESIZE() is set to getpagesize() by default, but that  */
@@ -82,6 +82,9 @@ int main()
         printf("A good guess for ALIGNMENT on this machine is %ld.\n",
                (unsigned long)(&(a.a_b))-(unsigned long)(&a));
         
+       printf("The following is a very dubious test of one root marking"
+              " strategy.\n");
+       printf("Results may not be accurate/useful:\n");
         /* Encourage the compiler to keep x in a callee-save register */
         x = 2*x-1;
         printf("");
@@ -107,6 +110,27 @@ int main()
         y++;
         x = 2;
         if (y == 1) longjmp(b,1);
+       printf("Some GC internal configuration stuff: \n");
+       printf("\tWORDSZ = %d, ALIGNMENT = %d, GC_GRANULE_BYTES = %d\n",
+              WORDSZ, ALIGNMENT, GC_GRANULE_BYTES);
+       printf("\tUsing one mark ");
+#       if defined(USE_MARK_BYTES)
+         printf("byte");
+#      elif defined(USE_MARK_BITS)
+         printf("bit");
+#       endif
+       printf(" per ");
+#       if defined(MARK_BIT_PER_OBJ)
+         printf("object.\n");
+#      elif defined(MARK_BIT_PER_GRANULE)
+         printf("granule.\n");
+#      endif
+#      ifdef THREAD_LOCAL_ALLOC
+         printf("Thread local allocation enabled.\n");
+#      endif
+#      ifdef PARALLEL_MARK
+         printf("Parallel marking enabled.\n");
+#      endif
         return(0);
  }
  
diff --git a/sparc_mach_dep.S b/sparc_mach_dep.S

index 06a0f3b..6997fa1 100644 (file)
--- a/sparc_mach_dep.S
+++ b/sparc_mach_dep.S
@@ -24,6 +24,10 @@ GC_push_regs:
         .size GC_save_regs_in_stack,.GC_save_regs_in_stack_end-GC_save_regs_in_stack
         
  
+! GC_clear_stack_inner(arg, limit) clears stack area up to limit and
+! returns arg.  Stack clearing is crucial on SPARC, so we supply
+! an assembly version that's more careful.  Assumes limit is hotter
+! than sp, and limit is 8 byte aligned.        
         .globl  GC_clear_stack_inner
  GC_clear_stack_inner:
  #if defined(__arch64__) || defined(__sparcv9)
diff --git a/tests/leak_test.c b/tests/leak_test.c

index 421d0c6..d6a60d4 100644 (file)
--- a/tests/leak_test.c
+++ b/tests/leak_test.c
@@ -5,6 +5,9 @@ main() {
      int i;
      GC_find_leak = 1; /* for new collect versions not compiled  */
      /* with -DFIND_LEAK.                                        */
+
+    GC_INIT(); /* Needed if thread-local allocation is enabled.        */
+               /* FIXME: This is not ideal.                            */
      for (i = 0; i < 10; ++i) {
          p[i] = malloc(sizeof(int)+i);
      }
@@ -18,4 +21,5 @@ main() {
      CHECK_LEAKS();
      CHECK_LEAKS();
      CHECK_LEAKS();
+    return 0;
  }       
diff --git a/tests/test.c b/tests/test.c

index 076ce4b..afe62f2 100644 (file)
--- a/tests/test.c
+++ b/tests/test.c
@@ -36,12 +36,10 @@
  # else
  #   include <assert.h>        /* Not normally used, but handy for debugging. */
  # endif
-# include <assert.h>   /* Not normally used, but handy for debugging. */
  # include "gc.h"
  # include "gc_typed.h"
  # include "private/gc_priv.h"  /* For output, locking, MIN_WORDS,      */
-                               /* and some statistics.                 */
-# include "private/gcconfig.h"
+                               /* and some statistics, and gcconfig.h. */
  
  # if defined(MSWIN32) || defined(MSWINCE)
  #   include <windows.h>
@@ -53,11 +51,6 @@
  #   define GC_printf printf
  # endif
  
-# if defined(GC_SOLARIS_THREADS) && !defined(GC_SOLARIS_PTHREADS)
-#   include <thread.h>
-#   include <synch.h>
-# endif
-
  # if defined(GC_PTHREADS)
  #   include <pthread.h>
  # endif
@@ -507,8 +500,6 @@ void check_marks_int_list(sexpr x)
         }
      }
  
-/* # elif defined(GC_SOLARIS_THREADS) */
-
  # else
  
  #   define fork_a_thread()
@@ -674,17 +665,11 @@ volatile int dropped_something = 0;
  # ifdef PCR
       PCR_ThCrSec_EnterSys();
  # endif
-# if defined(GC_SOLARIS_THREADS) && !defined(GC_SOLARIS_PTHREADS)
-    static mutex_t incr_lock;
-    mutex_lock(&incr_lock);
-# endif
-# if  defined(GC_PTHREADS)
+# if defined(GC_PTHREADS)
      static pthread_mutex_t incr_lock = PTHREAD_MUTEX_INITIALIZER;
      pthread_mutex_lock(&incr_lock);
-# else
-#   ifdef GC_WIN32_THREADS
-      EnterCriticalSection(&incr_cs);
-#   endif
+# elif defined(GC_WIN32_THREADS)
+    EnterCriticalSection(&incr_cs);
  # endif
    if ((int)(GC_word)client_data != t -> level) {
       (void)GC_printf("Wrong finalization data - collector is broken\n");
@@ -695,15 +680,10 @@ volatile int dropped_something = 0;
  # ifdef PCR
      PCR_ThCrSec_ExitSys();
  # endif
-# if defined(GC_SOLARIS_THREADS) && !defined(GC_SOLARIS_PTHREADS)
-    mutex_unlock(&incr_lock);
-# endif
  # if defined(GC_PTHREADS)
      pthread_mutex_unlock(&incr_lock);
-# else
-#   ifdef GC_WIN32_THREADS
-      LeaveCriticalSection(&incr_cs);
-#   endif
+# elif defined(GC_WIN32_THREADS)
+    LeaveCriticalSection(&incr_cs);
  # endif
  }
  
@@ -757,17 +737,11 @@ int n;
  #        ifdef PCR
             PCR_ThCrSec_EnterSys();
  #        endif
-#        if defined(GC_SOLARIS_THREADS) && !defined(GC_SOLARIS_PTHREADS)
-           static mutex_t incr_lock;
-           mutex_lock(&incr_lock);
-#        endif
  #         if defined(GC_PTHREADS)
              static pthread_mutex_t incr_lock = PTHREAD_MUTEX_INITIALIZER;
              pthread_mutex_lock(&incr_lock);
-#         else
-#           ifdef GC_WIN32_THREADS
-              EnterCriticalSection(&incr_cs);
-#           endif
+#         elif defined(GC_WIN32_THREADS)
+            EnterCriticalSection(&incr_cs);
  #         endif
                 /* Losing a count here causes erroneous report of failure. */
            finalizable_count++;
@@ -775,15 +749,10 @@ int n;
  #        ifdef PCR
             PCR_ThCrSec_ExitSys();
  #        endif
-#        if defined(GC_SOLARIS_THREADS) && !defined(GC_SOLARIS_PTHREADS)
-           mutex_unlock(&incr_lock);
-#        endif
  #        if defined(GC_PTHREADS)
             pthread_mutex_unlock(&incr_lock);
-#        else
-#           ifdef GC_WIN32_THREADS
-              LeaveCriticalSection(&incr_cs);
-#           endif
+#        elif defined(GC_WIN32_THREADS)
+            LeaveCriticalSection(&incr_cs);
  #         endif
         }
  
@@ -841,48 +810,8 @@ int n;
      chktree(t -> rchild, n-1);
  }
  
-# if defined(GC_SOLARIS_THREADS) && !defined(GC_SOLARIS_PTHREADS)
-thread_key_t fl_key;
  
-void * alloc8bytes()
-{
-# if defined(SMALL_CONFIG) || defined(GC_DEBUG)
-    collectable_count++;
-    return(GC_MALLOC(8));
-# else
-    void ** my_free_list_ptr;
-    void * my_free_list;
-    
-    if (thr_getspecific(fl_key, (void **)(&my_free_list_ptr)) != 0) {
-       (void)GC_printf("thr_getspecific failed\n");
-       FAIL;
-    }
-    if (my_free_list_ptr == 0) {
-        uncollectable_count++;
-        my_free_list_ptr = GC_NEW_UNCOLLECTABLE(void *);
-        if (thr_setspecific(fl_key, my_free_list_ptr) != 0) {
-           (void)GC_printf("thr_setspecific failed\n");
-           FAIL;
-        }
-    }
-    my_free_list = *my_free_list_ptr;
-    if (my_free_list == 0) {
-        collectable_count++;
-        my_free_list = GC_malloc_many(8);
-        if (my_free_list == 0) {
-            (void)GC_printf("alloc8bytes out of memory\n");
-           FAIL;
-        }
-    }
-    *my_free_list_ptr = GC_NEXT(my_free_list);
-    GC_NEXT(my_free_list) = 0;
-    return(my_free_list);
-# endif
-}
-
-#else
-
-# if defined(GC_PTHREADS)
+#if defined(GC_PTHREADS)
  pthread_key_t fl_key;
  
  void * alloc8bytes()
@@ -918,9 +847,8 @@ void * alloc8bytes()
  # endif
  }
  
-# else
+#else
  #   define alloc8bytes() GC_MALLOC_ATOMIC(8)
-# endif
  #endif
  
  void alloc_small(n)
@@ -1099,7 +1027,7 @@ static void uniq(void *p, ...) {
      for (j=0; j<i; j++)
        if (q[i] == q[j]) {
          GC_printf(
-              "Apparently failed to mark form some function arguments.\n"
+              "Apparently failed to mark from some function arguments.\n"
                "Perhaps GC_push_regs was configured incorrectly?\n"
          );
         FAIL;
@@ -1389,7 +1317,7 @@ void SetMinimumStack(long minSize)
  }
  
  
-#if !defined(PCR) && !defined(GC_SOLARIS_THREADS) \
+#if !defined(PCR) \
      && !defined(GC_WIN32_THREADS) && !defined(GC_PTHREADS) \
      || defined(LINT)
  #if defined(MSWIN32) && !defined(__MINGW32__)
diff --git a/tests/test_cpp.cc b/tests/test_cpp.cc

index 6661e41..9a7af1c 100644 (file)
--- a/tests/test_cpp.cc
+++ b/tests/test_cpp.cc
@@ -52,7 +52,7 @@ extern "C" {
  
  #define my_assert( e ) \
      if (! (e)) { \
-        GC_printf1( "Assertion failure in " __FILE__ ", line %d: " #e "\n", \
+        GC_printf( "Assertion failure in " __FILE__ ", line %d: " #e "\n", \
                      __LINE__ ); \
          exit( 1 ); }
  
@@ -216,11 +216,11 @@ int APIENTRY WinMain(
        x = 0;
  #   endif
      if (argc != 2 || (0 >= (n = atoi( argv[ 1 ] )))) {
-        GC_printf0( "usage: test_cpp number-of-iterations\nAssuming 10 iters\n" );
+        GC_printf( "usage: test_cpp number-of-iterations\nAssuming 10 iters\n" );
          n = 10;}
          
      for (iters = 1; iters <= n; iters++) {
-        GC_printf1( "Starting iteration %d\n", iters );
+        GC_printf( "Starting iteration %d\n", iters );
  
              /* Allocate some uncollectable As and disguise their pointers.
              Later we'll check to see if the objects are still there.  We're
@@ -282,7 +282,7 @@ int APIENTRY WinMain(
        x = *xptr;
  #   endif
      my_assert (29 == x[0]);
-    GC_printf0( "The test appears to have succeeded.\n" );
+    GC_printf( "The test appears to have succeeded.\n" );
      return( 0 );}
      
  
diff --git a/tests/thread_leak_test.c b/tests/thread_leak_test.c

index 1174705..5f183cf 100644 (file)
--- a/tests/thread_leak_test.c
+++ b/tests/thread_leak_test.c
@@ -37,4 +37,5 @@ main() {
      CHECK_LEAKS();
      CHECK_LEAKS();
      CHECK_LEAKS();
+    return 0;
  }
diff --git a/thread_local_alloc.c b/thread_local_alloc.c

index 0961794..b3fe28c 100644 (file)
--- a/thread_local_alloc.c
+++ b/thread_local_alloc.c
@@ -12,37 +12,17 @@
   */
  #include "private/gc_priv.h"
  
-# if defined(THREAD_LOCAL_ALLOC) && !defined(DBG_HDRS_ALL)
+# if defined(THREAD_LOCAL_ALLOC)
  
  #include "private/thread_local_alloc.h"
  #include "gc_inline.h"
  
-# if defined(GC_HPUX_THREADS) && !defined(USE_PTHREAD_SPECIFIC) \
-     && !defined(USE_COMPILER_TLS)
-#   ifdef __GNUC__
-#     define USE_PTHREAD_SPECIFIC
-      /* Empirically, as of gcc 3.3, USE_COMPILER_TLS doesn't work.    */
-#   else
-#     define USE_COMPILER_TLS
-#   endif
-# endif
-
-# if defined USE_HPUX_TLS
-#   error USE_HPUX_TLS macro was replaced by USE_COMPILER_TLS
-# endif
-
-# if (defined(GC_DGUX386_THREADS) || defined(GC_OSF1_THREADS) || \
-      defined(GC_DARWIN_THREADS) || defined(GC_AIX_THREADS)) \
-      && !defined(USE_PTHREAD_SPECIFIC)
-#   define USE_PTHREAD_SPECIFIC
-# endif
-
  # include <stdlib.h>
  
-/* We don't really support thread-local allocation with DBG_HDRS_ALL */
-
-#ifdef USE_COMPILER_TLS
+#if defined(USE_COMPILER_TLS)
    __thread
+#elif defined(USE_WIN32_COMPILER_TLS)
+  declspec(thread)
  #endif
  GC_key_t GC_thread_key;
  
@@ -56,7 +36,6 @@ static void return_freelists(void **fl, void **gfl)
      void *q, **qptr;
  
      for (i = 1; i < TINY_FREELISTS; ++i) {
-#if 0
         if ((word)(fl[i]) >= HBLKSIZE) {
           if (gfl[i] == 0) {
             gfl[i] = fl[i];
@@ -71,7 +50,6 @@ static void return_freelists(void **fl, void **gfl)
               gfl[i] = fl[i];
           }
         }
-#endif
         /* Clear fl[i], since the thread structure may hang around.     */
         /* Do it in a way that is likely to trap if we access it.       */
         fl[i] = (ptr_t)HBLKSIZE;
@@ -134,11 +112,16 @@ void GC_destroy_thread_local(GC_tlfs p)
  #   endif
  }
  
-#if defined(GC_ASSERTIONS) && defined(GC_LINUX_THREADS)
+#if defined(GC_ASSERTIONS) && defined(GC_PTHREADS) && !defined(CYGWIN32)
  # include <pthread.h>
    extern char * GC_lookup_thread(pthread_t id);
  #endif
  
+#if defined(GC_ASSERTIONS) && defined(GC_WIN32_THREADS)
+# include <pthread.h>
+  extern char * GC_lookup_thread(int id);
+#endif
+
  void * GC_malloc(size_t bytes)
  {
      size_t granules = ROUNDED_UP_GRANULES(bytes);
@@ -164,10 +147,14 @@ void * GC_malloc(size_t bytes)
  #   endif
  #   ifdef GC_ASSERTIONS
        /* We can't check tsd correctly, since we don't have access to   */
-      /* the right declarations.  But we cna check that it's close.    */
+      /* the right declarations.  But we can check that it's close.    */
        LOCK();
        {
-       char * me = GC_lookup_thread(pthread_self());
+#      if defined(GC_WIN32_THREADS)
+         char * me = (char *)GC_lookup_thread_inner(GetCurrentThreadId());
+#       else
+         char * me = GC_lookup_thread(pthread_self());
+#      endif
          GC_ASSERT((char *)tsd > me && (char *)tsd < me + 1000);
        }
        UNLOCK();
@@ -273,7 +260,7 @@ void GC_mark_thread_local_fls_for(GC_tlfs p)
      }
  #endif /* GC_ASSERTIONS */
  
-# else  /* !THREAD_LOCAL_ALLOC  && !DBG_HDRS_ALL */
+# else  /* !THREAD_LOCAL_ALLOC  */
  
  #   define GC_destroy_thread_local(t)
  
diff --git a/threadlibs.c b/threadlibs.c

index 178a7ec..1309694 100644 (file)
--- a/threadlibs.c
+++ b/threadlibs.c
@@ -11,7 +11,6 @@ int main()
                "-Wl,--wrap -Wl,pthread_sigmask -Wl,--wrap -Wl,sleep\n");
  #   endif
  #   if defined(GC_LINUX_THREADS) || defined(GC_IRIX_THREADS) \
-       || defined(GC_SOLARIS_PTHREADS) \
         || defined(GC_DARWIN_THREADS) || defined(GC_AIX_THREADS)
  #       ifdef GC_USE_DLOPEN_WRAP
           printf("-ldl ");
@@ -31,8 +30,9 @@ int main()
  #   if defined(GC_HPUX_THREADS) || defined(GC_OSF1_THREADS)
         printf("-lpthread -lrt\n");
  #   endif
-#   if defined(GC_SOLARIS_THREADS) && !defined(GC_SOLARIS_PTHREADS)
-        printf("-lthread -ldl\n");
+#   if defined(GC_SOLARIS_THREADS) || defined(GC_SOLARIS_PTHREADS)
+        printf("-lthread -lposix4\n");
+               /* Is this right for recent versions? */
  #   endif
  #   if defined(GC_WIN32_THREADS) && defined(CYGWIN32)
          printf("-lpthread\n");
diff --git a/typd_mlc.c b/typd_mlc.c

index cdedf46..1124ff9 100644 (file)
--- a/typd_mlc.c
+++ b/typd_mlc.c
@@ -587,9 +587,9 @@ void * GC_malloc_explicitly_typed(size_t lb, GC_descr d)
      if(SMALL_OBJ(lb)) {
         lg = GC_size_map[lb];
         opp = &(GC_eobjfreelist[lg]);
-       FASTLOCK();
-        if( !FASTLOCK_SUCCEEDED() || (op = *opp) == 0 ) {
-            FASTUNLOCK();
+       LOCK();
+        if( (op = *opp) == 0 ) {
+            UNLOCK();
              op = (ptr_t)GENERAL_MALLOC((word)lb, GC_explicit_kind);
             if (0 == op) return 0;
             lg = GC_size_map[lb];       /* May have been uninitialized. */
@@ -597,7 +597,7 @@ void * GC_malloc_explicitly_typed(size_t lb, GC_descr d)
              *opp = obj_link(op);
             obj_link(op) = 0;
              GC_bytes_allocd += GRANULES_TO_BYTES(lg);
-            FASTUNLOCK();
+            UNLOCK();
          }
     } else {
         op = (ptr_t)GENERAL_MALLOC((word)lb, GC_explicit_kind);
@@ -620,16 +620,16 @@ DCL_LOCK_STATE;
      if( SMALL_OBJ(lb) ) {
         lg = GC_size_map[lb];
         opp = &(GC_eobjfreelist[lg]);
-       FASTLOCK();
-        if( !FASTLOCK_SUCCEEDED() || (op = *opp) == 0 ) {
-            FASTUNLOCK();
+       LOCK();
+        if( (op = *opp) == 0 ) {
+            UNLOCK();
              op = (ptr_t)GENERAL_MALLOC_IOP(lb, GC_explicit_kind);
             lg = GC_size_map[lb];       /* May have been uninitialized. */
          } else {
              *opp = obj_link(op);
             obj_link(op) = 0;
              GC_bytes_allocd += GRANULES_TO_BYTES(lg);
-            FASTUNLOCK();
+            UNLOCK();
          }
     } else {
         op = (ptr_t)GENERAL_MALLOC_IOP(lb, GC_explicit_kind);
@@ -669,9 +669,9 @@ DCL_LOCK_STATE;
      if( SMALL_OBJ(lb) ) {
         lg = GC_size_map[lb];
         opp = &(GC_arobjfreelist[lg]);
-       FASTLOCK();
-        if( !FASTLOCK_SUCCEEDED() || (op = *opp) == 0 ) {
-            FASTUNLOCK();
+       LOCK();
+        if( (op = *opp) == 0 ) {
+            UNLOCK();
              op = (ptr_t)GENERAL_MALLOC((word)lb, GC_array_kind);
             if (0 == op) return(0);
             lg = GC_size_map[lb];       /* May have been uninitialized. */            
@@ -679,7 +679,7 @@ DCL_LOCK_STATE;
              *opp = obj_link(op);
             obj_link(op) = 0;
              GC_bytes_allocd += GRANULES_TO_BYTES(lg);
-            FASTUNLOCK();
+            UNLOCK();
          }
     } else {
         op = (ptr_t)GENERAL_MALLOC((word)lb, GC_array_kind);
diff --git a/version.h b/version.h

index 9c34ccc..b71dd6b 100644 (file)
--- a/version.h
+++ b/version.h
@@ -3,7 +3,7 @@
  /* it to keep the old-style build process working.             */
  #define GC_TMP_VERSION_MAJOR 7
  #define GC_TMP_VERSION_MINOR 0
-#define GC_TMP_ALPHA_VERSION 4
+#define GC_TMP_ALPHA_VERSION 5
  
  #ifndef GC_NOT_ALPHA
  #   define GC_NOT_ALPHA 0xff
diff --git a/win32_threads.c b/win32_threads.c

index 8609900..214d5c1 100755 (executable)
--- a/win32_threads.c
+++ b/win32_threads.c
@@ -4,6 +4,24 @@
  
  #include <windows.h>
  
+#ifdef THREAD_LOCAL_ALLOC
+# include "private/thread_local_alloc.h"
+#endif /* THREAD_LOCAL_ALLOC */
+
+/* Allocation lock declarations.       */
+#if !defined(USE_PTHREAD_LOCKS)
+# if defined(GC_DLL)
+    __declspec(dllexport) CRITICAL_SECTION GC_allocate_ml;
+# else
+    CRITICAL_SECTION GC_allocate_ml;
+# endif
+  DWORD GC_lock_holder = NO_THREAD;
+       /* Thread id for current holder of allocation lock */
+#else
+  pthread_mutex_t GC_allocate_ml = PTHREAD_MUTEX_INITIALIZER;
+  pthread_t GC_lock_holder = NO_THREAD;
+#endif
+
  #ifdef CYGWIN32
  # include <errno.h>
  
@@ -16,41 +34,102 @@
  
  # define DEBUG_CYGWIN_THREADS 0
  
-  void * GC_start_routine(void * arg);
+  void * GC_pthread_start(void * arg);
    void GC_thread_exit_proc(void *arg);
  
  # include <pthread.h>
  
  #endif
  
+#if defined(GC_DLL) && !defined(MSWINCE)
+  static GC_bool GC_win32_dll_threads = FALSE;
+  /* This code operates in two distinct modes, depending on    */
+  /* the setting of GC_win32_dll_threads.  If                  */
+  /* GC_win32_dll_threads is set, all threads in the process   */
+  /* are implicitly registered with the GC by DllMain.                 */
+  /* No explicit registration is required, and attempts at     */
+  /* explicit registration are ignored.  This mode is          */
+  /* very different from the Posix operation of the collector. */
+  /* In this mode access to the thread table is lock-free.     */
+  /* Hence there is a static limit on the number of threads.   */
+  
+  /* If GC_win32_dll_threads is FALSE, or the collector is     */
+  /* built without GC_DLL defined, things operate in a way     */
+  /* that is very similar to Posix platforms, and new threads  */
+  /* must be registered with the collector, e.g. by using      */
+  /* preprocessor-based interception of the thread primitives. */
+  /* In this case, we use a real data structure for the thread */
+  /* table.  Note that there is no equivalent of linker-based  */
+  /* call interception, since we don't have ELF-like           */
+  /* facilities.  The Windows analog appears to be "API                */
+  /* hooking", which really seems to be a standard way to      */
+  /* do minor binary rewriting (?).  I'd prefer not to have    */
+  /* the basic collector rely on such facilities, but an       */
+  /* optional package that intercepts thread calls this way    */
+  /* would probably be nice.                                   */
+
+  /* GC_win32_dll_threads must be set at initialization time,  */
+  /* i.e. before any collector or thread calls.  We make it a  */
+  /* "dynamic" option only to avoid multiple library versions. */
+#else
+# define GC_win32_dll_threads FALSE
+#endif
+
  /* The type of the first argument to InterlockedExchange.      */
  /* Documented to be LONG volatile *, but at least gcc likes    */
  /* this better.                                                        */
  typedef LONG * IE_t;
  
-#ifndef MAX_THREADS
-# define MAX_THREADS 256
-    /* FIXME:                                                  */
-    /* Things may get quite slow for large numbers of threads, */
-    /* since we look them up with sequential search.           */
-#endif
-
  GC_bool GC_thr_initialized = FALSE;
  
+GC_bool GC_need_to_lock = FALSE;
+
+static GC_bool parallel_initialized = FALSE;
+
+void GC_init_parallel(void);
+
  #ifdef GC_DLL
-  GC_API GC_bool GC_need_to_lock = TRUE;
+  /* Turn on GC_win32_dll_threads      */
+  GC_API void GC_use_DllMain(void)
+  {
+#     ifdef THREAD_LOCAL_ALLOC
+         ABORT("Cannot use thread local allocation with DllMain-based "
+               "thread registration.");
+         /* Thread-local allocation really wants to lock at thread     */
+         /* entry and exit.                                            */
+#     endif
+      GC_need_to_lock = TRUE;
         /* Cannot intercept thread creation.    */
+      GC_ASSERT(GC_gc_no == 0);
+      GC_win32_dll_threads = TRUE;
+  }
  #else
-  GC_bool GC_need_to_lock = FALSE;
+  GC_API void GC_use_DllMain(void)
+  {
+      ABORT("GC not configured as DLL");
+  }
  #endif
  
  DWORD GC_main_thread = 0;
  
-struct GC_thread_Rep {
-  AO_t in_use;                 /* Updated without lock.        */
-                       /* We assert that unused        */
-                       /* entries have invalid ids of  */
-                       /* zero and zero stack fields.  */
+struct GC_Thread_Rep {
+  union {
+    AO_t tm_in_use;    /* Updated without lock.                */
+                       /* We assert that unused                */
+                       /* entries have invalid ids of          */
+                       /* zero and zero stack fields.          */
+                       /* Used only with GC_win32_dll_threads. */
+    struct GC_Thread_Rep * tm_next;
+                       /* Hash table link without              */
+                       /* GC_win32_dll_threads.                */
+                       /* More recently allocated threads      */
+                       /* with a given pthread id come         */
+                       /* first.  (All but the first are       */
+                       /* guaranteed to be dead, but we may    */
+                       /* not yet have registered the join.)   */
+  } table_management;
+# define in_use table_management.tm_in_use
+# define next table_management.tm_next
    DWORD id;
    HANDLE handle;
    ptr_t stack_base;    /* The cold end of the stack.   */
@@ -65,9 +144,13 @@ struct GC_thread_Rep {
  #      define FINISHED 1       /* Thread has exited.   */
  #      define DETACHED 2       /* Thread is intended to be detached.   */
  # endif
+# ifdef THREAD_LOCAL_ALLOC
+    struct thread_local_freelists tlfs;
+# endif
  };
  
-typedef volatile struct GC_thread_Rep * GC_thread;
+typedef struct GC_Thread_Rep * GC_thread;
+
  
  /*
   * We assumed that volatile ==> memory ordering, at least among
@@ -76,36 +159,78 @@ typedef volatile struct GC_thread_Rep * GC_thread;
  
  volatile GC_bool GC_please_stop = FALSE;
  
-/*
- * FIXME: At initialization time we should perhaps chose
- * between two different thread table representations.  This simple
- * linear representation may be the best we can reliably do if we use
- * DllMain.  By default we should probably rely on thread registration
- * as with the other platforms, and use a hash table or other real
- * data structure.
- */
-volatile struct GC_thread_Rep thread_table[MAX_THREADS];
+/* We have two versions of the thread table.  Which one        */
+/* we use depends on whether or not GC_win32_dll_threads */
+/* is set.  The one complication is that at process    */
+/* startup, we use both, since the client hasn't yet   */
+/* had a chance to tell us which one (s)he wants.      */
+static GC_bool client_has_run = FALSE;
+
+/* Thread table used if GC_win32_dll_threads is set.   */
+/* This is a fixed size array.                         */
+/* Since we use runtime conditionals, both versions    */
+/* are always defined.                                 */
+# ifndef MAX_THREADS
+#   define MAX_THREADS 512
+#  endif
+  /* Things may get quite slow for large numbers of threads,   */
+  /* since we look them up with sequential search.             */
+
+  volatile struct GC_Thread_Rep dll_thread_table[MAX_THREADS];
+
+  volatile LONG GC_max_thread_index = 0;
+                       /* Largest index in dll_thread_table    */
+                       /* that was ever used.                  */
+
+/* And now the version used if GC_win32_dll_threads is not set.        */
+/* This is a chained hash table, with much of the code borrowed        */
+/* from the Posix implementation.                              */
+# define THREAD_TABLE_SZ 256   /* Must be power of 2   */
+  volatile GC_thread GC_threads[THREAD_TABLE_SZ];
+  
  
-volatile LONG GC_max_thread_index = 0; /* Largest index in thread_table        */
-                                      /* that was ever used.           */
+/* Add a thread to GC_threads.  We assume it wasn't already there.     */
+/* Caller holds allocation lock.                                       */
+/* Unlike the pthreads version, the id field is set by the caller.     */
+GC_thread GC_new_thread(DWORD id)
+{
+    int hv = ((word)id) % THREAD_TABLE_SZ;
+    GC_thread result;
+    /* It may not be safe to allocate when we register the first thread. */
+    static struct GC_Thread_Rep first_thread;
+    static GC_bool first_thread_used = FALSE;
+    
+    GC_ASSERT(I_HOLD_LOCK());
+    if (!first_thread_used) {
+       result = &first_thread;
+       first_thread_used = TRUE;
+    } else {
+        GC_ASSERT(!GC_win32_dll_threads);
+        result = (struct GC_Thread_Rep *)
+                GC_INTERNAL_MALLOC(sizeof(struct GC_Thread_Rep), NORMAL);
+       GC_ASSERT(result -> flags == 0);
+    }
+    if (result == 0) return(0);
+    /* result -> id = id; Done by caller.      */
+    result -> next = GC_threads[hv];
+    GC_threads[hv] = result;
+    GC_ASSERT(result -> flags == 0 /* && result -> thread_blocked == 0 */);
+    return(result);
+}
  
  extern LONG WINAPI GC_write_fault_handler(struct _EXCEPTION_POINTERS *exc_info);
  
  /*
   * This may be called from DllMain, and hence operates under unusual
- * constraints.  In particular, it must be lock-free.
- * Always called from the thread being added.
+ * constraints.  In particular, it must be lock-free if GC_win32_dll_threads
+ * is set.  Always called from the thread being added.
+ * If GC_win32_dll_threads is not set, we already hold the allocation lock,
+ * except possibly during single-threaded start-up code.
   */
  static GC_thread GC_register_my_thread_inner(struct GC_stack_base *sb,
                                              DWORD thread_id)
  {
-  int i;
-  /* It appears to be unsafe to acquire a lock here, since this        */
-  /* code is apparently not preeemptible on some systems.      */
-  /* (This is based on complaints, not on Microsoft's official */
-  /* documentation, which says this should perform "only simple        */
-  /* initialization tasks".)                                   */
-  /* Hence we make do with nonblocking synchronization.                */
+  volatile struct GC_Thread_Rep * me;
  
    /* The following should be a noop according to the win32     */
    /* documentation.  There is empirical evidence that it       */
@@ -113,39 +238,60 @@ static GC_thread GC_register_my_thread_inner(struct GC_stack_base *sb,
  # if defined(MPROTECT_VDB)
     if (GC_incremental) SetUnhandledExceptionFilter(GC_write_fault_handler);
  # endif
+
+  if (GC_win32_dll_threads || !client_has_run) {
+    int i;
+    /* It appears to be unsafe to acquire a lock here, since this      */
+    /* code is apparently not preemptible on some systems.             */
+    /* (This is based on complaints, not on Microsoft's official       */
+    /* documentation, which says this should perform "only simple      */
+    /* initialization tasks".)                                         */
+    /* Hence we make do with nonblocking synchronization.              */
+    /* It has been claimed that DllMain is really only executed with   */
+    /* a particular system lock held, and thus careful use of locking  */
+    /* around code that doesn't call back into the system libraries    */
+    /* might be OK.  But this hasn't been tested across all win32      */
+    /* variants.                                                       */
                  /* cast away volatile qualifier */
-  for (i = 0; InterlockedExchange((IE_t)&thread_table[i].in_use,1) != 0; i++) {
-    /* Compare-and-swap would make this cleaner, but that's not        */
-    /* supported before Windows 98 and NT 4.0.  In Windows 2000,       */
-    /* InterlockedExchange is supposed to be replaced by               */
-    /* InterlockedExchangePointer, but that's not really what I                */
-    /* want here.                                                      */
-    /* FIXME: We should eventually declare Win95 dead and use AO_      */
-    /* primitives here.                                                        */
-    if (i == MAX_THREADS - 1)
-      ABORT("too many threads");
-  }
-  /* Update GC_max_thread_index if necessary.  The following is safe,  */
-  /* and unlike CompareExchange-based solutions seems to work on all   */
-  /* Windows95 and later platforms.                                    */
-  /* Unfortunately, GC_max_thread_index may be temporarily out of      */
-  /* bounds, so readers have to compensate.                            */
-  while (i > GC_max_thread_index) {
-    InterlockedIncrement((IE_t)&GC_max_thread_index);
+    for (i = 0; InterlockedExchange((IE_t)&dll_thread_table[i].in_use,1) != 0;
+        i++) {
+      /* Compare-and-swap would make this cleaner, but that's not      */
+      /* supported before Windows 98 and NT 4.0.  In Windows 2000,     */
+      /* InterlockedExchange is supposed to be replaced by             */
+      /* InterlockedExchangePointer, but that's not really what I      */
+      /* want here.                                                    */
+      /* FIXME: We should eventually declare Win95 dead and use AO_    */
+      /* primitives here.                                              */
+      if (i == MAX_THREADS - 1)
+        ABORT("too many threads");
+    }
+    /* Update GC_max_thread_index if necessary.  The following is safe,        */
+    /* and unlike CompareExchange-based solutions seems to work on all */
+    /* Windows95 and later platforms.                                  */
+    /* Unfortunately, GC_max_thread_index may be temporarily out of    */
+    /* bounds, so readers have to compensate.                          */
+    while (i > GC_max_thread_index) {
+      InterlockedIncrement((IE_t)&GC_max_thread_index);
+    }
+    if (GC_max_thread_index >= MAX_THREADS) {
+      /* We overshot due to simultaneous increments.   */
+      /* Setting it to MAX_THREADS-1 is always safe.   */
+      GC_max_thread_index = MAX_THREADS - 1;
+    }
+    me = dll_thread_table + i;
    }
-  if (GC_max_thread_index >= MAX_THREADS) {
-    /* We overshot due to simultaneous increments.     */
-    /* Setting it to MAX_THREADS-1 is always safe.     */
-    GC_max_thread_index = MAX_THREADS - 1;
+  if (!GC_win32_dll_threads || !client_has_run) {
+    GC_ASSERT(I_HOLD_LOCK() || !client_has_run);
+    me = GC_new_thread(thread_id);
    }
    
  # ifdef CYGWIN32
-    thread_table[i].pthread_id = pthread_self();
+    me -> pthread_id = pthread_self();
  # endif
    if (!DuplicateHandle(GetCurrentProcess(),
                        GetCurrentThread(),
                        GetCurrentProcess(),
-                      (HANDLE*)&thread_table[i].handle,
+                      (HANDLE*)&(me -> handle),
                        0,
                        0,
                        DUPLICATE_SAME_ACCESS)) {
@@ -153,19 +299,25 @@ static GC_thread GC_register_my_thread_inner(struct GC_stack_base *sb,
         GC_err_printf("Last error code: %d\n", last_error);
         ABORT("DuplicateHandle failed");
    }
-  thread_table[i].stack_base = sb -> mem_base;
+  me -> stack_base = sb -> mem_base;
    /* Up until this point, GC_push_all_stacks considers this thread     */
    /* invalid.                                                          */
-  if (thread_table[i].stack_base == NULL) 
-    ABORT("Bad stack base in GC_register_my_thread");
+  if (me -> stack_base == NULL) 
+    ABORT("Bad stack base in GC_register_my_thread_inner");
    /* Up until this point, this entry is viewed as reserved but invalid */
    /* by GC_delete_thread.                                              */
-  thread_table[i].id = thread_id;
+  me -> id = thread_id;
+# if defined(THREAD_LOCAL_ALLOC)
+    GC_init_thread_local((GC_tlfs)(&(me->tlfs)));
+# endif
+  GC_ASSERT(!GC_please_stop || GC_win32_dll_threads);
+       /* Otherwise both we and the thread stopping code would be      */
+       /* holding the allocation lock.                                 */
    /* If this thread is being created while we are trying to stop       */
    /* the world, wait here.  Hopefully this can't happen on any */
    /* systems that don't allow us to block here.                        */
    while (GC_please_stop) Sleep(20);
-  return thread_table + i;
+  return (GC_thread)(me);
  }
  
  /*
@@ -187,149 +339,326 @@ LONG GC_get_max_thread_index()
  /* without a lock, but should be called in contexts in which the       */
  /* requested thread cannot be asynchronously deleted, e.g. from the    */
  /* thread itself.                                                      */
-static GC_thread GC_lookup_thread(DWORD thread_id) {
-  int i;
-  LONG my_max = GC_get_max_thread_index();
-
-  for (i = 0;
+/* This version assumes that either GC_win32_dll_threads is set, or    */
+/* we hold the allocator lock.                                         */
+/* Also used (for assertion checking only) from thread_local_alloc.c.  */
+GC_thread GC_lookup_thread_inner(DWORD thread_id) { /* returns 0 when no entry matches thread_id */
+  if (GC_win32_dll_threads) { /* DllMain mode: linear scan of dll_thread_table */
+    int i;
+    LONG my_max = GC_get_max_thread_index();
+    for (i = 0;
         i <= my_max &&
-       (!AO_load_acquire(&(thread_table[i].in_use))
-       || thread_table[i].id != thread_id);
+       (!AO_load_acquire(&(dll_thread_table[i].in_use))
+       || dll_thread_table[i].id != thread_id);
         /* Must still be in_use, since nobody else can store our thread_id. */
         i++) {}
-  if (i > my_max) {
-    return 0;
+    if (i > my_max) { /* scanned past my_max without finding a match */
+      return 0;
+    } else {
+      return (GC_thread)(dll_thread_table + i);
+    }
    } else {
-    return thread_table + i;
+    int hv = ((word)thread_id) % THREAD_TABLE_SZ; /* bucket index into GC_threads */
+    register GC_thread p = GC_threads[hv];
+    
+    GC_ASSERT(I_HOLD_LOCK());
+    while (p != 0 && p -> id != thread_id) p = p -> next; /* walk the bucket chain */
+    return(p);
    }
  }
  
-int GC_register_my_thread(struct GC_stack_base *sb) {
-  DWORD t = GetCurrentThreadId();
-
-  if (0 == GC_lookup_thread(t)) {
-    /* We lock here, since we want to wait for an ongoing GC.  */
+/* A version of the above that acquires the lock if necessary.  Note   */
+/* that the identically named function for pthreads is different, and  */
+/* just assumes we hold the lock.                                      */
+/* Also used (for assertion checking only) from thread_local_alloc.c.  */
+static GC_thread GC_lookup_thread(DWORD thread_id)
+{
+  if (GC_win32_dll_threads) { /* table lookup is done without taking the lock */
+    return GC_lookup_thread_inner(thread_id);
+  } else {
+    GC_thread result; /* captured so the lock can be dropped before returning */
      LOCK();
-    GC_register_my_thread_inner(sb, t);
+    result = GC_lookup_thread_inner(thread_id); /* hash-table path asserts I_HOLD_LOCK() */
      UNLOCK();
-    return GC_SUCCESS;
-  } else {
-    return GC_DUPLICATE;
+    return result;
    }
  }
  
-/* This is intended to be lock-free.                                   */
-/* It is either called synchronously from the thread being deleted,    */
-/* or by the joining thread.                                           */
-static void GC_delete_gc_thread(GC_thread thr)
+/* If a thread has been joined, but we have not yet            */
+/* been notified, then there may be more than one thread       */
+/* in the table with the same win32 id.                                */
+/* This is OK, but we need a way to delete a specific one.     */
+/* Assumes we hold the allocation lock unless                  */
+/* GC_win32_dll_threads is set.                                        */
+/* If GC_win32_dll_threads is set it should be called from the */
+/* thread being deleted.                                       */
+void GC_delete_gc_thread(GC_thread gc_id) /* removes exactly the entry gc_id, not merely one with the same id */
  {
-    CloseHandle(thr->handle);
+  if (GC_win32_dll_threads) { /* table mode: clear the slot in place, then release it */
+    /* This is intended to be lock-free.                               */
+    /* It is either called synchronously from the thread being deleted,        */
+    /* or by the joining thread.                                       */
+    CloseHandle(gc_id->handle);
        /* cast away volatile qualifier */
-    thr->stack_base = 0;
-    thr->id = 0;
+    gc_id -> stack_base = 0;
+    gc_id -> id = 0;
  #   ifdef CYGWIN32
-      thr->pthread_id = 0;
+      gc_id -> pthread_id = 0;
  #   endif /* CYGWIN32 */
-    AO_store_release(&(thr->in_use), FALSE);
+    AO_store_release(&(gc_id->in_use), FALSE); /* done last: the fields above are cleared before the slot is marked unused */
+  } else {
+    DWORD id = gc_id -> id;
+    int hv = ((word)id) % THREAD_TABLE_SZ; /* bucket that holds gc_id */
+    register GC_thread p = GC_threads[hv];
+    register GC_thread prev = 0; /* predecessor of p in the chain; 0 while p is the head */
+
+    GC_ASSERT(I_HOLD_LOCK());
+    while (p != gc_id) { /* compares pointers; no null check, so gc_id must be in this chain */
+        prev = p;
+        p = p -> next;
+    }
+    if (prev == 0) { /* unlink from the head of the bucket */
+        GC_threads[hv] = p -> next;
+    } else {
+        prev -> next = p -> next;
+    }
+    GC_INTERNAL_FREE(p); /* NOTE(review): unlike the table branch, no CloseHandle() here -- confirm the handle is released elsewhere */
+  }
  }
  
+/* Delete a thread from GC_threads.  We assume it is there.    */
+/* (The code intentionally traps if it wasn't.)                        */
+/* Assumes we hold the allocation lock unless                  */
+/* GC_win32_dll_threads is set.                                        */
+/* If GC_win32_dll_threads is set it should be called from the */
+/* thread being deleted.                                       */
+void GC_delete_thread(DWORD id) /* removes the entry whose win32 thread id equals id */
+{
+  if (GC_win32_dll_threads) {
+    GC_thread t = GC_lookup_thread_inner(id);
  
-static void GC_delete_thread(DWORD thread_id) {
-  GC_thread t = GC_lookup_thread(thread_id);
+    if (0 == t) { /* table mode only warns about an unknown id */
+      WARN("Removing nonexistent thread %ld\n", (GC_word)id);
+    } else {
+      GC_delete_gc_thread(t);
+    }
+  } else {
+    int hv = ((word)id) % THREAD_TABLE_SZ; /* bucket index into GC_threads */
+    register GC_thread p = GC_threads[hv];
+    register GC_thread prev = 0; /* predecessor of p in the chain; 0 while p is the head */
+    
+    GC_ASSERT(I_HOLD_LOCK());
+    while (p -> id != id) { /* no null check: dereferences a null p if id is absent */
+        prev = p;
+        p = p -> next;
+    }
+    if (prev == 0) { /* unlink from the head of the bucket */
+        GC_threads[hv] = p -> next;
+    } else {
+        prev -> next = p -> next;
+    }
+    GC_INTERNAL_FREE(p);
+  }
+}
+
+int GC_register_my_thread(struct GC_stack_base *sb) { /* GC_SUCCESS if newly registered, GC_DUPLICATE if already known */
+  DWORD t = GetCurrentThreadId();
  
-  if (0 == t) {
-    WARN("Removing nonexistent thread %ld\n", (GC_word)thread_id);
+  if (0 == GC_lookup_thread(t)) { /* the calling thread has no entry yet */
+    /* We lock here, since we want to wait for an ongoing GC.  */
+    LOCK();
+    GC_register_my_thread_inner(sb, t); /* returned GC_thread is not needed here */
+    UNLOCK();
+    return GC_SUCCESS;
    } else {
-    GC_delete_gc_thread(t);
+    return GC_DUPLICATE;
    }
  }
  
  int GC_unregister_my_thread(void)
  {
-    GC_delete_thread(GetCurrentThreadId());
+    if (GC_win32_dll_threads) {
+      /* Should we just ignore this? */
+      GC_delete_thread(GetCurrentThreadId());
+    } else {
+      LOCK();
+      GC_delete_thread(GetCurrentThreadId());
+      UNLOCK();
+    }
+#   if defined(THREAD_LOCAL_ALLOC)
+      LOCK();
+      {
+       GC_thread me = GC_lookup_thread_inner(GetCurrentThreadId());
+        GC_destroy_thread_local(&(me->tlfs));
+      }
+      UNLOCK();
+#   endif
      return GC_SUCCESS;
  }
  
  
  #ifdef CYGWIN32
  
+/* A quick-and-dirty cache of the mapping between pthread_t    */
+/* and win32 thread id.  The cache is direct-mapped: pthread   */
+/* ids that hash to the same slot overwrite each other, so a   */
+/* value read from it is only a hint.                          */
+#define PTHREAD_MAP_SIZE 512
+DWORD GC_pthread_map_cache[PTHREAD_MAP_SIZE];
+#define HASH(pthread_id) ((((word)(pthread_id) >> 5)) % PTHREAD_MAP_SIZE)
+       /* It appears pthread_t is really a pointer type ... */
+/* Both accessors expand to one parenthesized expression with  */
+/* no trailing semicolon, so they can be used as an ordinary   */
+/* statement (including an unbraced if/else arm) or inside a   */
+/* larger expression.                                          */
+#define SET_PTHREAD_MAP_CACHE(pthread_id, win32_id) \
+       (GC_pthread_map_cache[HASH(pthread_id)] = (win32_id))
+#define GET_PTHREAD_MAP_CACHE(pthread_id) \
+       (GC_pthread_map_cache[HASH(pthread_id)])
+
  /* Return a GC_thread corresponding to a given pthread_t.      */
  /* Returns 0 if it's not there.                                        */
  /* We assume that this is only called for pthread ids that     */
  /* have not yet terminated or are still joinable, and          */
  /* cannot be concurrently terminated.                          */
+/* Assumes we do NOT hold the allocation lock.                 */
  static GC_thread GC_lookup_pthread(pthread_t id)
  {
-  int i;
-  LONG my_max = GC_get_max_thread_index();
+  if (GC_win32_dll_threads) { /* table mode: linear scan comparing pthread_id */
+    int i;
+    LONG my_max = GC_get_max_thread_index();
  
-  for (i = 0;
-       i <= my_max &&
-       (!AO_load_acquire(&(thread_table[i].in_use))
-       || thread_table[i].pthread_id != id);
+    for (i = 0;
+         i <= my_max &&
+         (!AO_load_acquire(&(dll_thread_table[i].in_use))
+         || dll_thread_table[i].pthread_id != id);
         /* Must still be in_use, since nobody else can store our thread_id. */
         i++);
-  if (i > my_max) return 0;
-  return thread_table + i;
+    if (i > my_max) return 0; /* no in-use slot carries this pthread id */
+    return (GC_thread)(dll_thread_table + i);
+  } else {
+    /* We first try the cache.  If that fails, we use a very slow      */
+    /* approach.                                                       */
+    int hv_guess = GET_PTHREAD_MAP_CACHE(id) % THREAD_TABLE_SZ; /* bucket of the cached win32 id; read before LOCK() */
+    int hv;
+    GC_thread p;
+
+    LOCK(); /* taken here, so the caller must not already hold a non-recursive lock */
+    for (p = GC_threads[hv_guess]; 0 != p; p = p -> next) { /* fast path: only the guessed bucket */
+      if (pthread_equal(p -> pthread_id, id))
+       goto foundit; 
+    }
+    for (hv = 0; hv < THREAD_TABLE_SZ; ++hv) { /* slow path: every bucket */
+      for (p = GC_threads[hv]; 0 != p; p = p -> next) {
+        if (pthread_equal(p -> pthread_id, id))
+         goto foundit; 
+      }
+    }
+    p = 0; /* not found anywhere */
+   foundit:
+    UNLOCK();
+    return p;
+  }
  }
  
  #endif /* CYGWIN32 */
  
  void GC_push_thread_structures(void)
  {
+  GC_ASSERT(I_HOLD_LOCK());
+  if (GC_win32_dll_threads) {
      /* Unlike the other threads implementations, the thread table here */
      /* contains no pointers to the collectable heap.  Thus we have     */
      /* no private structures we need to preserve.                      */
-# ifdef CYGWIN32
-  { int i; /* pthreads may keep a pointer in the thread exit value */
-    LONG my_max = GC_get_max_thread_index();
+#   ifdef CYGWIN32
+    { int i; /* pthreads may keep a pointer in the thread exit value */
+      LONG my_max = GC_get_max_thread_index();
  
-    for (i = 0; i <= my_max; i++)
-      if (thread_table[i].in_use)
-       GC_push_all((ptr_t)&(thread_table[i].status),
-                    (ptr_t)(&(thread_table[i].status)+1));
+      for (i = 0; i <= my_max; i++)
+        if (dll_thread_table[i].in_use)
+         GC_push_all((ptr_t)&(dll_thread_table[i].status),
+                      (ptr_t)(&(dll_thread_table[i].status)+1));
+    }
+#   endif
+  } else {
+    GC_push_all((ptr_t)(GC_threads), (ptr_t)(GC_threads)+sizeof(GC_threads));
    }
+# if defined(THREAD_LOCAL_ALLOC)
+    GC_push_all((ptr_t)(&GC_thread_key),
+      (ptr_t)(&GC_thread_key)+sizeof(&GC_thread_key));
+    /* Just in case we ever use our own TLS implementation.    */
  # endif
  }
  
+/* Suspend the given thread, if it's still active, and mark it         */
+/* suspended.  If the thread turns out to have exited already (non-    */
+/* WinCE only), its stack_base is cleared instead and it is not        */
+/* marked suspended.  Returns nothing.                                 */
+void GC_suspend(GC_thread t)
+{
+# ifdef MSWINCE
+    /* SuspendThread will fail if thread is running kernel code */
+      while (SuspendThread(t -> handle) == (DWORD)-1)
+       Sleep(10);
+# else
+    /* Apparently the Windows 95 GetOpenFileName call creates  */
+    /* a thread that does not properly get cleaned up, and             */
+    /* SuspendThread on its descriptor may provoke a crash.            */
+    /* This reduces the probability of that event, though it still     */
+    /* appears there's a race here.                                    */
+    DWORD exitCode;
+    if (GetExitCodeThread(t -> handle, &exitCode) &&
+        exitCode != STILL_ACTIVE) {
+      t -> stack_base = 0; /* prevent stack from being pushed */
+#     ifndef CYGWIN32
+        /* this breaks pthread_join on Cygwin, which is guaranteed to  */
+        /* only see user pthreads                                     */
+        AO_store(&(t -> in_use), FALSE);
+        CloseHandle(t -> handle);
+#     endif
+      return;
+    }
+    if (SuspendThread(t -> handle) == (DWORD)-1)
+      ABORT("SuspendThread failed");
+# endif
+   t -> suspended = TRUE;
+}
+
+/* Defined in misc.c */
+#ifndef CYGWIN32
+  extern CRITICAL_SECTION GC_write_cs;
+#endif
+
  void GC_stop_world(void)
  {
    DWORD thread_id = GetCurrentThreadId();
    int i;
  
    if (!GC_thr_initialized) ABORT("GC_stop_world() called before GC_thr_init()");
+  GC_ASSERT(I_HOLD_LOCK()); /* the caller must already hold the allocation lock */
  
    GC_please_stop = TRUE;
-  for (i = 0; i <= GC_get_max_thread_index(); i++)
-    if (thread_table[i].stack_base != 0
-       && thread_table[i].id != thread_id) {
-#     ifdef MSWINCE
-        /* SuspendThread will fail if thread is running kernel code */
-       while (SuspendThread(thread_table[i].handle) == (DWORD)-1)
-         Sleep(10);
-#     else
-       /* Apparently the Windows 95 GetOpenFileName call creates       */
-       /* a thread that does not properly get cleaned up, and          */
-       /* SuspendThread on its descriptor may provoke a crash.         */
-       /* This reduces the probability of that event, though it still  */
-       /* appears there's a race here.                                 */
-       DWORD exitCode; 
-       if (GetExitCodeThread(thread_table[i].handle,&exitCode) &&
-            exitCode != STILL_ACTIVE) {
-          thread_table[i].stack_base = 0; /* prevent stack from being pushed */
-#         ifndef CYGWIN32
-            /* this breaks pthread_join on Cygwin, which is guaranteed to  */
-           /* only see user pthreads                                      */
-           AO_store(&(thread_table[i].in_use), FALSE);
-           CloseHandle(thread_table[i].handle);
-#         endif
-         continue;
-       }
-       if (SuspendThread(thread_table[i].handle) == (DWORD)-1)
-         ABORT("SuspendThread failed");
-#     endif
-      thread_table[i].suspended = TRUE;
+# ifndef CYGWIN32
+    EnterCriticalSection(&GC_write_cs); /* NOTE(review): presumably so no thread is suspended while it owns GC_write_cs -- confirm */
+# endif
+  if (GC_win32_dll_threads) {
+    /* Any threads being created during this loop will end up sleeping */
+    /* in the thread registration code until GC_please_stop becomes    */
+    /* false.  This is not ideal, but hopefully correct.               */
+    for (i = 0; i <= GC_get_max_thread_index(); i++) { /* the bound is re-read on every iteration */
+      volatile struct GC_Thread_Rep * t = dll_thread_table + i;
+      if (t -> stack_base != 0
+         && t -> id != thread_id) { /* skip slots with no stack base and the calling thread */
+         GC_suspend((GC_thread)t);
+      }
      }
+  } else {
+      GC_thread t;
+      int i; /* shadows the outer i */
+
+      for (i = 0; i < THREAD_TABLE_SZ; i++) { /* hash-table mode: visit every bucket chain */
+        for (t = GC_threads[i]; t != 0; t = t -> next) {
+         if (t -> stack_base != 0
+         && t -> id != thread_id) {
+           GC_suspend(t);
+         }
+       }
+      }
+  }
+# ifndef CYGWIN32
+    LeaveCriticalSection(&GC_write_cs);
+# endif    
  }
  
  void GC_start_world(void)
@@ -338,33 +667,34 @@ void GC_start_world(void)
    int i;
    LONG my_max = GC_get_max_thread_index();
  
-  for (i = 0; i <= my_max; i++)
-    if (thread_table[i].stack_base != 0 && thread_table[i].suspended
-       && thread_table[i].id != thread_id) {
-      if (ResumeThread(thread_table[i].handle) == (DWORD)-1)
-       ABORT("ResumeThread failed");
-      thread_table[i].suspended = FALSE;
+  GC_ASSERT(I_HOLD_LOCK());
+  if (GC_win32_dll_threads) {
+    for (i = 0; i <= my_max; i++) {
+      GC_thread t = (GC_thread)(dll_thread_table + i);
+      if (t -> stack_base != 0 && t -> suspended
+         && t -> id != thread_id) {
+        if (ResumeThread(t -> handle) == (DWORD)-1)
+         ABORT("ResumeThread failed");
+        t -> suspended = FALSE;
+      }
      }
-  GC_please_stop = FALSE;
-}
-
-# ifdef _MSC_VER
-#   pragma warning(disable:4715)
-# endif
-ptr_t GC_current_stackbottom(void)
-{
-  DWORD thread_id = GetCurrentThreadId();
-  int i;
-  LONG my_max = GC_get_max_thread_index();
+  } else {
+    GC_thread t;
+    int i;
  
-  for (i = 0; i <= my_max; i++)
-    if (thread_table[i].stack_base && thread_table[i].id == thread_id)
-      return thread_table[i].stack_base;
-  ABORT("no thread table entry for current thread");
+    for (i = 0; i < THREAD_TABLE_SZ; i++) {
+      for (t = GC_threads[i]; t != 0; t = t -> next) {
+        if (t -> stack_base != 0 && t -> suspended
+           && t -> id != thread_id) {
+          if (ResumeThread(t -> handle) == (DWORD)-1)
+           ABORT("ResumeThread failed");
+          t -> suspended = FALSE;
+        }
+      }
+    }
+  }
+  GC_please_stop = FALSE;
  }
-# ifdef _MSC_VER
-#   pragma warning(default:4715)
-# endif
  
  # ifdef MSWINCE
      /* The VirtualQuery calls below won't work properly on WinCE, but  */
@@ -387,26 +717,19 @@ ptr_t GC_current_stackbottom(void)
      }
  # endif
  
-void GC_push_all_stacks(void)
+void GC_push_stack_for(GC_thread thread)
  {
-  DWORD thread_id = GetCurrentThreadId();
-  GC_bool found_me = FALSE;
-  int i;
-  int dummy;
-  ptr_t sp, stack_min;
-  GC_thread thread;
-  LONG my_max = GC_get_max_thread_index();
-  
-  for (i = 0; i <= my_max; i++) {
-    thread = thread_table + i;
-    if (thread -> in_use && thread -> stack_base) {
-      if (thread -> id == thread_id) {
+    int dummy;
+    ptr_t sp, stack_min;
+    DWORD me = GetCurrentThreadId();
+
+    if (thread -> stack_base) {
+      if (thread -> id == me) {
         sp = (ptr_t) &dummy;
-       found_me = TRUE;
        } else {
          CONTEXT context;
          context.ContextFlags = CONTEXT_INTEGER|CONTEXT_CONTROL;
-        if (!GetThreadContext(thread_table[i].handle, &context))
+        if (!GetThreadContext(thread -> handle, &context))
           ABORT("GetThreadContext failed");
  
          /* Push all registers that might point into the heap.  Frame   */
@@ -446,17 +769,49 @@ void GC_push_all_stacks(void)
  #       else
  #         error "architecture is not supported"
  #       endif
-      }
+      } /* ! current thread */
  
        stack_min = GC_get_stack_min(thread->stack_base);
  
-      if (sp >= stack_min && sp < thread->stack_base)
+      if (sp >= stack_min && sp < thread->stack_base) {
+#       if DEBUG_CYGWIN_THREADS
+         GC_printf("Pushing thread from %p to %p for %d from %d\n",
+                   sp, thread -> stack_base, thread -> id, me);
+#       endif
          GC_push_all_stack(sp, thread->stack_base);
-      else {
+      } else {
          WARN("Thread stack pointer 0x%lx out of range, pushing everything\n",
              (unsigned long)sp);
          GC_push_all_stack(stack_min, thread->stack_base);
        }
+    } /* thread looks live */
+}
+
+void GC_push_all_stacks(void)
+{
+  DWORD me = GetCurrentThreadId();
+  GC_bool found_me = FALSE;
+  
+  if (GC_win32_dll_threads) {
+    int i;
+    LONG my_max = GC_get_max_thread_index();
+
+    for (i = 0; i <= my_max; i++) {
+      GC_thread t = (GC_thread)(dll_thread_table + i);
+      if (t -> in_use) {
+        GC_push_stack_for(t);
+        if (t -> id == me) found_me = TRUE;
+      }
+    }
+  } else {
+    GC_thread t;
+    int i;
+
+    for (i = 0; i < THREAD_TABLE_SZ; i++) {
+      for (t = GC_threads[i]; t != 0; t = t -> next) {
+        GC_push_stack_for(t);
+        if (t -> id == me) found_me = TRUE;
+      }
      }
    }
    if (!found_me) ABORT("Collecting from unknown thread.");
@@ -467,14 +822,29 @@ void GC_get_next_stack(char *start, char **lo, char **hi)
      int i;
  #   define ADDR_LIMIT (char *)(-1L)
      char * current_min = ADDR_LIMIT;
-    LONG my_max = GC_get_max_thread_index();
+
+    if (GC_win32_dll_threads) {
+      LONG my_max = GC_get_max_thread_index();
    
-    for (i = 0; i <= my_max; i++) {
-       char * s = (char *)thread_table[i].stack_base;
+      for (i = 0; i <= my_max; i++) {
+       ptr_t s = (ptr_t)(dll_thread_table[i].stack_base);
  
         if (0 != s && s > start && s < current_min) {
             current_min = s;
         }
+      }
+    } else {
+      for (i = 0; i < THREAD_TABLE_SZ; i++) {
+       GC_thread t;
+
+        for (t = GC_threads[i]; t != 0; t = t -> next) {
+         ptr_t s = (ptr_t)(t -> stack_base);
+
+         if (0 != s && s > start && s < current_min) {
+           current_min = s;
+         }
+        }
+      }
      }
      *hi = current_min;
      if (current_min == ADDR_LIMIT) {
@@ -487,21 +857,6 @@ void GC_get_next_stack(char *start, char **lo, char **hi)
  
  #if !defined(CYGWIN32)
  
-#if !defined(MSWINCE) && defined(GC_DLL)
-
-/* We register threads from DllMain */
-
-GC_API HANDLE WINAPI GC_CreateThread(
-    LPSECURITY_ATTRIBUTES lpThreadAttributes, 
-    DWORD dwStackSize, LPTHREAD_START_ROUTINE lpStartAddress, 
-    LPVOID lpParameter, DWORD dwCreationFlags, LPDWORD lpThreadId )
-{
-    return CreateThread(lpThreadAttributes, dwStackSize, lpStartAddress,
-                        lpParameter, dwCreationFlags, lpThreadId);
-}
-
-#else /* defined(MSWINCE) || !defined(GC_DLL))  */
-
  /* We have no DllMain to take care of new threads.  Thus we    */
  /* must properly intercept thread creation.                    */
  
@@ -521,37 +876,42 @@ GC_API HANDLE WINAPI GC_CreateThread(
  
      thread_args *args;
  
-    if (!GC_is_initialized) GC_init();
-               /* make sure GC is initialized (i.e. main thread is attached) */
-    
-    args = GC_malloc_uncollectable(sizeof(thread_args)); 
+    if (!parallel_initialized) GC_init_parallel();
+               /* make sure GC is initialized (i.e. main thread is attached,
+                  tls initialized) */
+
+    client_has_run = TRUE;
+    if (GC_win32_dll_threads) {
+      return CreateThread(lpThreadAttributes, dwStackSize, lpStartAddress,
+                        lpParameter, dwCreationFlags, lpThreadId);
+    } else {
+      args = GC_malloc_uncollectable(sizeof(thread_args)); 
         /* Handed off to and deallocated by child thread.       */
-    if (0 == args) {
+      if (0 == args) {
         SetLastError(ERROR_NOT_ENOUGH_MEMORY);
          return NULL;
-    }
+      }
  
-    /* set up thread arguments */
+      /* set up thread arguments */
         args -> start = lpStartAddress;
         args -> param = lpParameter;
  
-    GC_need_to_lock = TRUE;
-    thread_h = CreateThread(lpThreadAttributes,
-                           dwStackSize, thread_start,
-                           args, dwCreationFlags,
-                           lpThreadId);
+      GC_need_to_lock = TRUE;
+      thread_h = CreateThread(lpThreadAttributes,
+                             dwStackSize, GC_win32_start,
+                             args, dwCreationFlags,
+                             lpThreadId);
  
-    return thread_h;
+      return thread_h;
+    }
  }
  
-static DWORD WINAPI thread_start(LPVOID arg)
+void * GC_win32_start_inner(struct GC_stack_base *sb, LPVOID arg)
  {
-    DWORD ret = 0;
+    void * ret;
      thread_args *args = (thread_args *)arg;
-    struct GC_stack_base *sb;
  
-    GC_get_stack_base(&sb);
-    GC_register_my_thread(&sb); /* This waits for an in-progress GC. */
+    GC_register_my_thread(sb); /* This waits for an in-progress GC. */
  
      /* Clear the thread entry even if we exit with an exception.       */
      /* This is probably pointless, since an uncaught exception is      */
@@ -559,10 +919,15 @@ static DWORD WINAPI thread_start(LPVOID arg)
  #ifndef __GNUC__
      __try {
  #endif /* __GNUC__ */
-       ret = args->start (args->param);
+       ret = (void *)args->start (args->param);
  #ifndef __GNUC__
      } __finally {
  #endif /* __GNUC__ */
+#       if defined(THREAD_LOCAL_ALLOC)
+          LOCK();
+          GC_destroy_thread_local(&(me->tlfs));
+          UNLOCK();
+#       endif
         GC_free(args);
         GC_delete_thread(GetCurrentThreadId());
  #ifndef __GNUC__
@@ -571,8 +936,11 @@ static DWORD WINAPI thread_start(LPVOID arg)
  
      return ret;
  }
-#endif /* !defined(MSWINCE) && !(defined(__MINGW32__) && !defined(_DLL))  */
  
+DWORD WINAPI GC_win32_start(struct GC_stack_base *sb, LPVOID arg) /* passed to CreateThread() as the start routine; sb is never read here */
+{ /* NOTE(review): CreateThread start routines receive a single LPVOID, so the extra sb parameter looks wrong -- confirm against the declaration */
+    return (DWORD)GC_call_with_stack_base(GC_win32_start_inner, arg); /* the void * result of the inner routine is narrowed to DWORD */
+}
  #endif /* !CYGWIN32 */
  
  #ifdef MSWINCE
@@ -630,13 +998,16 @@ DWORD WINAPI main_thread_start(LPVOID arg)
  /* Called by GC_init() - we hold the allocation lock.  */
  void GC_thr_init(void) {
      struct GC_stack_base sb;
+    int sb_result; /* only examined by the assertion below */
  
+    GC_ASSERT(I_HOLD_LOCK()); /* matches the "we hold the allocation lock" contract */
      if (GC_thr_initialized) return;
      GC_main_thread = GetCurrentThreadId();
      GC_thr_initialized = TRUE;
  
      /* Add the initial thread, so we can stop it.      */
-    GC_get_stack_base(&sb);
+    sb_result = GC_get_stack_base(&sb);
+    GC_ASSERT(sb_result == GC_SUCCESS); /* checked only when assertions are enabled */
      GC_register_my_thread(&sb);
  }
  
@@ -658,6 +1029,7 @@ int GC_pthread_join(pthread_t pthread_id, void **retval) {
                 (int)pthread_self(), GetCurrentThreadId(), (int)pthread_id);
  #   endif
  
+    client_has_run = TRUE;
      /* Thread being joined might not have registered itself yet. */
      /* After the join,thread id may have been recycled.                 */
      /* FIXME: It would be better if this worked more like       */
@@ -667,9 +1039,11 @@ int GC_pthread_join(pthread_t pthread_id, void **retval) {
  
      result = pthread_join(pthread_id, retval);
  
-    /* FIXME:  This is an asynchronous deletion, which we said can't   */
-    /* happen?                                                         */
-    GC_delete_gc_thread(joinee);
+    if (!GC_win32_dll_threads) {
+      LOCK();
+      GC_delete_gc_thread(joinee);
+      UNLOCK();
+    } /* otherwise dllmain handles it. */
  
  #   if DEBUG_CYGWIN_THREADS
        GC_printf("thread 0x%x(0x%x) completed join with thread 0x%x.\n",
@@ -690,8 +1064,12 @@ GC_pthread_create(pthread_t *new_thread,
      int result;
      struct start_info * si;
  
-    if (!GC_is_initialized) GC_init();
+    if (!parallel_initialized) GC_init_parallel();
                 /* make sure GC is initialized (i.e. main thread is attached) */
+    client_has_run = TRUE;
+    if (GC_win32_dll_threads) {
+      return pthread_create(new_thread, attr, start_routine, arg);
+    }
      
      /* This is otherwise saved only in an area mmapped by the thread */
      /* library, which isn't visible to the collector.           */
@@ -711,7 +1089,7 @@ GC_pthread_create(pthread_t *new_thread,
                 (int)pthread_self(), GetCurrentThreadId);
  #   endif
      GC_need_to_lock = TRUE;
-    result = pthread_create(new_thread, attr, GC_start_routine, si); 
+    result = pthread_create(new_thread, attr, GC_pthread_start, si); 
  
      if (result) { /* failure */
         GC_free(si);
@@ -720,24 +1098,24 @@ GC_pthread_create(pthread_t *new_thread,
      return(result);
  }
  
-void * GC_start_routine(void * arg)
+void * GC_pthread_start_inner(struct GC_stack_base *sb, void * arg)
  {
      struct start_info * si = arg;
      void * result;
      void *(*start)(void *);
      void *start_arg;
-    pthread_t pthread_id;
      DWORD thread_id = GetCurrentThreadId();
+    pthread_t pthread_id = pthread_self();
      GC_thread me;
      GC_bool detached;
      int i;
-    struct GC_stack_base sb;
  
  #   if DEBUG_CYGWIN_THREADS
-      GC_printf("thread 0x%x(0x%x) starting...\n",(int)pthread_self(),
+      GC_printf("thread 0x%x(0x%x) starting...\n",(int)pthread_id,
                                                   thread_id);
  #   endif
  
+    GC_ASSERT(!GC_win32_dll_threads);
      /* If a GC occurs before the thread is registered, that GC will    */
      /* ignore this thread.  That's fine, since it will block trying to  */
      /* acquire the allocation lock, and won't yet hold interesting     */
@@ -745,14 +1123,14 @@ void * GC_start_routine(void * arg)
      LOCK();
      /* We register the thread here instead of in the parent, so that   */
      /* we don't need to hold the allocation lock during pthread_create. */
-    GC_get_stack_base(&sb);
-    me = GC_register_my_thread_inner(&sb, thread_id);
+    me = GC_register_my_thread_inner(sb, thread_id);
+    SET_PTHREAD_MAP_CACHE(pthread_id, thread_id);
      UNLOCK();
  
      start = si -> start_routine;
      start_arg = si -> arg;
      if (si-> detached) me -> flags |= DETACHED;
-    me -> pthread_id = pthread_id = pthread_self();
+    me -> pthread_id = pthread_id;
  
      GC_free(si); /* was allocated uncollectable */
  
@@ -769,17 +1147,26 @@ void * GC_start_routine(void * arg)
      return(result);
  }
  
+void * GC_pthread_start(void * arg)
+{
+    /* Start routine handed to pthread_create(): run the real thread  */
+    /* body, GC_pthread_start_inner, through GC_call_with_stack_base  */
+    /* and pass its result back unchanged.                            */
+    void * thread_result;
+
+    thread_result = GC_call_with_stack_base(GC_pthread_start_inner, arg);
+    return thread_result;
+}
+
  void GC_thread_exit_proc(void *arg)
  {
      GC_thread me = (GC_thread)arg;
      int i;
  
+    GC_ASSERT(!GC_win32_dll_threads);
  #   if DEBUG_CYGWIN_THREADS
        GC_printf("thread 0x%x(0x%x) called pthread_exit().\n",
                 (int)pthread_self(),GetCurrentThreadId());
  #   endif
  
      LOCK();
+#   if defined(THREAD_LOCAL_ALLOC)
+      GC_destroy_thread_local(&(me->tlfs));
+#   endif
      if (me -> flags & DETACHED) {
        GC_delete_thread(GetCurrentThreadId());
      } else {
@@ -791,6 +1178,7 @@ void GC_thread_exit_proc(void *arg)
  
  /* nothing required here... */
  int GC_pthread_sigmask(int how, const sigset_t *set, sigset_t *oset) {
+  client_has_run = TRUE; /* records that the client has called into the GC thread API; DllMain checks this flag */
    return pthread_sigmask(how, set, oset);
  }
  
@@ -799,6 +1187,7 @@ int GC_pthread_detach(pthread_t thread)
      int result;
      GC_thread thread_gc_id;
      
+    client_has_run = TRUE;
      LOCK();
      thread_gc_id = GC_lookup_pthread(thread);
      UNLOCK();
@@ -827,6 +1216,9 @@ BOOL WINAPI DllMain(HINSTANCE inst, ULONG reason, LPVOID reserved)
  {
    struct GC_stack_base sb;
    DWORD thread_id;
+  int sb_result;
+
+  if (client_has_run && !GC_win32_dll_threads) return TRUE;
  
    switch (reason) {
    case DLL_PROCESS_ATTACH:
@@ -837,28 +1229,32 @@ BOOL WINAPI DllMain(HINSTANCE inst, ULONG reason, LPVOID reserved)
      thread_id = GetCurrentThreadId();
      if (GC_main_thread != thread_id) {
         /* Don't lock here.     */
-       GC_get_stack_base(&sb);
+        sb_result = GC_get_stack_base(&sb);
+        GC_ASSERT(sb_result == GC_SUCCESS);
+#       ifdef THREAD_LOCAL_ALLOC
+         ABORT("Cannot initialize thread local cache from DllMain");
+#       endif
         GC_register_my_thread_inner(&sb, thread_id);
      } /* o.w. we already did it during GC_thr_init(), called by GC_init() */
      break;
  
    case DLL_THREAD_DETACH:
-    LOCK();    /* Safe? DllMain description is ambiguous.      */
+    /* We are hopefully running in the context of the exiting thread.  */
+    client_has_run = TRUE;
+    if (!GC_win32_dll_threads) return TRUE;
      GC_delete_thread(GetCurrentThreadId());
-    UNLOCK();
      break;
  
    case DLL_PROCESS_DETACH:
      {
        int i;
  
-      LOCK();
+      if (!GC_win32_dll_threads) return TRUE;
        for (i = 0; i <= GC_get_max_thread_index(); ++i)
        {
-          if (AO_load(&(thread_table[i].in_use)))
-           GC_delete_gc_thread(thread_table + i);
+          if (AO_load(&(dll_thread_table[i].in_use)))
+           GC_delete_gc_thread(dll_thread_table + i);
        }
-      UNLOCK();
  
        GC_deinit();
        DeleteCriticalSection(&GC_allocate_ml);
@@ -873,12 +1269,80 @@ BOOL WINAPI DllMain(HINSTANCE inst, ULONG reason, LPVOID reserved)
  
  # endif /* !MSWINCE */
  
-# if defined(THREAD_LOCAL_ALLOC) && !defined(DBG_HDRS_ALL)
+/* Perform all initializations, including those that   */
+/* may require allocation; a no-op on repeated calls.  */
+/* Called without the allocation lock.                 */
+/* Must be called before a second thread is created.   */
+void GC_init_parallel(void)
+{
+    if (parallel_initialized) return;  /* Already done (or in progress). */
+    parallel_initialized = TRUE;
+
+    /* GC_init() calls us back, so the flag is set first. */
+    if (!GC_is_initialized) GC_init();
+    /* Initialize the calling thread's local free lists, if used. */
+#   if defined(THREAD_LOCAL_ALLOC)
+      LOCK();
+      GC_init_thread_local(&(GC_lookup_thread(GetCurrentThreadId())->tlfs));
+      UNLOCK();
+#   endif
+}
+
+#if defined(USE_PTHREAD_LOCKS)
+  /* Support for pthread locking code.              */
+  /* pthread_mutex_trylock() may not win here,      */
+  /* due to built-in support for spinning first?    */
+
+volatile GC_bool GC_collecting = 0;
+                       /* A hint that we're in the collector and       */
+                        /* holding the allocation lock for an           */
+                        /* extended period.                             */
+
+void GC_lock(void)
+{
+    pthread_mutex_lock(&GC_allocate_ml);  /* Plain blocking acquire.  */
+}
+#endif /* USE_PTHREAD_LOCKS */
  
-/* We don't really support thread-local allocation with DBG_HDRS_ALL */
+# if defined(THREAD_LOCAL_ALLOC)
  
  /* Add thread-local allocation support.  Microsoft uses __declspec(thread) */
  
+/* We must explicitly mark ptrfree and gcj free lists, since the free  */
+/* list links wouldn't otherwise be found.  We also mark the normal    */
+/* free lists here, since that involves touching less memory than if   */
+/* we scanned them normally.                                           */
+void GC_mark_thread_local_free_lists(void)
+{
+    int i;
+    GC_thread p;
+    
+    for (i = 0; i < THREAD_TABLE_SZ; ++i) {  /* Every GC_threads slot... */
+      for (p = GC_threads[i]; 0 != p; p = p -> next) {  /* ...and its chain. */
+       GC_mark_thread_local_fls_for(&(p->tlfs));
+      }
+    }
+}
+
+#if defined(GC_ASSERTIONS)
+    /* Check that all thread-local free lists are completely marked.   */
+    /* Also check that thread-specific-data structures are marked.     */
+    void GC_check_tls(void) {
+       int i;
+       GC_thread p;
+       
+       for (i = 0; i < THREAD_TABLE_SZ; ++i) {  /* Every GC_threads slot. */
+         for (p = GC_threads[i]; 0 != p; p = p -> next) {
+           GC_check_tls_for(&(p->tlfs));
+         }
+       }
+#       if defined(USE_CUSTOM_SPECIFIC)
+         if (GC_thread_key != 0)  /* Skipped while the key is still 0. */
+           GC_check_tsd_marks(GC_thread_key);
+#      endif /* USE_CUSTOM_SPECIFIC */
+    }
+#endif /* GC_ASSERTIONS */
+
  #endif /* THREAD_LOCAL_ALLOC ... */
  
  #endif /* GC_WIN32_THREADS */
author	Ivan Maidanski <ivmai@mail.ru>
	Tue, 26 Jul 2011 16:30:36 +0000 (20:30 +0400)
committer	Ivan Maidanski <ivmai@mail.ru>
	Tue, 26 Jul 2011 16:30:36 +0000 (20:30 +0400)
Makefile		patch \| blob \| history
Makefile.direct		patch \| blob \| history
Makefile.in		patch \| blob \| history
NT_STATIC_THREADS_MAKEFILE		patch \| blob \| history
allchblk.c		patch \| blob \| history
alloc.c		patch \| blob \| history
configure		patch \| blob \| history
configure.ac		patch \| blob \| history
doc/README		patch \| blob \| history
doc/README.changes		patch \| blob \| history
doc/README.linux		patch \| blob \| history
doc/doc.am		patch \| blob \| history
doc/gcdescr.html		patch \| blob \| history
doc/overview.html	[new file with mode: 0644]	patch \| blob
headers.c		patch \| blob \| history
include/gc.h		patch \| blob \| history
include/gc_config_macros.h		patch \| blob \| history
include/gc_inline.h		patch \| blob \| history
include/private/gc_locks.h		patch \| blob \| history
include/private/gc_pmark.h		patch \| blob \| history
include/private/gc_priv.h		patch \| blob \| history
include/private/gcconfig.h		patch \| blob \| history
include/private/pthread_support.h		patch \| blob \| history
include/private/thread_local_alloc.h		patch \| blob \| history
mach_dep.c		patch \| blob \| history
malloc.c		patch \| blob \| history
mallocx.c		patch \| blob \| history
mark.c		patch \| blob \| history
mark_rts.c		patch \| blob \| history
misc.c		patch \| blob \| history
os_dep.c		patch \| blob \| history
pthread_stop_world.c		patch \| blob \| history
pthread_support.c		patch \| blob \| history
reclaim.c		patch \| blob \| history
setjmp_t.c		patch \| blob \| history
sparc_mach_dep.S		patch \| blob \| history
tests/leak_test.c		patch \| blob \| history
tests/test.c		patch \| blob \| history
tests/test_cpp.cc		patch \| blob \| history
tests/thread_leak_test.c		patch \| blob \| history
thread_local_alloc.c		patch \| blob \| history
threadlibs.c		patch \| blob \| history
typd_mlc.c		patch \| blob \| history
version.h		patch \| blob \| history
win32_threads.c		patch \| blob \| history