# Atomic_ops installation directory. If this doesn't exist, we create
# it from the included libatomic_ops distribution.
-AO_VERSION=1.0
+AO_VERSION=1.1
AO_SRC_DIR=$(srcdir)/libatomic_ops-$(AO_VERSION)
AO_INSTALL_DIR=$(srcdir)/libatomic_ops-install
DOC_FILES= README.QUICK doc/README.Mac doc/README.MacOSX doc/README.OS2 \
doc/README.amiga doc/README.cords doc/debugging.html \
- doc/porting.html \
+ doc/porting.html doc/overview.html \
doc/README.dj doc/README.hp doc/README.linux doc/README.rs6000 \
doc/README.sgi doc/README.solaris2 doc/README.uts \
doc/README.win32 doc/barrett_diagram doc/README \
mark.o typd_mlc.o finalize.o ptr_chck.o: $(srcdir)/include/gc_mark.h \
$(srcdir)/include/private/gc_pmark.h
-specific.o pthread_support.o: $(srcdir)/include/private/specific.h \
- $(srcdir)/include/gc_inline.h
+specific.o pthread_support.o thread_local_alloc.o win32_threads.o: \
+ $(srcdir)/include/private/specific.h $(srcdir)/include/gc_inline.h \
+ $(srcdir)/include/private/thread_local_alloc.h
dbg_mlc.o gcj_mlc.o: $(srcdir)/include/private/dbg_mlc.h
base_lib gc.a: $(OBJS) dyn_load.o $(UTILS)
echo > base_lib
rm -f dont_ar_1
+ cp $(AO_INSTALL_DIR)/lib/libatomic_ops.a gc.a
./if_mach SPARC SUNOS5 touch dont_ar_1
./if_mach SPARC SUNOS5 $(AR) rus gc.a $(OBJS) dyn_load.o
./if_mach M68K AMIGA touch dont_ar_1
# SunOS5 shared library version of the collector
sunos5gc.so: $(OBJS) dyn_load_sunos53.o
- $(CC) -G -o sunos5gc.so $(OBJS) dyn_load_sunos53.o -ldl
+ $(CC) -G -o sunos5gc.so $(OBJS) dyn_load_sunos53.o $(AO_INSTALL_DIR)/lib/libatomic_ops.a -ldl
ln sunos5gc.so libgc.so
# Alpha/OSF shared library version of the collector
$(srcdir)/ia64_save_regs_in_stack.s \
$(srcdir)/sparc_netbsd_mach_dep.s $(UTILS)
rm -f mach_dep.o
- ./if_mach MIPS IRIX5 $(CC) -c -o mach_dep.o $(srcdir)/mips_sgi_mach_dep.s
- ./if_mach MIPS RISCOS $(AS) -o mach_dep.o $(srcdir)/mips_ultrix_mach_dep.s
- ./if_mach MIPS ULTRIX $(AS) -o mach_dep.o $(srcdir)/mips_ultrix_mach_dep.s
- ./if_mach POWERPC DARWIN $(AS) -o mach_dep.o $(srcdir)/powerpc_darwin_mach_dep.s
- ./if_mach ALPHA LINUX $(CC) -c -o mach_dep.o $(srcdir)/alpha_mach_dep.S
- ./if_mach SPARC SUNOS5 $(CC) -c -o mach_dep.o $(srcdir)/sparc_mach_dep.S
- ./if_mach SPARC OPENBSD $(AS) -o mach_dep.o $(srcdir)/sparc_sunos4_mach_dep.s
- ./if_mach SPARC NETBSD $(AS) -o mach_dep.o $(srcdir)/sparc_netbsd_mach_dep.s
+ ./if_mach SPARC SUNOS5 $(CC) -c -o mach_dep2.o $(srcdir)/sparc_mach_dep.S
+ ./if_mach SPARC OPENBSD $(AS) -o mach_dep2.o $(srcdir)/sparc_sunos4_mach_dep.s
+ ./if_mach SPARC NETBSD $(AS) -o mach_dep2.o $(srcdir)/sparc_netbsd_mach_dep.s
+ ./if_mach SPARC "" $(CC) -c -o mach_dep1.o $(SPECIALCFLAGS) $(srcdir)/mach_dep.c
+ ./if_mach SPARC "" ld -r -o mach_dep.o mach_dep1.o mach_dep2.o
./if_mach IA64 "" as $(AS_ABI_FLAG) -o ia64_save_regs_in_stack.o $(srcdir)/ia64_save_regs_in_stack.s
./if_mach IA64 "" $(CC) -c -o mach_dep1.o $(SPECIALCFLAGS) $(srcdir)/mach_dep.c
./if_mach IA64 "" ld -r -o mach_dep.o mach_dep1.o ia64_save_regs_in_stack.o
# If an optimized setjmp_test generates a segmentation fault,
# odds are your compiler is broken. Gctest may still work.
# Try compiling setjmp_t.c unoptimized.
-setjmp_test: $(srcdir)/setjmp_t.c $(srcdir)/include/gc.h $(UTILS)
+setjmp_test: $(srcdir)/setjmp_t.c $(srcdir)/include/gc.h $(UTILS) $(AO_INSTALL_DIR)
$(CC) $(CFLAGS) -o setjmp_test $(srcdir)/setjmp_t.c
test: KandRtest cord/cordtest
# Atomic_ops installation directory. If this doesn't exist, we create
# it from the included libatomic_ops distribution.
-AO_VERSION=1.0
+AO_VERSION=1.1
AO_SRC_DIR=$(srcdir)/libatomic_ops-$(AO_VERSION)
AO_INSTALL_DIR=$(srcdir)/libatomic_ops-install
DOC_FILES= README.QUICK doc/README.Mac doc/README.MacOSX doc/README.OS2 \
doc/README.amiga doc/README.cords doc/debugging.html \
- doc/porting.html \
+ doc/porting.html doc/overview.html \
doc/README.dj doc/README.hp doc/README.linux doc/README.rs6000 \
doc/README.sgi doc/README.solaris2 doc/README.uts \
doc/README.win32 doc/barrett_diagram doc/README \
mark.o typd_mlc.o finalize.o ptr_chck.o: $(srcdir)/include/gc_mark.h \
$(srcdir)/include/private/gc_pmark.h
-specific.o pthread_support.o: $(srcdir)/include/private/specific.h \
- $(srcdir)/include/gc_inline.h
+specific.o pthread_support.o thread_local_alloc.o win32_threads.o: \
+ $(srcdir)/include/private/specific.h $(srcdir)/include/gc_inline.h \
+ $(srcdir)/include/private/thread_local_alloc.h
dbg_mlc.o gcj_mlc.o: $(srcdir)/include/private/dbg_mlc.h
base_lib gc.a: $(OBJS) dyn_load.o $(UTILS)
echo > base_lib
rm -f dont_ar_1
+ cp $(AO_INSTALL_DIR)/lib/libatomic_ops.a gc.a
./if_mach SPARC SUNOS5 touch dont_ar_1
./if_mach SPARC SUNOS5 $(AR) rus gc.a $(OBJS) dyn_load.o
./if_mach M68K AMIGA touch dont_ar_1
# SunOS5 shared library version of the collector
sunos5gc.so: $(OBJS) dyn_load_sunos53.o
- $(CC) -G -o sunos5gc.so $(OBJS) dyn_load_sunos53.o -ldl
+ $(CC) -G -o sunos5gc.so $(OBJS) dyn_load_sunos53.o $(AO_INSTALL_DIR)/lib/libatomic_ops.a -ldl
ln sunos5gc.so libgc.so
# Alpha/OSF shared library version of the collector
$(srcdir)/ia64_save_regs_in_stack.s \
$(srcdir)/sparc_netbsd_mach_dep.s $(UTILS)
rm -f mach_dep.o
- ./if_mach MIPS IRIX5 $(CC) -c -o mach_dep.o $(srcdir)/mips_sgi_mach_dep.s
- ./if_mach MIPS RISCOS $(AS) -o mach_dep.o $(srcdir)/mips_ultrix_mach_dep.s
- ./if_mach MIPS ULTRIX $(AS) -o mach_dep.o $(srcdir)/mips_ultrix_mach_dep.s
- ./if_mach POWERPC DARWIN $(AS) -o mach_dep.o $(srcdir)/powerpc_darwin_mach_dep.s
- ./if_mach ALPHA LINUX $(CC) -c -o mach_dep.o $(srcdir)/alpha_mach_dep.S
- ./if_mach SPARC SUNOS5 $(CC) -c -o mach_dep.o $(srcdir)/sparc_mach_dep.S
- ./if_mach SPARC OPENBSD $(AS) -o mach_dep.o $(srcdir)/sparc_sunos4_mach_dep.s
- ./if_mach SPARC NETBSD $(AS) -o mach_dep.o $(srcdir)/sparc_netbsd_mach_dep.s
+ ./if_mach SPARC SUNOS5 $(CC) -c -o mach_dep2.o $(srcdir)/sparc_mach_dep.S
+ ./if_mach SPARC OPENBSD $(AS) -o mach_dep2.o $(srcdir)/sparc_sunos4_mach_dep.s
+ ./if_mach SPARC NETBSD $(AS) -o mach_dep2.o $(srcdir)/sparc_netbsd_mach_dep.s
+ ./if_mach SPARC "" $(CC) -c -o mach_dep1.o $(SPECIALCFLAGS) $(srcdir)/mach_dep.c
+ ./if_mach SPARC "" ld -r -o mach_dep.o mach_dep1.o mach_dep2.o
./if_mach IA64 "" as $(AS_ABI_FLAG) -o ia64_save_regs_in_stack.o $(srcdir)/ia64_save_regs_in_stack.s
./if_mach IA64 "" $(CC) -c -o mach_dep1.o $(SPECIALCFLAGS) $(srcdir)/mach_dep.c
./if_mach IA64 "" ld -r -o mach_dep.o mach_dep1.o ia64_save_regs_in_stack.o
# If an optimized setjmp_test generates a segmentation fault,
# odds are your compiler is broken. Gctest may still work.
# Try compiling setjmp_t.c unoptimized.
-setjmp_test: $(srcdir)/setjmp_t.c $(srcdir)/include/gc.h $(UTILS)
+setjmp_test: $(srcdir)/setjmp_t.c $(srcdir)/include/gc.h $(UTILS) $(AO_INSTALL_DIR)
$(CC) $(CFLAGS) -o setjmp_test $(srcdir)/setjmp_t.c
test: KandRtest cord/cordtest
doc/README.solaris2 \
doc/README.uts \
doc/README.win32 \
+ doc/overview.html \
doc/tree.html \
doc/leak.html \
doc/gcinterface.html \
# should do, since we only need the headers.
# We assume this was manually unpacked, since I'm not sure there is
# a Windows standard command line tool to do this.
-AO_VERSION=0.6
-AO_SRC_DIR=$(srcdir)/atomic_ops-$(AO_VERSION)
+AO_VERSION=1.1
+AO_SRC_DIR=libatomic_ops-$(AO_VERSION)/src
AO_INCLUDE_DIR=$(AO_SRC_DIR)
OBJS= alloc.obj reclaim.obj allchblk.obj misc.obj mach_dep.obj os_dep.obj mark_rts.obj headers.obj mark.obj obj_map.obj blacklst.obj finalize.obj new_hblk.obj dbg_mlc.obj malloc.obj stubborn.obj dyn_load.obj typd_mlc.obj ptr_chck.obj gc_cpp.obj mallocx.obj win32_threads.obj
/* free blocks in GC_add_to_fl. */
# endif
# ifdef USE_MUNMAP
- hhdr -> hb_last_reclaimed = GC_gc_no;
+ hhdr -> hb_last_reclaimed = (unsigned short)GC_gc_no;
# endif
hhdr -> hb_sz = h_size;
GC_add_to_fl(h, hhdr);
}
struct hblk *
-GC_allochblk_nth(word sz/* bytes */, int kind, unsigned char flags, int n);
+GC_allochblk_nth(size_t sz/* bytes */, int kind, unsigned char flags, int n);
/*
* Allocate (and return pointer to) a heap block
* The client is responsible for clearing the block, if necessary.
*/
struct hblk *
-GC_allochblk(size_t sz, int kind, unsigned flags/* IGNORE_OFF_PAGE or 0 */)
+GC_allochblk(size_t sz, int kind, unsigned char flags/* IGNORE_OFF_PAGE or 0 */)
{
word blocks;
int start_list;
* Unlike the above, sz is in bytes.
*/
struct hblk *
-GC_allochblk_nth(word sz, int kind, unsigned char flags, int n)
+GC_allochblk_nth(size_t sz, int kind, unsigned char flags, int n)
{
struct hblk *hbp;
hdr * hhdr; /* Header corr. to hbp */
GC_remove_counts(hbp, (word)size);
hhdr->hb_sz = size;
# ifdef USE_MUNMAP
- hhdr -> hb_last_reclaimed = GC_gc_no;
+ hhdr -> hb_last_reclaimed = (unsigned short)GC_gc_no;
# endif
/* Check for duplicate deallocation in the easy case */
GC_remove_from_fl(prevhdr, FL_UNKNOWN);
prevhdr -> hb_sz += hhdr -> hb_sz;
# ifdef USE_MUNMAP
- prevhdr -> hb_last_reclaimed = GC_gc_no;
+ prevhdr -> hb_last_reclaimed = (unsigned short)GC_gc_no;
# endif
GC_remove_header(hbp);
hbp = prev;
}
bit_no = MARK_BIT_NO((ptr_t)p - (ptr_t)h, sz);
if (mark_bit_from_hdr(hhdr, bit_no)) {
+ int n_marks = hhdr -> hb_n_marks - 1;
clear_mark_bit_from_hdr(hhdr, bit_no);
- --hhdr -> hb_n_marks;
+# ifdef PARALLEL_MARK
+ /* Appr. count, don't decrement to zero! */
+ if (0 != n_marks) {
+ hhdr -> hb_n_marks = n_marks;
+ }
+# else
+ hhdr -> hb_n_marks = n_marks;
+# endif
}
GC_bytes_found -= sz;
}
#! /bin/sh
# From configure.ac Revision: 1.2 .
# Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.59 for gc 7.0alpha4.
+# Generated by GNU Autoconf 2.59 for gc 7.0alpha5.
#
# Report bugs to <Hans.Boehm@hp.com>.
#
# Identity of this package.
PACKAGE_NAME='gc'
PACKAGE_TARNAME='gc'
-PACKAGE_VERSION='7.0alpha4'
-PACKAGE_STRING='gc 7.0alpha4'
+PACKAGE_VERSION='7.0alpha5'
+PACKAGE_STRING='gc 7.0alpha5'
PACKAGE_BUGREPORT='Hans.Boehm@hp.com'
ac_unique_file="gcj_mlc.c"
# Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF
-\`configure' configures gc 7.0alpha4 to adapt to many kinds of systems.
+\`configure' configures gc 7.0alpha5 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]...
if test -n "$ac_init_help"; then
case $ac_init_help in
- short | recursive ) echo "Configuration of gc 7.0alpha4:";;
+ short | recursive ) echo "Configuration of gc 7.0alpha5:";;
esac
cat <<\_ACEOF
test -n "$ac_init_help" && exit 0
if $ac_init_version; then
cat <<\_ACEOF
-gc configure 7.0alpha4
+gc configure 7.0alpha5
generated by GNU Autoconf 2.59
Copyright (C) 2003 Free Software Foundation, Inc.
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.
-It was created by gc $as_me 7.0alpha4, which was
+It was created by gc $as_me 7.0alpha5, which was
generated by GNU Autoconf 2.59. Invocation command line was
$ $0 $@
# Define the identity of the package.
PACKAGE='gc'
- VERSION='7.0alpha4'
+ VERSION='7.0alpha5'
cat >>confdefs.h <<_ACEOF
machdep=
case "$host" in
alpha-*-openbsd*)
- machdep="alpha_mach_dep.lo"
+ machdep="mach_dep.lo"
if test x"${ac_cv_lib_dl_dlopen}" != xyes ; then
{ echo "$as_me:$LINENO: WARNING: OpenBSD/Alpha without dlopen(). Shared library support is disabled" >&5
echo "$as_me: WARNING: OpenBSD/Alpha without dlopen(). Shared library support is disabled" >&2;}
fi
;;
alpha*-*-linux*)
- machdep="alpha_mach_dep.lo"
+ machdep="mach_dep.lo"
;;
i?86-*-solaris2.[89] | i?86-*-solaris2.1?)
cat >>confdefs.h <<\_ACEOF
;;
mipstx39-*-elf*)
- machdep="mips_ultrix_mach_dep.lo"
+ machdep="mach_dep.lo"
cat >>confdefs.h <<\_ACEOF
#define STACKBASE __stackbase
_ACEOF
;;
mips-dec-ultrix*)
- machdep="mips_ultrix_mach-dep.lo"
+ machdep="mach-dep.lo"
;;
mips-nec-sysv*|mips-unknown-sysv*)
;;
mips*-*-linux*)
;;
mips-*-*)
- machdep="mips_sgi_mach_dep.lo"
+ machdep="mach_dep.lo"
cat >>confdefs.h <<\_ACEOF
#define NO_EXECUTE_PERMISSION 1
_ACEOF
;;
sparc-*-netbsd*)
- machdep="sparc_netbsd_mach_dep.lo"
+ machdep="mach_dep.lo sparc_netbsd_mach_dep.lo"
;;
sparc-sun-solaris2.3)
- machdep="sparc_mach_dep.lo"
+ machdep="mach_dep.lo sparc_mach_dep.lo"
cat >>confdefs.h <<\_ACEOF
#define SUNOS53_SHARED_LIB 1
_ACEOF
;;
sparc*-sun-solaris2.*)
- machdep="sparc_mach_dep.lo"
+ machdep="mach_dep.lo sparc_mach_dep.lo"
;;
ia64-*-*)
machdep="mach_dep.lo ia64_save_regs_in_stack.lo"
} >&5
cat >&5 <<_CSEOF
-This file was extended by gc $as_me 7.0alpha4, which was
+This file was extended by gc $as_me 7.0alpha5, which was
generated by GNU Autoconf 2.59. Invocation command line was
CONFIG_FILES = $CONFIG_FILES
cat >>$CONFIG_STATUS <<_ACEOF
ac_cs_version="\\
-gc config.status 7.0alpha4
+gc config.status 7.0alpha5
configured by $0, generated by GNU Autoconf 2.59,
with options \\"`echo "$ac_configure_args" | sed 's/[\\""\`\$]/\\\\&/g'`\\"
# Initialization
# ==============
-AC_INIT(gc,7.0alpha4,Hans.Boehm@hp.com)
+AC_INIT(gc,7.0alpha5,Hans.Boehm@hp.com)
## version must conform to [0-9]+[.][0-9]+(alpha[0-9]+)?
AC_CONFIG_SRCDIR(gcj_mlc.c)
AC_CANONICAL_TARGET
machdep=
case "$host" in
alpha-*-openbsd*)
- machdep="alpha_mach_dep.lo"
+ machdep="mach_dep.lo"
if test x"${ac_cv_lib_dl_dlopen}" != xyes ; then
AC_MSG_WARN(OpenBSD/Alpha without dlopen(). Shared library support is disabled)
fi
;;
alpha*-*-linux*)
- machdep="alpha_mach_dep.lo"
+ machdep="mach_dep.lo"
;;
i?86-*-solaris2.[[89]] | i?86-*-solaris2.1?)
AC_DEFINE(SOLARIS25_PROC_VDB_BUG_FIXED)
;;
mipstx39-*-elf*)
- machdep="mips_ultrix_mach_dep.lo"
+ machdep="mach_dep.lo"
AC_DEFINE(STACKBASE, __stackbase)
AC_DEFINE(DATASTART_IS_ETEXT)
;;
mips-dec-ultrix*)
- machdep="mips_ultrix_mach-dep.lo"
+ machdep="mach-dep.lo"
;;
mips-nec-sysv*|mips-unknown-sysv*)
;;
mips*-*-linux*)
;;
mips-*-*)
- machdep="mips_sgi_mach_dep.lo"
+ machdep="mach_dep.lo"
AC_DEFINE(NO_EXECUTE_PERMISSION)
;;
sparc-*-netbsd*)
- machdep="sparc_netbsd_mach_dep.lo"
+ machdep="mach_dep.lo sparc_netbsd_mach_dep.lo"
;;
sparc-sun-solaris2.3)
- machdep="sparc_mach_dep.lo"
+ machdep="mach_dep.lo sparc_mach_dep.lo"
AC_DEFINE(SUNOS53_SHARED_LIB)
;;
sparc*-sun-solaris2.*)
- machdep="sparc_mach_dep.lo"
+ machdep="mach_dep.lo sparc_mach_dep.lo"
;;
ia64-*-*)
machdep="mach_dep.lo ia64_save_regs_in_stack.lo"
collector. (If you are concerned about such things, I recommend you look
at the notice in config.guess or ltmain.sh.)
-This is version 7.0alpha2 of a conservative garbage collector for C and C++.
+This is version 7.0alpha5 of a conservative garbage collector for C and C++.
You might find a more recent version of this at
there.
- More consistently define HBLKSIZE to 4096 on 64 bit architectures with
4K pages. (Thanks to Andrew Haley.)
-
-Since gc6.6:
+ - With win32 threads, GC_stop_world needs to acquire GC_write_cs. (Thanks
+ to Ben Hutchings for the observation and patch.)
+ - Move up struct callinfo declaration to make gcc 4.0.2 happy.
+
+Since 6.6:
+ - Add "int" to Solaris "end" and "etext" declaration in gc.h. Declared
+ the symbols with underscores and as arrays, since that's what's actually
+ used. Perhaps this could all just be removed? (Thanks to John Bowman.)
+ - Fixed ARM GC_test_and_set code. (Thanks to Kazu Hirata and Paul Brook.)
+ - Added casts for assignments to hb_last_reclaimed, which truncate the
+ value. Added a cast to GC_adj_words_allocd. Use GetModuleHandleA
+ when retrieving a handle to kernel32.dll under win32. (Thanks to the
+ Visual Prolog developers.)
+
+Since gc6.7:
- Remove GC_PROTO, VOLATILE, GC_PTR, and GC_CONST. Assume ANSI C compiler
and use ANSI constructs unconditionally.
- Introduce #elif and #error in some of the appropriate places.
- Added GC_getattr_np-based GC_get_stack_base (untested).
- Separated thread local allocation into a separate file and added the
beginning of win32 support for that.
+
+Since gc7.0alpha4
+ (more 6.6, 6.7 changes)
+ - Some Solaris fixes, including some more general changes in how
+ the assembly pieces of mach_dep.c are handled.
+ - Removed a lot of SOLARIS_THREADS-specific code that was only
+ needed with the old implementation. This included many (mostly no-op)
+ versions of GC_is_fresh.
+ - Don't use atomic_ops in gc_locks.h unless we need threads.
+ - Fixed USE_MARK_BITS, which is once again the default without PARALLEL_MARK.
+ - Removed Solaris GC_INIT hack. It's a workaround for a long dead bug,
+ and it seemed to be wrong anyway.
+ - Changed win32_threads.c to require preprocessor-based interception
+ of thread routines by default. A client call to GC_use_DllMain is
+ now required to get the old behavior in which DllMain is used to implicitly
+ register threads. This was done for uniformity with other platforms, and
+ because the DllMain solution seemed to require very tricky code which,
+ at least in the past, imposed hard bounds on the number of threads.
+ - Many small changes to make thread support work again on Cygwin.
+ - Moved definition of allocator lock etc. to pthread_support.c and
+ win32_threads.c for those two cases.
+ - Got rid of the FASTLOCK() machinery. It doesn't seem useful on modern
+ platforms.
+ - Cleaned up the uncollectable allocation routines, speeding up the
+ slower paths. The code did enough unnecessary work off the critical path
+ that the underlying logic was getting hard to extract.
+ - No longer turn off THREAD_LOCAL_ALLOC with DBG_HDRS_ALL. Indications
+ are it just works, and I think the reasons for it not working disappeared
+ a while ago.
+ - Fixed bugs in hb_n_marks calculation and assertion.
+ - Don't use __builtin_expect for pre-3.0 gcc.
+ - Define GWW_VDB only for recent Microsoft tool chains.
+ - Add overview.html to doc directory.
+ - Fix NT_STATIC_THREADS_MAKEFILE, various compiler warnings.
+ - Made thread local allocation sort of work with Cygwin. The code should
+ be there to deal with other Windows variants, but non-Cygwin Windows
+ threads need more bug fixes.
To do:
- - Fix USE_MARK_BITS.
- REDIRECT_MALLOC and threads combination is getting closer, but currently
usually fails because the DTV (dynamic thread vector) used to access
thread-local storage is referenced only from the base of a thread stack,
Typically large heap sections end up cleared.
- Clone marker inner loop to support arch-dependent prefetching,
and counting of objects marked for finalization.
- - function wrapping??
- The USE_MUNMAP code should really use a separate data structure
indexed by physical page to keep track of time since last use of
a page. Using hblk headers means we lose track of ages when
To use threads, you need to abide by the following requirements:
-1) You need to use LinuxThreads (which are included in libc6).
+1) You need to use LinuxThreads or NPTL (which are included in libc6).
The collector relies on some implementation details of the LinuxThreads
- package. It is unlikely that this code will work on other
+ package. This code may not work on other
pthread implementations (in particular it will *not* work with
MIT pthreads).
-2) You must compile the collector with -DGC_LINUX_THREADS and -D_REENTRANT
- specified in the Makefile.
+2) You must compile the collector with -DGC_LINUX_THREADS (or
+ just -DGC_THREADS) and -D_REENTRANT specified in the Makefile.
3a) Every file that makes thread calls should define GC_LINUX_THREADS and
_REENTRANT and then include gc.h. Gc.h redefines some of the
doc/README.solaris2 \
doc/README.uts \
doc/README.win32 \
+ doc/overview.html \
doc/tree.html \
doc/leak.html \
doc/gcinterface.html \
<HTML>
<HEAD>
<TITLE> Conservative GC Algorithmic Overview </TITLE>
- <AUTHOR> Hans-J. Boehm, HP Labs (Much of this was written at SGI)</author>
+ <AUTHOR> Hans-J. Boehm, HP Labs (Some of this was written at SGI)</author>
</HEAD>
<BODY>
<H1> <I>This is under construction, and may always be.</i> </h1>
Recent versions of the collector support several facilities to enhance
the processor-scalability and thread performance of the collector.
These are discussed in more detail <A HREF="scale.html">here</a>.
+We briefly outline the approach to thread-local allocation in the
+next section.
+<H2>Thread-local allocation</h2>
+If thread-local allocation is enabled, the collector keeps separate
+arrays of free lists for each thread. Thread-local allocation
+is currently only supported on a few platforms.
+<P>
+The free list arrays associated
+with each thread are only used to satisfy requests for objects that
+are both very small, and belong to one of a small number of well-known
+kinds. These currently include "normal" and pointer-free objects.
+Depending on the configuration, "gcj" objects may also be included.
+<P>
+Thread-local free list entries contain either a pointer to the first
+element of a free list, or they contain a counter of the number of
+allocation "granules" allocated so far. Initially they contain the
+value one, i.e. a small counter value.
+<P>
+Thread-local allocation allocates directly through the global
+allocator, if the object is of a size or kind not covered by the
+local free lists.
+<P>
+If there is an appropriate local free list, the allocator checks whether it
+contains a sufficiently small counter value. If so, the counter is simply
+incremented by the counter value, and the global allocator is used.
+In this way, the initial few allocations of a given size bypass the local
+allocator. A thread that only allocates a handful of objects of a given
+size will not build up its own free list for that size. This avoids
+wasting space for unpopular object sizes or kinds.
+<P>
+Once the counter passes a threshold, <TT>GC_malloc_many</tt> is called
+to allocate roughly <TT>HBLKSIZE</tt> space and put it on the corresponding
+local free list. Further allocations of that size and kind then use
+this free list, and no longer need to acquire the allocation lock.
+The allocation procedure is otherwise similar to the global free lists.
+The local free lists are also linked using the first word in the object.
+In most cases this means they require considerably less time.
+<P>
+Local free lists are treated by most of the rest of the collector
+as though they were in-use reachable data. This requires some care,
+since pointer-free objects are not normally traced, and hence a special
+tracing procedure is required to mark all objects on pointer-free and
+gcj local free lists.
+<P>
+On thread exit, any remaining thread-local free list entries are
+transferred back to the global free list.
+<P>
+Note that if the collector is configured for thread-local allocation,
+GC versions before 7 do not invoke the thread-local allocator by default.
+<TT>GC_malloc</tt> only uses thread-local allocation in version 7 and later.
+In earlier versions, <TT>GC_MALLOC</tt> (all caps) may be directed
+to use thread-local allocation by defining <TT>GC_REDIRECT_TO_LOCAL</tt>
+and then including <TT>gc_local_alloc.h</tt>.
+<P>
+For some more details see <A HREF="scale.html">here</a>, and the
+technical report entitled
+<A HREF="http://www.hpl.hp.com/techreports/2000/HPL-2000-165.html">
+``Fast Multiprocessor Memory Allocation and Garbage Collection''
+</a>
+<P>
+<HR>
<P>
Comments are appreciated. Please send mail to
<A HREF="mailto:boehm@acm.org"><TT>boehm@acm.org</tt></a> or
--- /dev/null
+<!DOCTYPE HTML>
+<html><head><title>A garbage collector for C and C++</title></head>
+<body>
+<table bgcolor="#f0f0ff" cellpadding="10%">
+ <tbody><tr>
+ <td><a href="http://www.hpl.hp.com/personal/Hans_Boehm/gc/gcinterface.html">Interface Overview</a></td>
+ <td><a href="http://www.hpl.hp.com/personal/Hans_Boehm/gc/04tutorial.pdf">Tutorial Slides</a></td>
+ <td><a href="http://www.hpl.hp.com/personal/Hans_Boehm/gc/faq.html">FAQ</a></td>
+ <td><a href="http://www.hpl.hp.com/personal/Hans_Boehm/gc/simple_example.html">Example</a></td>
+ <td><a href="http://www.hpl.hp.com/personal/Hans_Boehm/gc/gc_source">Download</a></td>
+ <td><a href="http://www.hpl.hp.com/personal/Hans_Boehm/gc/license.txt">License</a></td>
+ </tr>
+</tbody></table>
+<h1>A garbage collector for C and C++</h1>
+<ul>
+<li><a href="#platforms">Platforms</a>
+</li><li><a href="#multiprocessors">Scalable multiprocessor versions</a>
+</li><li><a href="#details">Some collector details</a>
+</li><li><a href="#further">Further reading</a>
+</li><li><a href="#users">Current users</a>
+</li><li><a href="#collector">Local Links for this collector</a>
+</li><li><a href="#background">Local Background Links</a>
+</li><li><a href="#contacts">Contacts and Mailing List</a>
+</li></ul>
+[ This is an updated version of the page formerly at
+<tt>http://reality.sgi.com/boehm/gc.html</tt>
+and before that at
+<a href="ftp://parcftp.xerox.com/pub/gc/gc.html">
+<tt>ftp://parcftp.xerox.com/pub/gc/gc.html</tt></a>.]
+<p>
+The <a href="http://www.hpl.hp.com/personal/Hans_Boehm">Boehm</a>-<a href="http://www.cs.cornell.edu/annual_report/00-01/bios.htm#demers">Demers</a>-<a href="http://www-sul.stanford.edu/weiser/">Weiser</a>
+conservative garbage collector can
+be used as a garbage collecting
+replacement for C <tt>malloc</tt> or C++ <tt>new</tt>.
+It allows you to allocate memory basically as you normally would,
+without explicitly deallocating memory that is no longer useful.
+The collector automatically recycles memory when it determines
+that it can no longer be otherwise accessed.
+A simple example of such a use is given
+<a href="http://www.hpl.hp.com/personal/Hans_Boehm/gc/simple_example.html">here</a>.
+</p><p>
+The collector is also used by a number of programming language
+implementations that either use C as intermediate code, want
+to facilitate easier interoperation with C libraries, or
+just prefer the simple collector interface.
+For a more detailed description of the interface, see
+<a href="http://www.hpl.hp.com/personal/Hans_Boehm/gc/gcinterface.html">here</a>.
+</p><p>
+Alternatively, the garbage collector may be used as
+a <a href="http://www.hpl.hp.com/personal/Hans_Boehm/gc/leak.html">leak detector</a>
+for C or C++ programs, though that is not its primary goal.
+</p><p>
+Typically several versions will be available.
+Usually you should first try to use
+<a href="http://www.hpl.hp.com/personal/Hans_Boehm/gc/gc_source/gc.tar.gz"><tt>gc_source/gc.tar.gz</tt></a>,
+which is normally an older, more stable version.
+</p><p>
+If that fails, try the latest explicitly numbered version
+in <a href="http://www.hpl.hp.com/personal/Hans_Boehm/gc/gc_source/">
+<tt>gc_source/</tt></a>.
+Later versions may contain additional features, platform support,
+or bug fixes, but are likely to be less well tested.
+Note that versions containing the letters <tt>alpha</tt> are even less
+well tested than others, especially on non-HP platforms.
+</p><p>
+A slightly older version of the garbage collector is now also
+included as part of the
+<a href="http://gcc.gnu.org/">GNU compiler</a>
+distribution. The source
+code for that version is available for browsing
+<a href="http://gcc.gnu.org/cgi-bin/cvsweb.cgi/gcc/boehm-gc/">here</a>.
+</p><p>
+The arguments for and against conservative garbage collection
+in C and C++ are briefly
+discussed in
+<a href="http://www.hpl.hp.com/personal/Hans_Boehm/gc/issues.html">issues.html</a>. The beginnings of
+a frequently-asked-questions list are <a href="http://www.hpl.hp.com/personal/Hans_Boehm/gc/faq.html">here</a>.
+</p><p>
+The garbage collector code is copyrighted by
+<a href="http://www.hpl.hp.com/personal/Hans_Boehm">Hans-J. Boehm</a>,
+Alan J. Demers,
+<a href="http://www.xerox.com/">Xerox Corporation</a>,
+<a href="http://www.sgi.com/">Silicon Graphics</a>,
+and
+<a href="http://www.hp.com/">Hewlett-Packard Company</a>.
+It may be used and copied without payment of a fee under minimal restrictions.
+See the README file in the distribution or the
+<a href="http://www.hpl.hp.com/personal/Hans_Boehm/gc/license.txt">license</a> for more details.
+<b>IT IS PROVIDED AS IS,
+WITH ABSOLUTELY NO WARRANTY EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK</b>.
+</p><p>
+Empirically, this collector works with most unmodified C programs,
+simply by replacing
+<tt>malloc</tt> with <tt>GC_malloc</tt> calls,
+replacing <tt>realloc</tt> with <tt>GC_realloc</tt> calls, and removing
+free calls. Exceptions are discussed
+in <a href="http://www.hpl.hp.com/personal/Hans_Boehm/gc/issues.html">issues.html</a>.
+</p><h2><a name="platforms">Platforms</a></h2>
+The collector is not completely portable, but the distribution
+includes ports to most standard PC and UNIX/Linux platforms.
+The collector should work on Linux, *BSD, recent Windows versions,
+MacOS X, HP/UX, Solaris,
+Tru64, Irix and a few other operating systems.
+Some ports are more polished than others.
+<p>
+Irix pthreads, Linux threads, Win32 threads, Solaris threads
+(old style and pthreads),
+HP/UX 11 pthreads, Tru64 pthreads, and MacOS X threads are supported
+in recent versions.
+</p><h3>Separately distributed ports</h3>
+For MacOS 9/Classic use, Patrick Beard's latest port is available from
+<a href="http://homepage.mac.com/pcbeard/gc/">
+<tt>http://homepage.mac.com/pcbeard/gc/</tt></a>.
+(Unfortunately, that's now quite dated.
+I'm not in a position to test under MacOS. Although I try to
+incorporate changes, it is impossible for
+me to update the project file.)
+<p>
+Precompiled versions of the collector for NetBSD are available
+<a href="ftp://ftp.netbsd.org/pub/NetBSD/packages/pkgsrc/devel/boehm-gc/README.html">here</a>
+or
+<a href="http://www.netbsd.org/packages/devel/boehm-gc/README.html">here</a>.
+</p><p>
+<a href="http://www.debian.org/">Debian Linux</a> includes prepackaged
+versions of the collector.
+</p><h2><a name="multiprocessors">Scalable multiprocessor versions</a></h2>
+Kenjiro Taura, Toshio Endo, and Akinori Yonezawa have made available
+a <a href="http://www.yl.is.s.u-tokyo.ac.jp/gc/">parallel collector</a>
+based on this one. Their collector takes advantage of multiple processors
+during a collection. Starting with collector version 6.0alpha1
+we also do this, though with more modest processor scalability goals.
+Our approach is discussed briefly in
+<a href="http://www.hpl.hp.com/personal/Hans_Boehm/gc/scale.html"><tt>scale.html</tt></a>.
+<h2><a name="details">Some Collector Details</a></h2>
+The collector uses a <a href="http://www.hpl.hp.com/personal/Hans_Boehm/gc/complexity.html">mark-sweep</a> algorithm.
+It provides incremental and generational
+collection under operating systems which provide the right kind of
+virtual memory support. (Currently this includes SunOS[45], IRIX,
+OSF/1, Linux, and Windows, with varying restrictions.)
+It allows <a href="http://www.hpl.hp.com/personal/Hans_Boehm/gc/finalization.html"><i>finalization</i></a> code
+to be invoked when an object is collected.
+It can take advantage of type information to locate pointers if such
+information is provided, but it is usually used without such information.
+See the README and
+<tt>gc.h</tt> files in the distribution for more details.
+<p>
+For an overview of the implementation, see <a href="http://www.hpl.hp.com/personal/Hans_Boehm/gc/gcdescr.html">here</a>.
+</p><p>
+The garbage collector distribution includes a C string
+(<a href="http://www.hpl.hp.com/personal/Hans_Boehm/gc/gc_source/cordh.txt"><i>cord</i></a>) package that provides
+for fast concatenation and substring operations on long strings.
+A simple curses- and win32-based editor that represents the entire file
+as a cord is included as a
+sample application.
+</p><p>
+Performance of the nonincremental collector is typically competitive
+with malloc/free implementations. Both space and time overhead are
+likely to be only slightly higher
+for programs written for malloc/free
+(see Detlefs, Dosser and Zorn's
+<a href="ftp://ftp.cs.colorado.edu/pub/techreports/zorn/CU-CS-665-93.ps.Z">Memory Allocation Costs in Large C and C++ Programs</a>.)
+For programs allocating primarily very small objects, the collector
+may be faster; for programs allocating primarily large objects it will
+be slower. If the collector is used in a multithreaded environment
+and configured for thread-local allocation, it may in some cases
+significantly outperform malloc/free allocation in time.
+</p><p>
+We also expect that in many cases any additional overhead
+will be more than compensated for by decreased copying etc.
+if programs are written
+and tuned for garbage collection.
+</p><h1><a name="further">Further Reading:</a></h1>
+<b>The beginnings of a frequently asked questions list for this
+collector are <a href="http://www.hpl.hp.com/personal/Hans_Boehm/gc/faq.html">here</a></b>.
+<p>
+<b>The following provide information on garbage collection in general</b>:
+</p><p>
+Paul Wilson's <a href="ftp://ftp.cs.utexas.edu/pub/garbage">garbage collection ftp archive</a> and <a href="ftp://ftp.cs.utexas.edu/pub/garbage/gcsurvey.ps">GC survey</a>.
+</p><p>
+The Ravenbrook <a href="http://www.memorymanagement.org/">
+Memory Management Reference</a>.
+</p><p>
+David Chase's
+<a href="http://www.iecc.com/gclist/GC-faq.html">GC FAQ</a>.
+</p><p>
+Richard Jones'
+<a href="http://www.ukc.ac.uk/computer_science/Html/Jones/gc.html">
+GC page</a> and
+<a href="http://www.cs.kent.ac.uk/people/staff/rej/gcbook/gcbook.html">
+his book</a>.
+</p><p>
+<b>The following papers describe the collector algorithms we use
+and the underlying design decisions at
+a higher level.</b>
+</p><p>
+(Some of the lower level details can be found
+<a href="http://www.hpl.hp.com/personal/Hans_Boehm/gc/gcdescr.html">here</a>.)
+</p><p>
+The first one is not available
+electronically due to copyright considerations. Most of the others are
+subject to ACM copyright.
+</p><p>
+Boehm, H., "Dynamic Memory Allocation and Garbage Collection", <i>Computers in Physics
+9</i>, 3, May/June 1995, pp. 297-303. This is directed at an otherwise sophisticated
+audience unfamiliar with memory allocation issues. The algorithmic details differ
+from those in the implementation. There is a related letter to the editor and a minor
+correction in the next issue.
+</p><p>
+Boehm, H., and <a href="http://www.ubiq.com/hypertext/weiser/weiser.html">M. Weiser</a>,
+<a href="http://www.hpl.hp.com/personal/Hans_Boehm/spe_gc_paper">"Garbage Collection in an Uncooperative Environment"</a>,
+<i>Software Practice & Experience</i>, September 1988, pp. 807-820.
+</p><p>
+Boehm, H., A. Demers, and S. Shenker, <a href="http://www.hpl.hp.com/personal/Hans_Boehm/gc/papers/pldi91.ps.Z">"Mostly Parallel Garbage Collection"</a>, Proceedings
+of the ACM SIGPLAN '91 Conference on Programming Language Design and Implementation,
+<i>SIGPLAN Notices 26</i>, 6 (June 1991), pp. 157-164.
+</p><p>
+Boehm, H., <a href="http://www.hpl.hp.com/personal/Hans_Boehm/gc/papers/pldi93.ps.Z">"Space Efficient Conservative Garbage Collection"</a>, Proceedings of the ACM
+SIGPLAN '93 Conference on Programming Language Design and Implementation, <i>SIGPLAN
+Notices 28</i>, 6 (June 1993), pp. 197-206.
+</p><p>
+Boehm, H., "Reducing Garbage Collector Cache Misses",
+<i> Proceedings of the 2000 International Symposium on Memory Management </i>.
+<a href="http://portal.acm.org/citation.cfm?doid=362422.362438">
+Official version.</a>
+<a href="http://www.hpl.hp.com/techreports/2000/HPL-2000-99.html">
+Technical report version.</a> Describes the prefetch strategy
+incorporated into the collector for some platforms. Explains why
+the sweep phase of a "mark-sweep" collector should not really be
+a distinct phase.
+</p><p>
+M. Serrano, H. Boehm,
+"Understanding Memory Allocation of Scheme Programs",
+<i>Proceedings of the Fifth ACM SIGPLAN International Conference on
+Functional Programming</i>, 2000, Montreal, Canada, pp. 245-256.
+<a href="http://www.acm.org/pubs/citations/proceedings/fp/351240/p245-serrano/">
+Official version.</a>
+<a href="http://www.hpl.hp.com/techreports/2000/HPL-2000-62.html">
+Earlier Technical Report version.</a> Includes some discussion of the
+collector debugging facilities for identifying causes of memory retention.
+</p><p>
+Boehm, H.,
+"Fast Multiprocessor Memory Allocation and Garbage Collection",
+<a href="http://www.hpl.hp.com/techreports/2000/HPL-2000-165.html">
+HP Labs Technical Report HPL 2000-165</a>. Discusses the parallel
+collection algorithms, and presents some performance results.
+</p><p>
+Boehm, H., "Bounding Space Usage of Conservative Garbage Collectors",
+<i>Proceedings of the 2002 ACM SIGPLAN-SIGACT Symposium on Principles of
+Programming Languages</i>, Jan. 2002, pp. 93-100.
+<a href="http://portal.acm.org/citation.cfm?doid=503272.503282">
+Official version.</a>
+<a href="http://www.hpl.hp.com/techreports/2001/HPL-2001-251.html">
+Technical report version.</a>
+Includes a discussion of a collector facility to much more reliably test for
+the potential of unbounded heap growth.
+</p><p>
+<b>The following papers discuss language and compiler restrictions necessary to guarantee the
+safety of conservative garbage collection.</b>
+</p><p>
+We thank John Levine and JCLT for allowing
+us to make the second paper available electronically, and providing PostScript for the final
+version.
+</p><p>
+Boehm, H., <a href="http://www.hpl.hp.com/personal/Hans_Boehm/gc/papers/pldi96.ps.gz">``Simple
+Garbage-Collector-Safety''</a>, Proceedings
+of the ACM SIGPLAN '96 Conference on Programming Language Design
+and Implementation.
+</p><p>
+Boehm, H., and D. Chase, <a href="http://www.hpl.hp.com/personal/Hans_Boehm/gc/papers/boecha.ps.gz">
+``A Proposal for Garbage-Collector-Safe C Compilation''</a>,
+<i>Journal of C Language Translation 4</i>, 2 (December 1992), pp. 126-141.
+</p><p>
+<b>Other related information: </b>
+</p><p>
+The Detlefs, Dosser and Zorn's <a href="ftp://ftp.cs.colorado.edu/pub/techreports/zorn/CU-CS-665-93.ps.Z">Memory Allocation Costs in Large C and C++ Programs</a>.
+ This is a performance comparison of the Boehm-Demers-Weiser collector to malloc/free,
+using programs written for malloc/free.
+</p><p>
+Joel Bartlett's <a href="ftp://ftp.digital.com/pub/DEC/CCgc">mostly copying conservative garbage collector for C++</a>.
+</p><p>
+John Ellis and David Detlef's <a href="ftp://parcftp.xerox.com/pub/ellis/gc/gc.ps">Safe Efficient Garbage Collection for C++</a> proposal.
+</p><p>
+Henry Baker's <a href="http://home.pipeline.com/%7Ehbaker1/">paper collection</a>.
+</p><p>
+Slides for Hans Boehm's <a href="http://www.hpl.hp.com/personal/Hans_Boehm/gc/myths.ps">Allocation and GC Myths</a> talk.
+</p><h1><a name="users">Current users:</a></h1>
+Known current users of some variant of this collector include:
+<p>
+The runtime system for <a href="http://gcc.gnu.org/java">GCJ</a>,
+the static GNU java compiler.
+</p><p>
+<a href="http://w3m.sourceforge.net/">W3m</a>, a text-based web browser.
+</p><p>
+Some versions of the Xerox DocuPrint printer software.
+</p><p>
+The <a href="http://www.mozilla.org/">Mozilla</a> project, as leak
+detector.
+</p><p>
+The <a href="http://www.go-mono.com/">Mono</a> project,
+an open source implementation of the .NET development framework.
+</p><p>
+The <a href="http://www.gnu.org/projects/dotgnu/">DotGNU Portable.NET
+project</a>, another open source .NET implementation.
+</p><p>
+The <a href="http://irssi.org/">Irssi IRC client</a>.
+</p><p>
+<a href="http://titanium.cs.berkeley.edu/">The Berkeley Titanium project</a>.
+</p><p>
+<a href="http://www.nag.co.uk/nagware_fortran_compilers.asp">The NAGWare f90 Fortran 90 compiler</a>.
+</p><p>
+Elwood Corporation's <a href="http://www.elwood.com/eclipse-info/index.htm">
+Eclipse</a> Common Lisp system, C library, and translator.
+</p><p>
+The <a href="http://www-sop.inria.fr/mimosa/fp/Bigloo/">Bigloo
+Scheme</a>
+and <a href="http://kaolin.unice.fr/%7Eserrano/camloo.html">Camloo ML
+compilers</a>
+written by Manuel Serrano and others.
+</p><p>
+Brent Benson's <a href="http://ftp.cs.indiana.edu/pub/scheme-repository/imp/">libscheme</a>.
+</p><p>
+The <a href="http://www.cs.rice.edu/CS/PLT/packages/mzscheme/index.html">MzScheme</a> scheme implementation.
+</p><p>
+The <a href="http://www.cs.washington.edu/research/projects/cecil/www/cecil-home.html">University of Washington Cecil Implementation</a>.
+</p><p>
+<a href="http://www.icsi.berkeley.edu/Sather/">The Berkeley Sather implementation</a>.
+</p><p>
+<a href="http://www.cs.berkeley.edu/%7Eharmonia/">The Berkeley Harmonia Project</a>.
+</p><p>
+The <a href="http://www.cs.arizona.edu/sumatra/toba/">Toba</a> Java Virtual
+Machine to C translator.
+</p><p>
+The <a href="http://www.gwydiondylan.org/">Gwydion Dylan compiler</a>.
+</p><p>
+The <a href="http://gcc.gnu.org/onlinedocs/gcc/Objective-C.html">
+GNU Objective C runtime</a>.
+</p><p>
+<a href="http://www.math.uiuc.edu/Macaulay2">Macaulay 2</a>, a system to support
+research in algebraic geometry and commutative algebra.
+</p><p>
+The <a href="http://www.vestasys.org/">Vesta</a> configuration management
+system.
+</p><p>
+<a href="http://www.visual-prolog.com/vip6">Visual Prolog 6</a>.
+</p><p>
+<a href="http://asymptote.sf.net/">Asymptote LaTeX-compatible
+vector graphics language.</a>
+
+</p><h1><a name="collector">More collector information at this site</a></h1>
+<a href="http://www.hpl.hp.com/personal/Hans_Boehm/gc/simple_example.html">A simple illustration of how to build and
+use the collector.</a>.
+<p>
+<a href="http://www.hpl.hp.com/personal/Hans_Boehm/gc/gcinterface.html">Description of alternate interfaces to the
+garbage collector.</a>
+</p><p>
+<a href="http://www.hpl.hp.com/personal/Hans_Boehm/gc/04tutorial.pdf">Slides from an ISMM 2004 tutorial about the GC.</a>
+</p><p>
+<a href="http://www.hpl.hp.com/personal/Hans_Boehm/gc/faq.html">A FAQ (frequently asked questions) list.</a>
+</p><p>
+<a href="http://www.hpl.hp.com/personal/Hans_Boehm/gc/leak.html">How to use the garbage collector as a leak detector.</a>
+</p><p>
+<a href="http://www.hpl.hp.com/personal/Hans_Boehm/gc/debugging.html">Some hints on debugging garbage collected
+applications.</a>
+</p><p>
+<a href="http://www.hpl.hp.com/personal/Hans_Boehm/gc/gcdescr.html">An overview of the implementation of the
+garbage collector.</a>
+</p><p>
+<a href="http://www.hpl.hp.com/personal/Hans_Boehm/gc/tree.html">The data structure used for fast pointer lookups.</a>
+</p><p>
+<a href="http://www.hpl.hp.com/personal/Hans_Boehm/gc/scale.html">Scalability of the collector to multiprocessors.</a>
+</p><p>
+<a href="http://www.hpl.hp.com/personal/Hans_Boehm/gc/gc_source">Directory containing garbage collector source.</a>
+
+</p><h1><a name="background">More background information at this site</a></h1>
+<a href="http://www.hpl.hp.com/personal/Hans_Boehm/gc/bounds.html">An attempt to establish a bound on space usage of
+conservative garbage collectors.</a>
+<p>
+<a href="http://www.hpl.hp.com/personal/Hans_Boehm/gc/complexity.html">Mark-sweep versus copying garbage collectors
+and their complexity.</a>
+</p><p>
+<a href="http://www.hpl.hp.com/personal/Hans_Boehm/gc/conservative.html">Pros and cons of conservative garbage collectors,
+in comparison to other collectors.
+</a>
+</p><p>
+<a href="http://www.hpl.hp.com/personal/Hans_Boehm/gc/issues.html">Issues related to garbage collection vs.
+manual memory management in C/C++.</a>
+</p><p>
+<a href="http://www.hpl.hp.com/personal/Hans_Boehm/gc/example.html">An example of a case in which garbage collection
+results in a much faster implementation as a result of reduced
+synchronization.</a>
+</p><p>
+<a href="http://www.hpl.hp.com/personal/Hans_Boehm/gc/nonmoving">Slide set discussing performance of nonmoving
+garbage collectors.</a>
+</p><p>
+<a href="http://www.hpl.hp.com/personal/Hans_Boehm/popl03/web">
+Slide set discussing <i>Destructors, Finalizers, and Synchronization</i>
+(POPL 2003).</a>
+</p><p>
+<a href="http://portal.acm.org/citation.cfm?doid=604131.604153">
+Paper corresponding to above slide set.</a>
+(<a href="http://www.hpl.hp.com/techreports/2002/HPL-2002-335.html">
+Technical Report version</a>.)
+</p><p>
+<a href="http://www.hpl.hp.com/personal/Hans_Boehm/gc/gc_bench.html">A Java/Scheme/C/C++ garbage collection benchmark.</a>
+</p><p>
+<a href="http://www.hpl.hp.com/personal/Hans_Boehm/gc/myths.ps">Slides for talk on memory allocation myths.</a>
+</p><p>
+<a href="http://www.hpl.hp.com/personal/Hans_Boehm/gc/gctalk.ps">Slides for OOPSLA 98 garbage collection talk.</a>
+</p><p>
+<a href="http://www.hpl.hp.com/personal/Hans_Boehm/gc/papers">Related papers.</a>
+</p><h1><a name="contacts">Contacts and Mailing List</a><a></a></h1>
+<a>We have recently set up two mailing lists for collector announcements
+and discussions:
+</a><ul>
+<li><a href="mailto:gc-announce@linux.hpl.hp.com">gc-announce@linux.hpl.hp.com</a>
+is used for announcements of new versions. Postings are restricted.
+We expect this to always remain a very low volume list.
+</li><li><a href="mailto:gc@linux.hpl.hp.com">gc@linux.hpl.hp.com</a> is used for
+discussions, bug reports, and the like. Subscribers may post.
+On-topic posts by nonsubscribers will usually also be accepted, but
+it may take some time to review them.
+</li></ul>
+To subscribe to these lists, send a mail message containing the
+word "subscribe" to
+<a href="mailto:gc-announce-request@linux.hpl.hp.com?subject=subscribe">gc-announce-request@linux.hpl.hp.com</a>
+or to
+<a href="mailto:gc-request@linux.hpl.hp.com?subject=subscribe">gc-request@linux.hpl.hp.com</a>.
+(Please ignore the instructions about web-based subscription.
+The listed web site is behind the HP firewall.)
+<p>
+The archives for these lists appear
+<a href="http://www.hpl.hp.com/hosted/linux/mail-archives">here</a>.
+The gc list archive may also be read at
+<a href="http://dir.gmane.org/gmane.comp.programming.garbage-collection.boehmgc">gmane.org</a>.
+</p><p>
+Some prior discussion of the collector has taken place on the gcc
+java mailing list, whose archives appear
+<a href="http://gcc.gnu.org/ml/java/">here</a>, and also on
+<a href="http://lists.tunes.org/mailman/listinfo/gclist">gclist@iecc.com</a>.
+</p><p>
+Comments and bug reports may also be sent to
+(<a href="mailto:Hans_Boehm@hp.com">Hans.Boehm@hp.com</a>) or
+(<a href="mailto:boehm@acm.org">boehm@acm.org</a>), but the gc
+mailing list is usually preferred.
+
+</p></body></html>
result = alloc_hdr();
SET_HDR(h, result);
# ifdef USE_MUNMAP
- result -> hb_last_reclaimed = GC_gc_no;
+ result -> hb_last_reclaimed = (unsigned short)GC_gc_no;
# endif
return(result);
}
* A GC_INIT call is required if the collector is built with THREAD_LOCAL_ALLOC
* defined and the initial allocation call is not to GC_malloc().
*/
-#if (defined(sparc) || defined(__sparc)) && defined(sun)
- /*
- * If you are planning on putting
- * the collector in a SunOS 5 dynamic library, you need to call GC_INIT()
- * from the statically loaded program section.
- * This circumvents a Solaris 2.X (X<=4) linker bug.
- */
-# define GC_INIT() { extern end, etext; \
- GC_noop(&end, &etext); \
- GC_init();}
-#elif defined(__CYGWIN32__) && defined(GC_DLL) || defined (_AIX)
+#if defined(__CYGWIN32__) || defined (_AIX)
/*
* Similarly gnu-win32 DLLs need explicit initialization from
* the main program, as does AIX.
extern int _bss_end__[];
# define GC_MAX(x,y) ((x) > (y) ? (x) : (y))
# define GC_MIN(x,y) ((x) < (y) ? (x) : (y))
-# define GC_DATASTART ((GC_PTR) GC_MIN(_data_start__, _bss_start__))
-# define GC_DATAEND ((GC_PTR) GC_MAX(_data_end__, _bss_end__))
+# define GC_DATASTART ((void *) GC_MIN(_data_start__, _bss_start__))
+# define GC_DATAEND ((void *) GC_MAX(_data_end__, _bss_end__))
+# if defined(GC_DLL)
+# define GC_INIT() { GC_add_roots(GC_DATASTART, GC_DATAEND); GC_init(); }
+# else
+ /* Main program init not required, but the define is needed for */
+ /* uniformity. */
+# define GC_INIT() { GC_init(); }
+# endif
# endif
# if defined(_AIX)
extern int _data[], _end[];
-# define GC_DATASTART ((GC_PTR)((ulong)_data))
-# define GC_DATAEND ((GC_PTR)((ulong)_end))
+# define GC_DATASTART ((void *)((ulong)_data))
+# define GC_DATAEND ((void *)((ulong)_end))
+# define GC_INIT() { GC_add_roots(GC_DATASTART, GC_DATAEND); GC_init(); }
# endif
-# define GC_INIT() { GC_add_roots(GC_DATASTART, GC_DATAEND); GC_init(); }
#else
# define GC_INIT() { GC_init(); }
#endif
* Some tests for old macros. These violate our namespace rules and will
* disappear shortly. Use the GC_ names.
*/
-#if defined(SOLARIS_THREADS) || defined(_SOLARIS_THREADS)
+#if defined(SOLARIS_THREADS) || defined(_SOLARIS_THREADS) \
+ || defined(_SOLARIS_PTHREADS) || defined(GC_SOLARIS_PTHREADS)
+ /* We no longer support old style Solaris threads. */
+ /* GC_SOLARIS_THREADS now means pthreads. */
# define GC_SOLARIS_THREADS
#endif
-#if defined(_SOLARIS_PTHREADS)
-# define GC_SOLARIS_PTHREADS
-#endif
#if defined(IRIX_THREADS)
# define GC_IRIX_THREADS
#endif
#endif
#if !defined(_REENTRANT) && (defined(GC_SOLARIS_THREADS) \
- || defined(GC_SOLARIS_PTHREADS) \
|| defined(GC_HPUX_THREADS) \
|| defined(GC_AIX_THREADS) \
|| defined(GC_LINUX_THREADS))
# define _POSIX4A_DRAFT10_SOURCE 1
#endif
-# if defined(GC_SOLARIS_PTHREADS) || defined(GC_FREEBSD_THREADS) || \
+# if defined(GC_SOLARIS_THREADS) || defined(GC_FREEBSD_THREADS) || \
defined(GC_IRIX_THREADS) || defined(GC_LINUX_THREADS) || \
defined(GC_HPUX_THREADS) || defined(GC_OSF1_THREADS) || \
defined(GC_DGUX386_THREADS) || defined(GC_DARWIN_THREADS) || \
# define GC_IRIX_THREADS
# define GC_PTHREADS
# endif
-# if defined(__sparc) && !defined(__linux__)
-# define GC_SOLARIS_PTHREADS
+# if defined(__sparc) && !defined(__linux__) \
+ || defined(sun) && (defined(i386) || defined(__i386__))
+# define GC_SOLARIS_THREADS
# define GC_PTHREADS
# endif
+
# if defined(__APPLE__) && defined(__MACH__) && defined(__ppc__)
# define GC_DARWIN_THREADS
# define GC_PTHREADS
# endif
#endif
-#if defined(GC_SOLARIS_PTHREADS) && !defined(GC_SOLARIS_THREADS)
-# define GC_SOLARIS_THREADS
-#endif
-
# define __GC
# ifndef _WIN32_WCE
# include <stddef.h>
#include "gc.h"
#include "gc_tiny_fl.h"
-#ifndef __GNUC__
-# define __builtin_expect(x, y) (x)
-#endif
+#if __GNUC__ >= 3
+# define GC_EXPECT(expr, outcome) __builtin_expect(expr,outcome)
+ /* Equivalent to (expr), but predict that usually (expr)==outcome. */
+#else
+# define GC_EXPECT(expr, outcome) (expr)
+#endif /* __GNUC__ */
/* The ultimately general inline allocation macro. Allocate an object */
/* of size bytes, putting the resulting pointer in result. Tiny_fl is */
# define GC_FAST_MALLOC_GRANS(result,granules,tiny_fl,num_direct,\
kind,default_expr,init) \
{ \
- if (__builtin_expect(granules >= GC_TINY_FREELISTS,0)) { \
+ if (GC_EXPECT(granules >= GC_TINY_FREELISTS,0)) { \
result = default_expr; \
} else { \
void **my_fl = tiny_fl + granules; \
void *my_entry=*my_fl; \
void *next; \
\
- while (__builtin_expect((word)my_entry \
+ while (GC_EXPECT((word)my_entry \
<= num_direct + GC_TINY_FREELISTS + 1, 0)) { \
/* Entry contains counter or NULL */ \
if ((word)my_entry - 1 < num_direct) { \
init; \
PREFETCH_FOR_WRITE(next); \
GC_ASSERT(GC_size(result) >= bytes + EXTRA_BYTES); \
- GC_ASSERT(((word *)result)[1] == 0); \
+ GC_ASSERT((kind) == PTRFREE || ((word *)result)[1] == 0); \
out: ; \
} \
}
#ifndef GC_LOCKS_H
#define GC_LOCKS_H
-#include <atomic_ops.h>
-
/*
* Mutual exclusion between allocator/collector routines.
* Needed if there is more than one allocator thread.
- * FASTLOCK() is assumed to try to acquire the lock in a cheap and
- * dirty way that is acceptable for a few instructions, e.g. by
- * inhibiting preemption. This is assumed to have succeeded only
- * if a subsequent call to FASTLOCK_SUCCEEDED() returns TRUE.
- * FASTUNLOCK() is called whether or not FASTLOCK_SUCCEEDED().
- * If signals cannot be tolerated with the FASTLOCK held, then
- * FASTLOCK should disable signals. The code executed under
- * FASTLOCK is otherwise immune to interruption, provided it is
- * not restarted.
- * DCL_LOCK_STATE declares any local variables needed by LOCK and UNLOCK
- * and/or FASTLOCK.
+ * DCL_LOCK_STATE declares any local variables needed by LOCK and UNLOCK.
*
* In the PARALLEL_MARK case, we also need to define a number of
* other inline finctions here:
*
*/
# ifdef THREADS
+# include <atomic_ops.h>
+
void GC_noop1(word);
-# ifdef PCR_OBSOLETE /* Faster, but broken with multiple lwp's */
-# include "th/PCR_Th.h"
-# include "th/PCR_ThCrSec.h"
- extern struct PCR_Th_MLRep GC_allocate_ml;
-# define DCL_LOCK_STATE PCR_sigset_t GC_old_sig_mask
-# define LOCK() PCR_Th_ML_Acquire(&GC_allocate_ml)
-# define UNLOCK() PCR_Th_ML_Release(&GC_allocate_ml)
-# define UNLOCK() PCR_Th_ML_Release(&GC_allocate_ml)
-# define FASTLOCK() PCR_ThCrSec_EnterSys()
- /* Here we cheat (a lot): */
-# define FASTLOCK_SUCCEEDED() (*(int *)(&GC_allocate_ml) == 0)
- /* TRUE if nobody currently holds the lock */
-# define FASTUNLOCK() PCR_ThCrSec_ExitSys()
-# endif
# ifdef PCR
# include <base/PCR_Base.h>
# include <th/PCR_Th.h>
PCR_ERes GC_fastLockRes; PCR_sigset_t GC_old_sig_mask
# define LOCK() PCR_Th_ML_Acquire(&GC_allocate_ml)
# define UNLOCK() PCR_Th_ML_Release(&GC_allocate_ml)
-# define FASTLOCK() (GC_fastLockRes = PCR_Th_ML_Try(&GC_allocate_ml))
-# define FASTLOCK_SUCCEEDED() (GC_fastLockRes == PCR_ERes_okay)
-# define FASTUNLOCK() {\
- if( FASTLOCK_SUCCEEDED() ) PCR_Th_ML_Release(&GC_allocate_ml); }
# endif
# if !defined(AO_have_test_and_set_acquire)
# define USE_PTHREAD_LOCKS
# endif
+# if defined(GC_WIN32_THREADS) && defined(GC_PTHREADS)
+# define USE_PTHREAD_LOCKS
+# endif
-# if defined(GC_PTHREADS) && !defined(GC_WIN32_THREADS)
+# if defined(GC_WIN32_THREADS) && !defined(USE_PTHREAD_LOCKS)
+# include <windows.h>
+# define NO_THREAD (DWORD)(-1)
+ extern DWORD GC_lock_holder;
+ extern CRITICAL_SECTION GC_allocate_ml;
+# ifdef GC_ASSERTIONS
+# define UNCOND_LOCK() \
+ { EnterCriticalSection(&GC_allocate_ml); \
+ SET_LOCK_HOLDER(); }
+# define UNCOND_UNLOCK() \
+ { GC_ASSERT(I_HOLD_LOCK()); UNSET_LOCK_HOLDER(); \
+ LeaveCriticalSection(&GC_allocate_ml); }
+# else
+# define UNCOND_LOCK() EnterCriticalSection(&GC_allocate_ml);
+# define UNCOND_UNLOCK() LeaveCriticalSection(&GC_allocate_ml);
+# endif /* !GC_ASSERTIONS */
+# define SET_LOCK_HOLDER() GC_lock_holder = GetCurrentThreadId()
+# define UNSET_LOCK_HOLDER() GC_lock_holder = NO_THREAD
+# define I_HOLD_LOCK() (!GC_need_to_lock \
+ || GC_lock_holder == GetCurrentThreadId())
+# elif defined(GC_PTHREADS)
# define NO_THREAD (pthread_t)(-1)
# include <pthread.h>
# endif
# endif /* GC_PTHREADS with linux_threads.c implementation */
-# if defined(GC_WIN32_THREADS)
-# if defined(GC_PTHREADS)
-# include <pthread.h>
- extern pthread_mutex_t GC_allocate_ml;
-# define UNCOND_LOCK() pthread_mutex_lock(&GC_allocate_ml)
-# define UNCOND_UNLOCK() pthread_mutex_unlock(&GC_allocate_ml)
-# else
-# include <windows.h>
- GC_API CRITICAL_SECTION GC_allocate_ml;
-# define UNCOND_LOCK() EnterCriticalSection(&GC_allocate_ml);
-# define UNCOND_UNLOCK() LeaveCriticalSection(&GC_allocate_ml);
-# endif
-# endif
-# ifndef SET_LOCK_HOLDER
-# define SET_LOCK_HOLDER()
-# define UNSET_LOCK_HOLDER()
-# define I_HOLD_LOCK() FALSE
- /* Used on platforms were locks can be reacquired, */
- /* so it doesn't matter if we lie. */
-# endif
+
# else /* !THREADS */
-# define LOCK()
-# define UNLOCK()
+# define LOCK()
+# define UNLOCK()
+# define SET_LOCK_HOLDER()
+# define UNSET_LOCK_HOLDER()
+# define I_HOLD_LOCK() TRUE
+ /* Used only in positive assertions or to test whether */
+ /* we still need to acquire the lock. TRUE works in */
+ /* either case. */
# endif /* !THREADS */
#if defined(UNCOND_LOCK) && !defined(LOCK)
# define UNLOCK() if (GC_need_to_lock) { UNCOND_UNLOCK(); }
#endif
-# ifndef SET_LOCK_HOLDER
-# define SET_LOCK_HOLDER()
-# define UNSET_LOCK_HOLDER()
-# define I_HOLD_LOCK() FALSE
- /* Used on platforms were locks can be reacquired, */
- /* so it doesn't matter if we lie. */
-# endif
-
# ifndef ENTER_GC
# define ENTER_GC()
# define EXIT_GC()
# define DCL_LOCK_STATE
# endif
-# ifndef FASTLOCK
-# define FASTLOCK() LOCK()
-# define FASTLOCK_SUCCEEDED() TRUE
-# define FASTUNLOCK() UNLOCK()
-# endif
-
#endif /* GC_LOCKS_H */
/* Set mark bit, exit if it was already set. */
# ifdef USE_MARK_BITS
-/* FIXME: untested */
-# if defined(THREADS)
- /* Introduces a benign race as in the byte case. */
-# define OR_WORD_EXIT_IF_SET(addr, mask, label) \
- if (!(*(addr) & (mask))) { \
- AO_or((AO_t *)(addr), (mask); \
- } else { \
- goto label; \
- }
-# else /* !THREADS */
-# define OR_WORD_EXIT_IF_SET(addr, mask, label) \
- if (!(*(addr) & (mask))) { \
- *(addr) |= (mask); \
- } else { \
- goto label; \
- }
-# endif
+# ifdef PARALLEL_MARK
+ /* The following may fail to exit even if the bit was already set. */
+ /* For our uses, that's benign: */
+ /* Atomically OR (bits) into *(addr); jump to exit_label if all of */
+ /* the requested bits were already set. */
+# define OR_WORD_EXIT_IF_SET(addr, bits, exit_label) \
+ { \
+ if (!(*(addr) & (bits))) { \
+ AO_or((AO_t *)(addr), (bits)); \
+ } else { \
+ goto exit_label; \
+ } \
+ }
+# else
+# define OR_WORD_EXIT_IF_SET(addr, bits, exit_label) \
+ { \
+ word old = *(addr); \
+ word my_bits = (bits); \
+ if (old & my_bits) goto exit_label; \
+ *(addr) = (old | my_bits); \
+ }
+# endif /* !PARALLEL_MARK */
# define SET_MARK_BIT_EXIT_IF_SET(hhdr,bit_no,exit_label) \
{ \
word * mark_word_addr = hhdr -> hb_marks + divWORDSZ(bit_no); \
# endif
-#if defined(I386) && defined(__GNUC__)
+#ifdef USE_MARK_BYTES
+# if defined(I386) && defined(__GNUC__)
# define LONG_MULT(hprod, lprod, x, y) { \
asm("mull %2" : "=a"(lprod), "=d"(hprod) : "g"(y), "0"(x)); \
}
-#else /* No in-line X86 assembly code */
+# else /* No in-line X86 assembly code */
# define LONG_MULT(hprod, lprod, x, y) { \
unsigned long long prod = (unsigned long long)x \
* (unsigned long long)y; \
hprod = prod >> 32; \
lprod = (unsigned32)prod; \
}
-#endif
+# endif
/* There is a race here, and we may set */
/* the bit twice in the concurrent case. This can result in the */
if (mark_byte) goto exit_label; \
*mark_byte_addr = 1; \
}
+#endif /* USE_MARK_BYTES */
#ifdef PARALLEL_MARK
# define INCR_MARKS(hhdr) \
# ifndef GCCONFIG_H
# include "gcconfig.h"
-# ifndef USE_MARK_BYTES
-# define USE_MARK_BYTES
-# endif
# endif
# ifndef HEADERS_H
#if __GNUC__ >= 3
# define EXPECT(expr, outcome) __builtin_expect(expr,outcome)
-# define INLINE inline
/* Equivalent to (expr), but predict that usually (expr)==outcome. */
+# define INLINE inline
#else
# define EXPECT(expr, outcome) (expr)
# define INLINE
/* */
/*********************************/
-#ifdef SAVE_CALL_CHAIN
-
-/* Fill in the pc and argument information for up to NFRAMES of my */
-/* callers. Ignore my frame and my callers frame. */
-struct callinfo;
-void GC_save_callers(struct callinfo info[NFRAMES]);
-
-void GC_print_callers(struct callinfo info[NFRAMES]);
-
-#endif
-
#ifdef NEED_CALLINFO
struct callinfo {
word ci_pc; /* Caller, not callee, pc */
};
#endif
+#ifdef SAVE_CALL_CHAIN
+
+/* Fill in the pc and argument information for up to NFRAMES of my */
+/* callers. Ignore my frame and my callers frame. */
+void GC_save_callers(struct callinfo info[NFRAMES]);
+
+void GC_print_callers(struct callinfo info[NFRAMES]);
+
+#endif
+
/*********************************/
/* */
# define ABORT(s) PCR_Base_Panic(s)
# else
# ifdef SMALL_CONFIG
-# define ABORT(msg) abort();
+# define ABORT(msg) abort()
# else
GC_API void GC_abort(const char * msg);
-# define ABORT(msg) GC_abort(msg);
+# define ABORT(msg) GC_abort(msg)
# endif
# endif
counter_t hb_n_marks; /* Number of set mark bits, excluding */
/* the one always set at the end. */
/* Currently it is concurrently */
- /* updated and hence only a lower bound.*/
- /* But a zero value does gurantee that */
+ /* updated and hence only approximate. */
+ /* But a zero value does guarantee that */
/* the block contains no marked */
/* objects. */
+ /* Ensuring this property means that we */
+ /* never decrement it to zero during a */
+ /* collection, and hence the count may */
+ /* be one too high. Due to concurrent */
+ /* updates, an arbitrary number of */
+ /* increments, but not all of them (!) */
+ /* may be lost, hence it may in theory */
+ /* be much too low. */
+ /* Without parallel marking, the count */
+ /* is accurate. */
# ifdef USE_MARK_BYTES
union {
char _hb_marks[MARK_BITS_SZ];
word dummy; /* Force word alignment of mark bytes. */
} _mark_byte_union;
# define hb_marks _mark_byte_union._hb_marks
-# define ANY_INDEX 23 /* Random mark bit index for assertions */
# else
word hb_marks[MARK_BITS_SZ];
# endif /* !USE_MARK_BYTES */
};
+# define ANY_INDEX 23 /* "Random" mark bit index for assertions */
+
/* heap block body */
# define HBLK_WORDS (HBLKSIZE/sizeof(word))
/* accessed. */
#ifdef PARALLEL_MARK
# define OR_WORD(addr, bits) \
- { word old; \
- do { \
- old = *((volatile word *)addr); \
- } while (!GC_compare_and_exchange((addr), old, old | (bits))); \
- }
-# define OR_WORD_EXIT_IF_SET(addr, bits, exit_label) \
- { word old; \
- word my_bits = (bits); \
- do { \
- old = *((volatile word *)addr); \
- if (old & my_bits) goto exit_label; \
- } while (!GC_compare_and_exchange((addr), old, old | my_bits)); \
- }
+ { AO_or((volatile AO_t *)(addr), (AO_t)bits); }
#else
# define OR_WORD(addr, bits) *(addr) |= (bits)
-# define OR_WORD_EXIT_IF_SET(addr, bits, exit_label) \
- { \
- word old = *(addr); \
- word my_bits = (bits); \
- if (old & my_bits) goto exit_label; \
- *(addr) = (old | my_bits); \
- }
#endif
/* Mark bit operations */
# if defined(SPARC) || defined(IA64)
/* Cause all stacked registers to be saved in memory. Return a */
/* pointer to the top of the corresponding memory stack. */
- word GC_save_regs_in_stack(void);
+ ptr_t GC_save_regs_in_stack(void);
# endif
/* Push register contents onto mark stack. */
/* If NURSERY is defined, the default push */
/* called by GC_new_hblk, but also */
/* called explicitly without GC lock. */
-struct hblk * GC_allochblk (size_t size_in_bytes, int kind, unsigned flags);
+struct hblk * GC_allochblk (size_t size_in_bytes, int kind,
+ unsigned char flags);
/* Allocate a heap block, inform */
/* the marker that block is valid */
/* for objects of indicated size. */
/* Read retrieved dirty bits. */
GC_bool GC_page_was_ever_dirty(struct hblk *h);
/* Could the page contain valid heap pointers? */
-void GC_is_fresh(struct hblk *h, word n);
- /* Assert the region currently contains no */
- /* valid pointers. */
void GC_remove_protection(struct hblk *h, word nblocks,
GC_bool pointerfree);
/* h is about to be writteni or allocated. Ensure */
/* some other reason. */
# endif /* PARALLEL_MARK */
-# if defined(GC_PTHREADS) && !defined(GC_SOLARIS_THREADS)
+# if defined(GC_PTHREADS)
/* We define the thread suspension signal here, so that we can refer */
/* to it in the dirty bit implementation, if necessary. Ideally we */
/* would allocate a (real-time ?) signal using the standard mechanism.*/
# define ALIGNMENT 4 /* Required by hardware */
# define CPP_WORDSZ 32
# endif
+# define USE_ASM_PUSH_REGS
# ifdef SUNOS5
# define OS_TYPE "SUNOS5"
extern int _etext[];
# if !defined(__WATCOMC__) && !defined(GC_WIN32_THREADS)
# define MPROTECT_VDB
# endif
-# define GWW_VDB
+# if _MSC_VER >= 1300 /* .NET, i.e. > VisualStudio 6 */
+# define GWW_VDB
+# else
+# define MPROTECT_VDB
+# endif
# define DATAEND /* not needed */
# endif
# ifdef MSWINCE
# ifdef IA64
# define MACH_TYPE "IA64"
- /* We need to get preserved registers in addition to register */
- /* windows. That's easiest to do with setjmp. */
-# ifdef PARALLEL_MARK
-# define USE_MARK_BYTES
- /* Compare-and-exchange is too expensive to use for */
- /* setting mark bits. */
-# endif
# ifdef HPUX
# ifdef _ILP32
# define CPP_WORDSZ 32
# define THREADS
# endif
+# if !defined(USE_MARK_BITS) && !defined(USE_MARK_BYTES)
+# if defined(THREADS) && defined(PARALLEL_MARK)
+# define USE_MARK_BYTES
+# else
+# define USE_MARK_BITS
+# endif
+# endif
+
# if defined(MSWINCE)
# define NO_GETENV
# endif
# include "private/gc_priv.h"
-# if defined(GC_PTHREADS) && !defined(GC_SOLARIS_THREADS) \
- && !defined(GC_WIN32_THREADS)
+# if defined(GC_PTHREADS) && !defined(GC_WIN32_THREADS)
#if defined(GC_DARWIN_THREADS)
# include "private/darwin_stop_world.h"
# endif
} * GC_thread;
-# define THREAD_TABLE_SZ 128 /* Must be power of 2 */
+# define THREAD_TABLE_SZ 256 /* Must be power of 2 */
extern volatile GC_thread GC_threads[THREAD_TABLE_SZ];
extern GC_bool GC_thr_initialized;
/* implementation also exports GC_malloc and friends, which */
/* are declared in gc.h. */
+#include "private/gc_priv.h"
+
+#if defined(THREAD_LOCAL_ALLOC)
+
+#include "gc_inline.h"
+
+
+# if defined USE_HPUX_TLS
+# error USE_HPUX_TLS macro was replaced by USE_COMPILER_TLS
+# endif
+
+# if !defined(USE_PTHREAD_SPECIFIC) && !defined(USE_WIN32_SPECIFIC) && \
+ !defined(USE_WIN32_COMPILER_TLS) && !defined(USE_COMPILER_TLS) && \
+ !defined(USE_CUSTOM_SPECIFIC)
+# if defined(MSWIN32) || defined(MSWINCE) || defined(CYGWIN32)
+# if defined(__GNUC__) /* Fixed for versions past 2.95? */
+# define USE_WIN32_SPECIFIC
+# else
+# define USE_WIN32_COMPILER_TLS
+# endif /* !GNU */
+# elif defined(LINUX) && defined(__GNUC__)
+# define USE_COMPILER_TLS
+# elif (defined(GC_DGUX386_THREADS) || defined(GC_OSF1_THREADS) || \
+ defined(GC_DARWIN_THREADS) || defined(GC_AIX_THREADS))
+# define USE_PTHREAD_SPECIFIC
+# elif defined(GC_HPUX_THREADS)
+# ifdef __GNUC__
+# define USE_PTHREAD_SPECIFIC
+ /* Empirically, as of gcc 3.3, USE_COMPILER_TLS doesn't work. */
+# else
+# define USE_COMPILER_TLS
+# endif
+# else
+# define USE_CUSTOM_SPECIFIC /* Use our own. */
+# endif
+# endif
+
+# include <stdlib.h>
+
/* One of these should be declared as the tlfs field in the */
/* structure pointed to by a GC_thread. */
typedef struct thread_local_freelists {
# define GC_key_create pthread_key_create
# define GC_remove_specific() /* No need for cleanup on exit. */
typedef pthread_key_t GC_key_t;
-# elif defined(USE_COMPILER_TLS)
+# elif defined(USE_COMPILER_TLS) || defined(USE_WIN32_COMPILER_TLS)
# define GC_getspecific(x) (x)
# define GC_setspecific(key, v) ((key) = (v), 0)
# define GC_key_create(key, d) 0
# define GC_remove_specific() /* No need for cleanup on exit. */
typedef void * GC_key_t;
# elif defined(USE_WIN32_SPECIFIC)
+# include <windows.h>
# define GC_getspecific TlsGetValue
-# define GC_setspecific TlsSetValue
+# define GC_setspecific(key, v) !TlsSetValue(key, v)
+ /* We assume 0 == success, msft does the opposite. */
# define GC_key_create(key, d) \
((d) != 0? (ABORT("Destructor unsupported by TlsAlloc"),0) \
: (*(key) = TlsAlloc(), 0))
# define GC_remove_specific() /* No need for cleanup on thread exit. */
/* Need TlsFree on process exit/detach ? */
-# else
+ typedef DWORD GC_key_t;
+# elif defined(USE_CUSTOM_SPECIFIC)
# include "private/specific.h"
+# else
+# error implement me
# endif
/* we take care of an individual thread freelist structure. */
void GC_mark_thread_local_fls_for(GC_tlfs p);
-#ifdef USE_COMPILER_TLS
+extern
+#if defined(USE_COMPILER_TLS)
__thread
+#elif defined(USE_WIN32_COMPILER_TLS)
+ __declspec(thread)
#endif
GC_key_t GC_thread_key;
+
/* This is set up by the thread_local_alloc implementation. But the */
/* thread support layer calls GC_remove_specific(GC_thread_key) */
/* before a thread exits. */
/* And the thread support layer makes sure that GC_thread_key is traced,*/
/* if necessary. */
-
+#endif /* THREAD_LOCAL_ALLOC */
# if defined(SPARC) || defined(IA64)
/* Value returned from register flushing routine; either sp (SPARC) */
/* or ar.bsp (IA64) */
- word GC_save_regs_ret_val;
+ ptr_t GC_save_regs_ret_val;
# endif
/* Routine to mark from registers that are preserved by the C compiler. */
GC_with_callee_saves_pushed(GC_push_current_stack, cold_gc_frame);
}
-/* On register window machines, we need a way to force registers into */
-/* the stack. Return sp. */
-# ifdef SPARC
- asm(" .seg \"text\"");
-# if defined(SVR4) || defined(NETBSD) || defined(FREEBSD)
- asm(" .globl GC_save_regs_in_stack");
- asm("GC_save_regs_in_stack:");
- asm(" .type GC_save_regs_in_stack,#function");
-# else
- asm(" .globl _GC_save_regs_in_stack");
- asm("_GC_save_regs_in_stack:");
-# endif
-# if defined(__arch64__) || defined(__sparcv9)
- asm(" save %sp,-128,%sp");
- asm(" flushw");
- asm(" ret");
- asm(" restore %sp,2047+128,%o0");
-# else
- asm(" ta 0x3 ! ST_FLUSH_WINDOWS");
- asm(" retl");
- asm(" mov %sp,%o0");
-# endif
-# ifdef SVR4
- asm(" .GC_save_regs_in_stack_end:");
- asm(" .size GC_save_regs_in_stack,.GC_save_regs_in_stack_end-GC_save_regs_in_stack");
-# endif
-# ifdef LINT
- word GC_save_regs_in_stack() { return(0 /* sp really */);}
-# endif
-# endif
-
-/* GC_clear_stack_inner(arg, limit) clears stack area up to limit and */
-/* returns arg. Stack clearing is crucial on SPARC, so we supply */
-/* an assembly version that's more careful. Assumes limit is hotter */
-/* than sp, and limit is 8 byte aligned. */
#if defined(ASM_CLEAR_CODE)
-#ifndef SPARC
- --> fix it
-#endif
- asm(".globl GC_clear_stack_inner");
- asm("GC_clear_stack_inner:");
- asm(".type GC_save_regs_in_stack,#function");
-#if defined(__arch64__) || defined(__sparcv9)
- asm("mov %sp,%o2"); /* Save sp */
- asm("add %sp,2047-8,%o3"); /* p = sp+bias-8 */
- asm("add %o1,-2047-192,%sp"); /* Move sp out of the way, */
- /* so that traps still work. */
- /* Includes some extra words */
- /* so we can be sloppy below. */
- asm("loop:");
- asm("stx %g0,[%o3]"); /* *(long *)p = 0 */
- asm("cmp %o3,%o1");
- asm("bgu,pt %xcc, loop"); /* if (p > limit) goto loop */
- asm("add %o3,-8,%o3"); /* p -= 8 (delay slot) */
- asm("retl");
- asm("mov %o2,%sp"); /* Restore sp., delay slot */
-#else
- asm("mov %sp,%o2"); /* Save sp */
- asm("add %sp,-8,%o3"); /* p = sp-8 */
- asm("clr %g1"); /* [g0,g1] = 0 */
- asm("add %o1,-0x60,%sp"); /* Move sp out of the way, */
- /* so that traps still work. */
- /* Includes some extra words */
- /* so we can be sloppy below. */
- asm("loop:");
- asm("std %g0,[%o3]"); /* *(long long *)p = 0 */
- asm("cmp %o3,%o1");
- asm("bgu loop "); /* if (p > limit) goto loop */
- asm("add %o3,-8,%o3"); /* p -= 8 (delay slot) */
- asm("retl");
- asm("mov %o2,%sp"); /* Restore sp., delay slot */
-#endif /* old SPARC */
- /* First argument = %o0 = return value */
-# ifdef SVR4
- asm(" .GC_clear_stack_inner_end:");
- asm(" .size GC_clear_stack_inner,.GC_clear_stack_inner_end-GC_clear_stack_inner");
-# endif
-
# ifdef LINT
/*ARGSUSED*/
ptr_t GC_clear_stack_inner(arg, limit)
ptr_t arg; word limit;
{ return(arg); }
+ /* The real version is in a .S file */
# endif
#endif /* ASM_CLEAR_CODE */
if(SMALL_OBJ(lb)) {
lg = GC_size_map[lb];
opp = &(GC_aobjfreelist[lg]);
- FASTLOCK();
- if( EXPECT(!FASTLOCK_SUCCEEDED() || (op = *opp) == 0, 0) ) {
- FASTUNLOCK();
+ LOCK();
+ if( EXPECT((op = *opp) == 0, 0) ) {
+ UNLOCK();
return(GENERAL_MALLOC((word)lb, PTRFREE));
}
*opp = obj_link(op);
GC_bytes_allocd += GRANULES_TO_BYTES(lg);
- FASTUNLOCK();
+ UNLOCK();
return((void *) op);
} else {
return(GENERAL_MALLOC((word)lb, PTRFREE));
if(SMALL_OBJ(lb)) {
lg = GC_size_map[lb];
opp = (void **)&(GC_objfreelist[lg]);
- FASTLOCK();
- if( EXPECT(!FASTLOCK_SUCCEEDED() || (op = *opp) == 0, 0) ) {
- FASTUNLOCK();
+ LOCK();
+ if( EXPECT((op = *opp) == 0, 0) ) {
+ UNLOCK();
return(GENERAL_MALLOC((word)lb, NORMAL));
}
/* See above comment on signals. */
*opp = obj_link(op);
obj_link(op) = 0;
GC_bytes_allocd += GRANULES_TO_BYTES(lg);
- FASTUNLOCK();
+ UNLOCK();
return op;
} else {
return(GENERAL_MALLOC(lb, NORMAL));
/* collected anyway. */
lg = GC_size_map[lb];
opp = &(GC_uobjfreelist[lg]);
- FASTLOCK();
- if( FASTLOCK_SUCCEEDED() && (op = *opp) != 0 ) {
+ LOCK();
+ if( (op = *opp) != 0 ) {
/* See above comment on signals. */
*opp = obj_link(op);
obj_link(op) = 0;
/* cleared only temporarily during a collection, as a */
/* result of the normal free list mark bit clearing. */
GC_non_gc_bytes += GRANULES_TO_BYTES(lg);
- FASTUNLOCK();
- return((void *) op);
- }
- FASTUNLOCK();
- op = (ptr_t)GC_generic_malloc((word)lb, UNCOLLECTABLE);
+ UNLOCK();
+ } else {
+ UNLOCK();
+ op = (ptr_t)GC_generic_malloc((word)lb, UNCOLLECTABLE);
+ /* For small objects, the free lists are completely marked. */
+ }
+ GC_ASSERT(0 == op || GC_is_marked(op));
+ return((void *) op);
} else {
- op = (ptr_t)GC_generic_malloc((word)lb, UNCOLLECTABLE);
- }
- if (0 == op) return(0);
- /* We don't need the lock here, since we have an undisguised */
- /* pointer. We do need to hold the lock while we adjust */
- /* mark bits. */
- {
- register struct hblk * h;
size_t lb;
+ hdr * hhdr;
- h = HBLKPTR(op);
- lb = HDR(h) -> hb_sz;
+ op = (ptr_t)GC_generic_malloc((word)lb, UNCOLLECTABLE);
+ if (0 == op) return(0);
+ GC_ASSERT(((word)op & (HBLKSIZE - 1)) == 0); /* large block */
+ hhdr = HDR((struct hblk *)op);
+ /* We don't need the lock here, since we have an undisguised */
+ /* pointer. We do need to hold the lock while we adjust */
+ /* mark bits. */
+ lb = hhdr -> hb_sz;
LOCK();
- GC_set_mark_bit(op);
- GC_non_gc_bytes += lb;
+ set_mark_bit_from_hdr(hhdr, 0); /* Only object. */
+ GC_ASSERT(hhdr -> hb_n_marks == 0);
+ hhdr -> hb_n_marks = 1;
UNLOCK();
return((void *) op);
}
/* collected anyway. */
lg = GC_size_map[lg];
opp = &(GC_auobjfreelist[lg]);
- FASTLOCK();
- if( FASTLOCK_SUCCEEDED() && (op = *opp) != 0 ) {
+ LOCK();
+ if( (op = *opp) != 0 ) {
/* See above comment on signals. */
*opp = obj_link(op);
obj_link(op) = 0;
GC_bytes_allocd += GRANULES_TO_BYTES(lg);
/* Mark bit was already set while object was on free list. */
GC_non_gc_bytes += GRANULES_TO_BYTES(lg);
- FASTUNLOCK();
- return((void *) op);
- }
- FASTUNLOCK();
- op = (ptr_t)GC_generic_malloc(lb, AUNCOLLECTABLE);
+ UNLOCK();
+ } else {
+ UNLOCK();
+ op = (ptr_t)GC_generic_malloc(lb, AUNCOLLECTABLE);
+ }
+ GC_ASSERT(0 == op || GC_is_marked(op));
+ return((void *) op);
} else {
- op = (ptr_t)GC_generic_malloc(lb, AUNCOLLECTABLE);
- }
- if (0 == op) return(0);
- /* We don't need the lock here, since we have an undisguised */
- /* pointer. We do need to hold the lock while we adjust */
- /* mark bits. */
- {
- struct hblk * h;
size_t lb;
+ hdr * hhdr;
- h = HBLKPTR(op);
- lb = HDR(h) -> hb_sz;
+ op = (ptr_t)GC_generic_malloc(lb, AUNCOLLECTABLE);
+ if (0 == op) return(0);
+
+ GC_ASSERT(((word)op & (HBLKSIZE - 1)) == 0);
+ hhdr = HDR((struct hblk *)op);
+ lb = hhdr -> hb_sz;
LOCK();
- GC_set_mark_bit(op);
- GC_non_gc_bytes += lb;
+ set_mark_bit_from_hdr(hhdr, 0); /* Only object. */
+ GC_ASSERT(hhdr -> hb_n_marks == 0);
+ hhdr -> hb_n_marks = 1;
UNLOCK();
return((void *) op);
}
/* Set all mark bits in the header. Used for uncollectable blocks. */
void GC_set_hdr_marks(hdr *hhdr)
{
- int i;
+ unsigned i;
size_t sz = hhdr -> hb_sz;
int n_marks = FINAL_MARK_BIT(sz);
int n_marks;
clear_mark_bit_from_hdr(hhdr, bit_no);
n_marks = hhdr -> hb_n_marks - 1;
-# ifdef THREADS
+# ifdef PARALLEL_MARK
if (n_marks != 0)
hhdr -> hb_n_marks = n_marks;
/* Don't decrement to zero. The counts are approximate due to */
word * b = (word *)(((word) bottom + ALIGNMENT-1) & ~(ALIGNMENT-1));
word * t = (word *)(((word) top) & ~(ALIGNMENT-1));
register word *p;
- register word q;
+ register ptr_t q;
register word *lim;
register ptr_t greatest_ha = GC_greatest_plausible_heap_addr;
register ptr_t least_ha = GC_least_plausible_heap_addr;
/* to be valid. */
lim = t - 1 /* longword */;
for (p = b; p <= lim; p = (word *)(((ptr_t)p) + ALIGNMENT)) {
- q = *p;
+ q = (ptr_t)(*p);
GC_PUSH_ONE_STACK((ptr_t)q, p);
}
# undef GC_greatest_plausible_heap_addr
ptr_t cold_gc_frame)
{
if (!NEED_FIXUP_POINTER && GC_all_interior_pointers) {
-# define EAGER_BYTES 1024
/* Push the hot end of the stack eagerly, so that register values */
/* saved inside GC frames are marked before they disappear. */
/* The rest of the marking can be deferred until later. */
# endif
}
-#if !defined(SMALL_CONFIG) && !defined(USE_MARK_BYTES)
+#if !defined(SMALL_CONFIG) && !defined(USE_MARK_BYTES) && \
+ defined(MARK_BIT_PER_GRANULE)
+# if GC_GRANULE_WORDS == 1
+# define USE_PUSH_MARKED_ACCELERATORS
+# define PUSH_GRANULE(q) \
+ { ptr_t qcontents = (ptr_t)((q)[0]); \
+ GC_PUSH_ONE_HEAP(qcontents, (q)); }
+# elif GC_GRANULE_WORDS == 2
+# define USE_PUSH_MARKED_ACCELERATORS
+# define PUSH_GRANULE(q) \
+ { ptr_t qcontents = (ptr_t)((q)[0]); \
+ GC_PUSH_ONE_HEAP(qcontents, (q)); \
+ qcontents = (ptr_t)((q)[1]); \
+ GC_PUSH_ONE_HEAP(qcontents, (q)+1); }
+# elif GC_GRANULE_WORDS == 4
+# define USE_PUSH_MARKED_ACCELERATORS
+# define PUSH_GRANULE(q) \
+ { ptr_t qcontents = (ptr_t)((q)[0]); \
+ GC_PUSH_ONE_HEAP(qcontents, (q)); \
+ qcontents = (ptr_t)((q)[1]); \
+ GC_PUSH_ONE_HEAP(qcontents, (q)+1); \
+ qcontents = (ptr_t)((q)[2]); \
+ GC_PUSH_ONE_HEAP(qcontents, (q)+2); \
+ qcontents = (ptr_t)((q)[3]); \
+ GC_PUSH_ONE_HEAP(qcontents, (q)+3); }
+# endif
+#endif
+
+#ifdef USE_PUSH_MARKED_ACCELERATORS
/* Push all objects reachable from marked objects in the given block */
-/* of size 1 objects. */
+/* containing objects of size 1 granule. */
void GC_push_marked1(struct hblk *h, hdr *hhdr)
{
word * mark_word_addr = &(hhdr->hb_marks[0]);
- register word *p;
+ word *p;
word *plim;
- register int i;
- register word q;
- register word mark_word;
- register ptr_t greatest_ha = GC_greatest_plausible_heap_addr;
- register ptr_t least_ha = GC_least_plausible_heap_addr;
- register mse * mark_stack_top = GC_mark_stack_top;
- register mse * mark_stack_limit = GC_mark_stack_limit;
+ word *q;
+ word mark_word;
+
+ /* Allow registers to be used for some frequently acccessed */
+ /* global variables.  Otherwise aliasing issues are likely */
+ /* global variables. Otherwise aliasing issues are likely */
+ /* to prevent that. */
+ ptr_t greatest_ha = GC_greatest_plausible_heap_addr;
+ ptr_t least_ha = GC_least_plausible_heap_addr;
+ mse * mark_stack_top = GC_mark_stack_top;
+ mse * mark_stack_limit = GC_mark_stack_limit;
# define GC_mark_stack_top mark_stack_top
# define GC_mark_stack_limit mark_stack_limit
# define GC_greatest_plausible_heap_addr greatest_ha
/* go through all words in block */
while( p < plim ) {
mark_word = *mark_word_addr++;
- i = 0;
+ q = p;
while(mark_word != 0) {
if (mark_word & 1) {
- q = p[i];
- GC_PUSH_ONE_HEAP(q, p + i);
+ PUSH_GRANULE(q);
}
- i++;
+ q += GC_GRANULE_WORDS;
mark_word >>= 1;
}
- p += WORDSZ;
+ p += WORDSZ*GC_GRANULE_WORDS;
}
+
# undef GC_greatest_plausible_heap_addr
# undef GC_least_plausible_heap_addr
# undef GC_mark_stack_top
# undef GC_mark_stack_limit
+
GC_mark_stack_top = mark_stack_top;
}
#ifndef UNALIGNED
/* Push all objects reachable from marked objects in the given block */
-/* of size 2 objects. */
+/* of size 2 (granules) objects. */
void GC_push_marked2(struct hblk *h, hdr *hhdr)
{
word * mark_word_addr = &(hhdr->hb_marks[0]);
- register word *p;
+ word *p;
word *plim;
- register int i;
- register word q;
- register word mark_word;
- register ptr_t greatest_ha = GC_greatest_plausible_heap_addr;
- register ptr_t least_ha = GC_least_plausible_heap_addr;
- register mse * mark_stack_top = GC_mark_stack_top;
- register mse * mark_stack_limit = GC_mark_stack_limit;
+ word *q;
+ word mark_word;
+
+ ptr_t greatest_ha = GC_greatest_plausible_heap_addr;
+ ptr_t least_ha = GC_least_plausible_heap_addr;
+ mse * mark_stack_top = GC_mark_stack_top;
+ mse * mark_stack_limit = GC_mark_stack_limit;
+
# define GC_mark_stack_top mark_stack_top
# define GC_mark_stack_limit mark_stack_limit
# define GC_greatest_plausible_heap_addr greatest_ha
/* go through all words in block */
while( p < plim ) {
mark_word = *mark_word_addr++;
- i = 0;
+ q = p;
while(mark_word != 0) {
if (mark_word & 1) {
- q = p[i];
- GC_PUSH_ONE_HEAP(q, p + i);
- q = p[i+1];
- GC_PUSH_ONE_HEAP(q, p + i);
+ PUSH_GRANULE(q);
+ PUSH_GRANULE(q + GC_GRANULE_WORDS);
}
- i += 2;
+ q += 2 * GC_GRANULE_WORDS;
mark_word >>= 2;
}
- p += WORDSZ;
+ p += WORDSZ*GC_GRANULE_WORDS;
}
+
# undef GC_greatest_plausible_heap_addr
# undef GC_least_plausible_heap_addr
# undef GC_mark_stack_top
# undef GC_mark_stack_limit
+
GC_mark_stack_top = mark_stack_top;
}
+# if GC_GRANULE_WORDS < 4
/* Push all objects reachable from marked objects in the given block */
-/* of size 4 objects. */
+/* of size 4 (granules) objects. */
/* There is a risk of mark stack overflow here. But we handle that. */
/* And only unmarked objects get pushed, so it's not very likely. */
void GC_push_marked4(struct hblk *h, hdr *hhdr)
{
word * mark_word_addr = &(hhdr->hb_marks[0]);
- register word *p;
+ word *p;
word *plim;
- register int i;
- register word q;
- register word mark_word;
- register ptr_t greatest_ha = GC_greatest_plausible_heap_addr;
- register ptr_t least_ha = GC_least_plausible_heap_addr;
- register mse * mark_stack_top = GC_mark_stack_top;
- register mse * mark_stack_limit = GC_mark_stack_limit;
+ word *q;
+ word mark_word;
+
+ ptr_t greatest_ha = GC_greatest_plausible_heap_addr;
+ ptr_t least_ha = GC_least_plausible_heap_addr;
+ mse * mark_stack_top = GC_mark_stack_top;
+ mse * mark_stack_limit = GC_mark_stack_limit;
# define GC_mark_stack_top mark_stack_top
# define GC_mark_stack_limit mark_stack_limit
# define GC_greatest_plausible_heap_addr greatest_ha
/* go through all words in block */
while( p < plim ) {
mark_word = *mark_word_addr++;
- i = 0;
+ q = p;
while(mark_word != 0) {
if (mark_word & 1) {
- q = p[i];
- GC_PUSH_ONE_HEAP(q, p + i);
- q = p[i+1];
- GC_PUSH_ONE_HEAP(q, p + i + 1);
- q = p[i+2];
- GC_PUSH_ONE_HEAP(q, p + i + 2);
- q = p[i+3];
- GC_PUSH_ONE_HEAP(q, p + i + 3);
+ PUSH_GRANULE(q);
+ PUSH_GRANULE(q + GC_GRANULE_WORDS);
+ PUSH_GRANULE(q + 2*GC_GRANULE_WORDS);
+ PUSH_GRANULE(q + 3*GC_GRANULE_WORDS);
}
- i += 4;
+ q += 4 * GC_GRANULE_WORDS;
mark_word >>= 4;
}
- p += WORDSZ;
+ p += WORDSZ*GC_GRANULE_WORDS;
}
# undef GC_greatest_plausible_heap_addr
# undef GC_least_plausible_heap_addr
GC_mark_stack_top = mark_stack_top;
}
+#endif /* GC_GRANULE_WORDS < 4 */
+
#endif /* UNALIGNED */
-#endif /* SMALL_CONFIG */
+#endif /* USE_PUSH_MARKED_ACCELERATORS */
/* Push all objects reachable from marked objects in the given block */
void GC_push_marked(struct hblk *h, hdr *hhdr)
lim = (h + 1)->hb_body - sz;
}
- switch(BYTES_TO_WORDS(sz)) {
-# if !defined(SMALL_CONFIG) && !defined(USE_MARK_BYTES)
+ switch(BYTES_TO_GRANULES(sz)) {
+# if defined(USE_PUSH_MARKED_ACCELERATORS)
case 1:
GC_push_marked1(h, hhdr);
break;
-# endif
-# if !defined(SMALL_CONFIG) && !defined(UNALIGNED) && \
- !defined(USE_MARK_BYTES)
- case 2:
- GC_push_marked2(h, hhdr);
- break;
- case 4:
- GC_push_marked4(h, hhdr);
- break;
+# if !defined(UNALIGNED)
+ case 2:
+ GC_push_marked2(h, hhdr);
+ break;
+# if GC_GRANULE_WORDS < 4
+ case 4:
+ GC_push_marked4(h, hhdr);
+ break;
+# endif
+# endif
# endif
default:
GC_mark_stack_top_reg = GC_mark_stack_top;
/* If the world is not stopped, this is unsafe. It is */
/* also unnecessary, since we will do this again with the */
/* world stopped. */
-# if defined(THREAD_LOCAL_ALLOC) && !defined(DBG_HDRS_ALL)
+# if defined(THREAD_LOCAL_ALLOC)
if (GC_world_stopped) GC_mark_thread_local_free_lists();
# endif
int GC_log; /* Forward decl, so we can set it. */
#endif
-# ifdef THREADS
-# ifdef PCR
-# include "il/PCR_IL.h"
- PCR_Th_ML GC_allocate_ml;
-# elif defined(GC_WIN32_THREADS)
-# if defined(GC_PTHREADS)
- pthread_mutex_t GC_allocate_ml = PTHREAD_MUTEX_INITIALIZER;
-# elif defined(GC_DLL)
- __declspec(dllexport) CRITICAL_SECTION GC_allocate_ml;
-# else
- CRITICAL_SECTION GC_allocate_ml;
-# endif
-# elif defined(GC_PTHREADS)
-# if defined(USE_SPIN_LOCK)
- pthread_t GC_lock_holder = NO_THREAD;
-# else
- pthread_mutex_t GC_allocate_ml = PTHREAD_MUTEX_INITIALIZER;
- pthread_t GC_lock_holder = NO_THREAD;
- /* Used only for assertions, and to prevent */
- /* recursive reentry in the system call wrapper. */
-# endif
-# else
- --> declare allocator lock here
-# endif
-# endif
+#if defined(THREADS) && defined(PCR)
+# include "il/PCR_IL.h"
+ PCR_Th_ML GC_allocate_ml;
+#endif
+/* For other platforms with threads, the lock and possibly */
+/* GC_lock_holder variables are defined in the thread support code. */
#if defined(NOSYS) || defined(ECOS)
#undef STACKBASE
/* quantization algorithm (but we precompute it). */
void GC_init_size_map(void)
{
- register unsigned i;
+ int i;
/* Map size 0 to something bigger. */
/* This avoids problems at lower levels. */
#if defined(GC_WIN32_THREADS) && !defined(GC_PTHREADS)
if (!GC_is_initialized) {
BOOL (WINAPI *pfn) (LPCRITICAL_SECTION, DWORD) = NULL;
- HMODULE hK32 = GetModuleHandle("kernel32.dll");
+ HMODULE hK32 = GetModuleHandleA("kernel32.dll");
if (hK32)
(FARPROC) pfn = GetProcAddress(hK32,
"InitializeCriticalSectionAndSpinCount");
# undef GC_AMIGA_DEF
#endif
-#if defined(MSWIN32) || defined(MSWINCE)
+#if defined(MSWIN32) || defined(MSWINCE) || defined(CYGWIN32)
# define WIN32_LEAN_AND_MEAN
# define NOSERVICE
# include <windows.h>
+ /* It's not clear this is completely kosher under Cygwin. But it */
+ /* allows us to get a working GC_get_stack_base. */
#endif
#ifdef MACOS
&& !defined(MACOS) && !defined(DJGPP) && !defined(DOS4GW) \
&& !defined(NOSYS) && !defined(ECOS)
-# if defined(sigmask) && !defined(UTS4) && !defined(HURD)
+# if 0
/* Use the traditional BSD interface */
# define SIGSET_T int
# define SIG_DEL(set, signal) (set) &= ~(sigmask(signal))
/* longjmp implementations. Most systems appear not to have */
/* a signal 32. */
# define SIGSETMASK(old, new) (old) = sigsetmask(new)
-# else
- /* Use POSIX/SYSV interface */
-# define SIGSET_T sigset_t
-# define SIG_DEL(set, signal) sigdelset(&(set), (signal))
-# define SIG_FILL(set) sigfillset(&set)
-# define SIGSETMASK(old, new) sigprocmask(SIG_SETMASK, &(new), &(old))
# endif
+ /* Use POSIX/SYSV interface */
+# define SIGSET_T sigset_t
+# define SIG_DEL(set, signal) sigdelset(&(set), (signal))
+# define SIG_FILL(set) sigfillset(&set)
+# define SIGSETMASK(old, new) sigprocmask(SIG_SETMASK, &(new), &(old))
+
+
static GC_bool mask_initialized = FALSE;
static SIGSET_T new_mask;
* With threads, GC_mark_roots needs to know how to do this.
* Called with allocator lock held.
*/
-# if defined(MSWIN32) || defined(MSWINCE)
+# if defined(MSWIN32) || defined(MSWINCE) || defined(CYGWIN32)
# define is_writable(prot) ((prot) == PAGE_READWRITE \
|| (prot) == PAGE_WRITECOPY \
|| (prot) == PAGE_EXECUTE_READWRITE \
#endif /* FREEBSD_STACKBOTTOM */
#if !defined(BEOS) && !defined(AMIGA) && !defined(MSWIN32) \
- && !defined(MSWINCE) && !defined(OS2) && !defined(NOSYS) && !defined(ECOS)
+ && !defined(MSWINCE) && !defined(OS2) && !defined(NOSYS) && !defined(ECOS) \
+ && !defined(CYGWIN32)
ptr_t GC_get_main_stack_base(void)
{
# endif /* PCR */
-# if defined(GC_SOLARIS_THREADS) || defined(GC_PTHREADS) || \
- defined(GC_WIN32_THREADS)
+# if defined(GC_PTHREADS) || defined(GC_WIN32_THREADS)
extern void GC_push_all_stacks(void);
GC_push_all_stacks();
}
-# endif /* GC_SOLARIS_THREADS || GC_PTHREADS */
+# endif /* GC_WIN32_THREADS || GC_PTHREADS */
void (*GC_push_other_roots)(void) = GC_default_push_other_roots;
}
# ifndef MPROTECT_VDB
- void GC_is_fresh(struct hblk *h, word n)
- {}
void GC_remove_protection(struct hblk *h, word nblocks, GC_bool is_ptrfree)
{}
# endif
return(TRUE);
}
-/* Reset the n pages starting at h to "was never dirty" status. */
-void GC_is_fresh(struct hblk *h, word n)
-{
-}
-
/* A call that: */
/* I) hints that [h, h+nblocks) is about to be written. */
/* II) guarantees that protection is removed. */
async_set_pht_entry_from_index(GC_dirty_pages, index);
}
-/* Reset the n pages starting at h to "was never dirty" status. */
-void GC_is_fresh(struct hblk *h, word n)
-{
-}
-
/*ARGSUSED*/
void GC_remove_protection(struct hblk *h, word nblocks, GC_bool is_ptrfree)
{
return(TRUE);
}
-/* Reset the n pages starting at h to "was never dirty" status. */
-/*ARGSUSED*/
-void GC_is_fresh(struct hblk *h, word n)
-{
-}
-
# endif /* MPROTECT_VDB */
# ifdef PROC_VDB
word GC_proc_buf_size = INITIAL_BUF_SZ;
char *GC_proc_buf;
-#ifdef GC_SOLARIS_THREADS
-/* We don't have exact sp values for threads. So we count on */
-/* occasionally declaring stack pages to be fresh. Thus we */
-/* need a real implementation of GC_is_fresh. We can't clear */
-/* entries in GC_written_pages, since that would declare all */
-/* pages with the given hash address to be fresh. */
-# define MAX_FRESH_PAGES 8*1024 /* Must be power of 2 */
- struct hblk ** GC_fresh_pages; /* A direct mapped cache. */
- /* Collisions are dropped. */
-
-# define FRESH_PAGE_SLOT(h) (divHBLKSZ((word)(h)) & (MAX_FRESH_PAGES-1))
-# define ADD_FRESH_PAGE(h) \
- GC_fresh_pages[FRESH_PAGE_SLOT(h)] = (h)
-# define PAGE_IS_FRESH(h) \
- (GC_fresh_pages[FRESH_PAGE_SLOT(h)] == (h) && (h) != 0)
-#endif
-
int GC_proc_fd;
void GC_dirty_init(void)
ABORT("/proc ioctl failed");
}
GC_proc_buf = GC_scratch_alloc(GC_proc_buf_size);
-# ifdef GC_SOLARIS_THREADS
- GC_fresh_pages = (struct hblk **)
- GC_scratch_alloc(MAX_FRESH_PAGES * sizeof (struct hblk *));
- if (GC_fresh_pages == 0) {
- GC_err_printf("No space for fresh pages\n");
- EXIT();
- }
- BZERO(GC_fresh_pages, MAX_FRESH_PAGES * sizeof (struct hblk *));
-# endif
}
/* Ignore write hints. They don't help us here. */
{
}
-#ifdef GC_SOLARIS_THREADS
-# define READ(fd,buf,nbytes) syscall(SYS_read, fd, buf, nbytes)
-#else
-# define READ(fd,buf,nbytes) read(fd, buf, nbytes)
-#endif
+# define READ(fd,buf,nbytes) read(fd, buf, nbytes)
void GC_read_dirty(void)
{
/* Punt: */
memset(GC_grungy_pages, 0xff, sizeof (page_hash_table));
memset(GC_written_pages, 0xff, sizeof(page_hash_table));
-# ifdef GC_SOLARIS_THREADS
- BZERO(GC_fresh_pages,
- MAX_FRESH_PAGES * sizeof (struct hblk *));
-# endif
return;
}
}
register word index = PHT_HASH(h);
set_pht_entry_from_index(GC_grungy_pages, index);
-# ifdef GC_SOLARIS_THREADS
- {
- register int slot = FRESH_PAGE_SLOT(h);
-
- if (GC_fresh_pages[slot] == h) {
- GC_fresh_pages[slot] = 0;
- }
- }
-# endif
h++;
}
}
}
/* Update GC_written_pages. */
GC_or_pages(GC_written_pages, GC_grungy_pages);
-# ifdef GC_SOLARIS_THREADS
- /* Make sure that old stacks are considered completely clean */
- /* unless written again. */
- GC_old_stacks_are_fresh();
-# endif
}
#undef READ
GC_bool GC_page_was_dirty(struct hblk *h)
-struct hblk *h;
{
register word index = PHT_HASH(h);
register GC_bool result;
result = get_pht_entry_from_index(GC_grungy_pages, index);
-# ifdef GC_SOLARIS_THREADS
- if (result && PAGE_IS_FRESH(h)) result = FALSE;
- /* This happens only if page was declared fresh since */
- /* the read_dirty call, e.g. because it's in an unused */
- /* thread stack. It's OK to treat it as clean, in */
- /* that case. And it's consistent with */
- /* GC_page_was_ever_dirty. */
-# endif
return(result);
}
register GC_bool result;
result = get_pht_entry_from_index(GC_written_pages, index);
-# ifdef GC_SOLARIS_THREADS
- if (result && PAGE_IS_FRESH(h)) result = FALSE;
-# endif
return(result);
}
-/* Caller holds allocation lock. */
-void GC_is_fresh(struct hblk *h, word n)
-{
-
- register word index;
-
-# ifdef GC_SOLARIS_THREADS
- register word i;
-
- if (GC_fresh_pages != 0) {
- for (i = 0; i < n; i++) {
- ADD_FRESH_PAGE(h + i);
- }
- }
-# endif
-}
-
# endif /* PROC_VDB */
#include "private/pthread_support.h"
-#if defined(GC_PTHREADS) && !defined(GC_SOLARIS_THREADS) \
- && !defined(GC_WIN32_THREADS) && !defined(GC_DARWIN_THREADS)
+#if defined(GC_PTHREADS) && !defined(GC_WIN32_THREADS) && \
+ !defined(GC_DARWIN_THREADS)
#include <signal.h>
#include <semaphore.h>
return;
}
# ifdef SPARC
- me -> stop_info.stack_ptr = (ptr_t)GC_save_regs_in_stack();
+ me -> stop_info.stack_ptr = GC_save_regs_in_stack();
# else
me -> stop_info.stack_ptr = (ptr_t)(&dummy);
# endif
# ifdef IA64
- me -> backing_store_ptr = (ptr_t)GC_save_regs_in_stack();
+ me -> backing_store_ptr = GC_save_regs_in_stack();
# endif
/* Tell the thread that wants to stop the world that this */
(unsigned)p -> id, bs_lo, bs_hi);
# endif
if (pthread_equal(p -> id, me)) {
+ /* FIXME: This may add an unbounded number of entries, */
+ /* and hence overflow the mark stack, which is bad. */
GC_push_all_eager(bs_lo, bs_hi);
} else {
GC_push_all_stack(bs_lo, bs_hi);
return n_live_threads;
}
-/* Caller holds allocation lock. */
void GC_stop_world()
{
int i;
int n_live_threads;
int code;
+ GC_ASSERT(I_HOLD_LOCK());
#if DEBUG_THREADS
GC_printf("Stopping the world from 0x%x\n", (unsigned)pthread_self());
#endif
# include <sys/sysctl.h>
#endif /* GC_DARWIN_THREADS */
-
+/* Allocator lock definitions. */
+#if defined(USE_SPIN_LOCK)
+ pthread_t GC_lock_holder = NO_THREAD;
+#else
+ pthread_mutex_t GC_allocate_ml = PTHREAD_MUTEX_INITIALIZER;
+ pthread_t GC_lock_holder = NO_THREAD;
+ /* Used only for assertions, and to prevent */
+ /* recursive reentry in the system call wrapper. */
+#endif
#if defined(GC_DGUX386_THREADS)
# include <sys/dg_sys_info.h>
GC_check_tls_for(&(p->tlfs));
}
}
-# if !defined(USE_COMPILER_TLS) && !defined(USE_PTHREAD_SPECIFIC)
+# if defined(USE_CUSTOM_SPECIFIC)
if (GC_thread_key != 0)
GC_check_tsd_marks(GC_thread_key);
# endif
void GC_push_thread_structures(void)
{
+ GC_ASSERT(I_HOLD_LOCK());
GC_push_all((ptr_t)(GC_threads), (ptr_t)(GC_threads)+sizeof(GC_threads));
-# if defined(THREAD_LOCAL_ALLOC) && !defined(DBG_HDRS_ALL)
+# if defined(THREAD_LOCAL_ALLOC)
GC_push_all((ptr_t)(&GC_thread_key),
(ptr_t)(&GC_thread_key)+sizeof(&GC_thread_key));
# endif
}
-#if defined(THREAD_LOCAL_ALLOC) && !defined(DBG_HDRS_ALL)
-#endif /* THREAD_LOCAL_ALLOC */
-
+/* It may not be safe to allocate when we register the first thread. */
static struct GC_Thread_Rep first_thread;
/* Add a thread to GC_threads. We assume it wasn't already there. */
/* Delete a thread from GC_threads. We assume it is there. */
/* (The code intentionally traps if it wasn't.) */
-/* Caller holds allocation lock. */
void GC_delete_thread(pthread_t id)
{
int hv = ((word)id) % THREAD_TABLE_SZ;
register GC_thread p = GC_threads[hv];
register GC_thread prev = 0;
+ GC_ASSERT(I_HOLD_LOCK());
while (!pthread_equal(p -> id, id)) {
prev = p;
p = p -> next;
/* been notified, then there may be more than one thread */
/* in the table with the same pthread id. */
/* This is OK, but we need a way to delete a specific one. */
-void GC_delete_gc_thread(pthread_t id, GC_thread gc_id)
+void GC_delete_gc_thread(GC_thread gc_id)
{
+ pthread_t id = gc_id -> id;
int hv = ((word)id) % THREAD_TABLE_SZ;
register GC_thread p = GC_threads[hv];
register GC_thread prev = 0;
+ GC_ASSERT(I_HOLD_LOCK());
while (p != gc_id) {
prev = p;
p = p -> next;
# if defined(GC_HPUX_THREADS)
GC_nprocs = pthread_num_processors_np();
# endif
-# if defined(GC_OSF1_THREADS) || defined(GC_AIX_THREADS)
+# if defined(GC_OSF1_THREADS) || defined(GC_AIX_THREADS) \
+ || defined(GC_SOLARIS_THREADS)
GC_nprocs = sysconf(_SC_NPROCESSORS_ONLN);
if (GC_nprocs <= 0) GC_nprocs = 1;
# endif
/* may require allocation. */
/* Called without allocation lock. */
/* Must be called before a second thread is created. */
-/* Called without allocation lock. */
+/* Did we say it's called without the allocation lock? */
void GC_init_parallel(void)
{
if (parallel_initialized) return;
/* GC_init() calls us back, so set flag first. */
if (!GC_is_initialized) GC_init();
/* Initialize thread local free lists if used. */
-# if defined(THREAD_LOCAL_ALLOC) && !defined(DBG_HDRS_ALL)
+# if defined(THREAD_LOCAL_ALLOC)
LOCK();
GC_init_thread_local(&(GC_lookup_thread(pthread_self())->tlfs));
UNLOCK();
me = GC_lookup_thread(pthread_self());
GC_ASSERT(!(me -> thread_blocked));
# ifdef SPARC
- me -> stop_info.stack_ptr = (ptr_t)GC_save_regs_in_stack();
+ me -> stop_info.stack_ptr = GC_save_regs_in_stack();
# elif !defined(GC_DARWIN_THREADS)
- me -> stop_info.stack_ptr = (ptr_t)GC_approx_sp();
+ me -> stop_info.stack_ptr = GC_approx_sp();
# endif
# ifdef IA64
- me -> backing_store_ptr = (ptr_t)GC_save_regs_in_stack();
+ me -> backing_store_ptr = GC_save_regs_in_stack();
# endif
me -> thread_blocked = TRUE;
/* Save context here if we want to support precise stack marking */
/* complete before we remove this thread. */
GC_wait_for_gc_completion(FALSE);
me = GC_lookup_thread(pthread_self());
- GC_destroy_thread_local(&(me->tlfs));
+# if defined(THREAD_LOCAL_ALLOC)
+ GC_destroy_thread_local(&(me->tlfs));
+# endif
if (me -> flags & DETACHED) {
GC_delete_thread(pthread_self());
} else {
me -> flags |= FINISHED;
}
- GC_remove_specific(GC_thread_key);
+# if defined(THREAD_LOCAL_ALLOC)
+ GC_remove_specific();
+# endif
UNLOCK();
return GC_SUCCESS;
}
if (result == 0) {
LOCK();
/* Here the pthread thread id may have been recycled. */
- GC_delete_gc_thread(thread, thread_gc_id);
+ GC_delete_gc_thread(thread_gc_id);
UNLOCK();
}
return result;
thread_gc_id -> flags |= DETACHED;
/* Here the pthread thread id may have been recycled. */
if (thread_gc_id -> flags & FINISHED) {
- GC_delete_gc_thread(thread, thread_gc_id);
+ GC_delete_gc_thread(thread_gc_id);
}
UNLOCK();
}
{
GC_thread me;
- GC_in_thread_creation = TRUE; /* OK to collect from unknow thread. */
+ GC_in_thread_creation = TRUE; /* OK to collect from unknown thread. */
me = GC_new_thread(my_pthread);
GC_in_thread_creation = FALSE;
# ifdef GC_DARWIN_THREADS
sem_post(&(si -> registered)); /* Last action on si. */
/* OK to deallocate. */
pthread_cleanup_push(GC_thread_exit_proc, 0);
-# if defined(THREAD_LOCAL_ALLOC) && !defined(DBG_HDRS_ALL)
+# if defined(THREAD_LOCAL_ALLOC)
LOCK();
GC_init_thread_local(&(me->tlfs));
UNLOCK();
}
} else {
GC_bool empty = GC_block_empty(hhdr);
- GC_ASSERT(sz * hhdr -> hb_n_marks <= HBLKSIZE);
+# ifdef PARALLEL_MARK
+ /* Count can be low or one too high. */
+ GC_ASSERT(hhdr -> hb_n_marks <= HBLKSIZE/sz + 1);
+# else
+ GC_ASSERT(sz * hhdr -> hb_n_marks <= HBLKSIZE);
+# endif
if (hhdr -> hb_descr != 0) {
GC_composite_in_use += sz * hhdr -> hb_n_marks;
} else {
#endif /* !USE_MARK_BYTES */
/*ARGSUSED*/
-void GC_print_block_descr(struct hblk *h, word dummy)
+void GC_print_block_descr(struct hblk *h, word /* struct PrintStats */ raw_ps)
{
hdr * hhdr = HDR(h);
unsigned bytes = hhdr -> hb_sz;
bytes += HBLKSIZE-1;
bytes &= ~(HBLKSIZE-1);
- ps = (struct Print_stats *)dummy;
+ ps = (struct Print_stats *)raw_ps;
ps->total_bytes += bytes;
ps->number_of_blocks++;
}
#include <stdio.h>
#include <setjmp.h>
#include <string.h>
-#include "private/gcconfig.h"
+#include "private/gc_priv.h"
#ifdef OS2
/* GETPAGESIZE() is set to getpagesize() by default, but that */
printf("A good guess for ALIGNMENT on this machine is %ld.\n",
(unsigned long)(&(a.a_b))-(unsigned long)(&a));
+ printf("The following is a very dubious test of one root marking"
+ " strategy.\n");
+ printf("Results may not be accurate/useful:\n");
/* Encourage the compiler to keep x in a callee-save register */
x = 2*x-1;
printf("");
y++;
x = 2;
if (y == 1) longjmp(b,1);
+ printf("Some GC internal configuration stuff: \n");
+ printf("\tWORDSZ = %d, ALIGNMENT = %d, GC_GRANULE_BYTES = %d\n",
+ WORDSZ, ALIGNMENT, GC_GRANULE_BYTES);
+ printf("\tUsing one mark ");
+# if defined(USE_MARK_BYTES)
+ printf("byte");
+# elif defined(USE_MARK_BITS)
+ printf("bit");
+# endif
+ printf(" per ");
+# if defined(MARK_BIT_PER_OBJ)
+ printf("object.\n");
+# elif defined(MARK_BIT_PER_GRANULE)
+ printf("granule.\n");
+# endif
+# ifdef THREAD_LOCAL_ALLOC
+ printf("Thread local allocation enabled.\n");
+# endif
+# ifdef PARALLEL_MARK
+ printf("Parallel marking enabled.\n");
+# endif
return(0);
}
.size GC_save_regs_in_stack,.GC_save_regs_in_stack_end-GC_save_regs_in_stack
+! GC_clear_stack_inner(arg, limit) clears stack area up to limit and
+! returns arg. Stack clearing is crucial on SPARC, so we supply
+! an assembly version that's more careful. Assumes limit is hotter
+! than sp, and limit is 8 byte aligned.
.globl GC_clear_stack_inner
GC_clear_stack_inner:
#if defined(__arch64__) || defined(__sparcv9)
int i;
GC_find_leak = 1; /* for new collect versions not compiled */
/* with -DFIND_LEAK. */
+
+ GC_INIT(); /* Needed if thread-local allocation is enabled. */
+ /* FIXME: This is not ideal. */
for (i = 0; i < 10; ++i) {
p[i] = malloc(sizeof(int)+i);
}
CHECK_LEAKS();
CHECK_LEAKS();
CHECK_LEAKS();
+ return 0;
}
# else
# include <assert.h> /* Not normally used, but handy for debugging. */
# endif
-# include <assert.h> /* Not normally used, but handy for debugging. */
# include "gc.h"
# include "gc_typed.h"
# include "private/gc_priv.h" /* For output, locking, MIN_WORDS, */
- /* and some statistics. */
-# include "private/gcconfig.h"
+ /* and some statistics, and gcconfig.h. */
# if defined(MSWIN32) || defined(MSWINCE)
# include <windows.h>
# define GC_printf printf
# endif
-# if defined(GC_SOLARIS_THREADS) && !defined(GC_SOLARIS_PTHREADS)
-# include <thread.h>
-# include <synch.h>
-# endif
-
# if defined(GC_PTHREADS)
# include <pthread.h>
# endif
}
}
-/* # elif defined(GC_SOLARIS_THREADS) */
-
# else
# define fork_a_thread()
# ifdef PCR
PCR_ThCrSec_EnterSys();
# endif
-# if defined(GC_SOLARIS_THREADS) && !defined(GC_SOLARIS_PTHREADS)
- static mutex_t incr_lock;
- mutex_lock(&incr_lock);
-# endif
-# if defined(GC_PTHREADS)
+# if defined(GC_PTHREADS)
static pthread_mutex_t incr_lock = PTHREAD_MUTEX_INITIALIZER;
pthread_mutex_lock(&incr_lock);
-# else
-# ifdef GC_WIN32_THREADS
- EnterCriticalSection(&incr_cs);
-# endif
+# elif defined(GC_WIN32_THREADS)
+ EnterCriticalSection(&incr_cs);
# endif
if ((int)(GC_word)client_data != t -> level) {
(void)GC_printf("Wrong finalization data - collector is broken\n");
# ifdef PCR
PCR_ThCrSec_ExitSys();
# endif
-# if defined(GC_SOLARIS_THREADS) && !defined(GC_SOLARIS_PTHREADS)
- mutex_unlock(&incr_lock);
-# endif
# if defined(GC_PTHREADS)
pthread_mutex_unlock(&incr_lock);
-# else
-# ifdef GC_WIN32_THREADS
- LeaveCriticalSection(&incr_cs);
-# endif
+# elif defined(GC_WIN32_THREADS)
+ LeaveCriticalSection(&incr_cs);
# endif
}
# ifdef PCR
PCR_ThCrSec_EnterSys();
# endif
-# if defined(GC_SOLARIS_THREADS) && !defined(GC_SOLARIS_PTHREADS)
- static mutex_t incr_lock;
- mutex_lock(&incr_lock);
-# endif
# if defined(GC_PTHREADS)
static pthread_mutex_t incr_lock = PTHREAD_MUTEX_INITIALIZER;
pthread_mutex_lock(&incr_lock);
-# else
-# ifdef GC_WIN32_THREADS
- EnterCriticalSection(&incr_cs);
-# endif
+# elif defined(GC_WIN32_THREADS)
+ EnterCriticalSection(&incr_cs);
# endif
/* Losing a count here causes erroneous report of failure. */
finalizable_count++;
# ifdef PCR
PCR_ThCrSec_ExitSys();
# endif
-# if defined(GC_SOLARIS_THREADS) && !defined(GC_SOLARIS_PTHREADS)
- mutex_unlock(&incr_lock);
-# endif
# if defined(GC_PTHREADS)
pthread_mutex_unlock(&incr_lock);
-# else
-# ifdef GC_WIN32_THREADS
- LeaveCriticalSection(&incr_cs);
-# endif
+# elif defined(GC_WIN32_THREADS)
+ LeaveCriticalSection(&incr_cs);
# endif
}
chktree(t -> rchild, n-1);
}
-# if defined(GC_SOLARIS_THREADS) && !defined(GC_SOLARIS_PTHREADS)
-thread_key_t fl_key;
-void * alloc8bytes()
-{
-# if defined(SMALL_CONFIG) || defined(GC_DEBUG)
- collectable_count++;
- return(GC_MALLOC(8));
-# else
- void ** my_free_list_ptr;
- void * my_free_list;
-
- if (thr_getspecific(fl_key, (void **)(&my_free_list_ptr)) != 0) {
- (void)GC_printf("thr_getspecific failed\n");
- FAIL;
- }
- if (my_free_list_ptr == 0) {
- uncollectable_count++;
- my_free_list_ptr = GC_NEW_UNCOLLECTABLE(void *);
- if (thr_setspecific(fl_key, my_free_list_ptr) != 0) {
- (void)GC_printf("thr_setspecific failed\n");
- FAIL;
- }
- }
- my_free_list = *my_free_list_ptr;
- if (my_free_list == 0) {
- collectable_count++;
- my_free_list = GC_malloc_many(8);
- if (my_free_list == 0) {
- (void)GC_printf("alloc8bytes out of memory\n");
- FAIL;
- }
- }
- *my_free_list_ptr = GC_NEXT(my_free_list);
- GC_NEXT(my_free_list) = 0;
- return(my_free_list);
-# endif
-}
-
-#else
-
-# if defined(GC_PTHREADS)
+#if defined(GC_PTHREADS)
pthread_key_t fl_key;
void * alloc8bytes()
# endif
}
-# else
+#else
# define alloc8bytes() GC_MALLOC_ATOMIC(8)
-# endif
#endif
void alloc_small(n)
for (j=0; j<i; j++)
if (q[i] == q[j]) {
GC_printf(
- "Apparently failed to mark form some function arguments.\n"
+ "Apparently failed to mark from some function arguments.\n"
"Perhaps GC_push_regs was configured incorrectly?\n"
);
FAIL;
}
-#if !defined(PCR) && !defined(GC_SOLARIS_THREADS) \
+#if !defined(PCR) \
&& !defined(GC_WIN32_THREADS) && !defined(GC_PTHREADS) \
|| defined(LINT)
#if defined(MSWIN32) && !defined(__MINGW32__)
#define my_assert( e ) \
if (! (e)) { \
- GC_printf1( "Assertion failure in " __FILE__ ", line %d: " #e "\n", \
+ GC_printf( "Assertion failure in " __FILE__ ", line %d: " #e "\n", \
__LINE__ ); \
exit( 1 ); }
x = 0;
# endif
if (argc != 2 || (0 >= (n = atoi( argv[ 1 ] )))) {
- GC_printf0( "usage: test_cpp number-of-iterations\nAssuming 10 iters\n" );
+ GC_printf( "usage: test_cpp number-of-iterations\nAssuming 10 iters\n" );
n = 10;}
for (iters = 1; iters <= n; iters++) {
- GC_printf1( "Starting iteration %d\n", iters );
+ GC_printf( "Starting iteration %d\n", iters );
/* Allocate some uncollectable As and disguise their pointers.
Later we'll check to see if the objects are still there. We're
x = *xptr;
# endif
my_assert (29 == x[0]);
- GC_printf0( "The test appears to have succeeded.\n" );
+ GC_printf( "The test appears to have succeeded.\n" );
return( 0 );}
CHECK_LEAKS();
CHECK_LEAKS();
CHECK_LEAKS();
+ return 0;
}
*/
#include "private/gc_priv.h"
-# if defined(THREAD_LOCAL_ALLOC) && !defined(DBG_HDRS_ALL)
+# if defined(THREAD_LOCAL_ALLOC)
#include "private/thread_local_alloc.h"
#include "gc_inline.h"
-# if defined(GC_HPUX_THREADS) && !defined(USE_PTHREAD_SPECIFIC) \
- && !defined(USE_COMPILER_TLS)
-# ifdef __GNUC__
-# define USE_PTHREAD_SPECIFIC
- /* Empirically, as of gcc 3.3, USE_COMPILER_TLS doesn't work. */
-# else
-# define USE_COMPILER_TLS
-# endif
-# endif
-
-# if defined USE_HPUX_TLS
-# error USE_HPUX_TLS macro was replaced by USE_COMPILER_TLS
-# endif
-
-# if (defined(GC_DGUX386_THREADS) || defined(GC_OSF1_THREADS) || \
- defined(GC_DARWIN_THREADS) || defined(GC_AIX_THREADS)) \
- && !defined(USE_PTHREAD_SPECIFIC)
-# define USE_PTHREAD_SPECIFIC
-# endif
-
# include <stdlib.h>
-/* We don't really support thread-local allocation with DBG_HDRS_ALL */
-
-#ifdef USE_COMPILER_TLS
+#if defined(USE_COMPILER_TLS)
__thread
+#elif defined(USE_WIN32_COMPILER_TLS)
+ __declspec(thread)
#endif
GC_key_t GC_thread_key;
void *q, **qptr;
for (i = 1; i < TINY_FREELISTS; ++i) {
-#if 0
if ((word)(fl[i]) >= HBLKSIZE) {
if (gfl[i] == 0) {
gfl[i] = fl[i];
gfl[i] = fl[i];
}
}
-#endif
/* Clear fl[i], since the thread structure may hang around. */
/* Do it in a way that is likely to trap if we access it. */
fl[i] = (ptr_t)HBLKSIZE;
# endif
}
-#if defined(GC_ASSERTIONS) && defined(GC_LINUX_THREADS)
+#if defined(GC_ASSERTIONS) && defined(GC_PTHREADS) && !defined(CYGWIN32)
# include <pthread.h>
extern char * GC_lookup_thread(pthread_t id);
#endif
+#if defined(GC_ASSERTIONS) && defined(GC_WIN32_THREADS)
+# include <pthread.h>
+ extern char * GC_lookup_thread(int id);
+#endif
+
void * GC_malloc(size_t bytes)
{
size_t granules = ROUNDED_UP_GRANULES(bytes);
# endif
# ifdef GC_ASSERTIONS
/* We can't check tsd correctly, since we don't have access to */
- /* the right declarations. But we cna check that it's close. */
+ /* the right declarations. But we can check that it's close. */
LOCK();
{
- char * me = GC_lookup_thread(pthread_self());
+# if defined(GC_WIN32_THREADS)
+ char * me = (char *)GC_lookup_thread_inner(GetCurrentThreadId());
+# else
+ char * me = GC_lookup_thread(pthread_self());
+# endif
GC_ASSERT((char *)tsd > me && (char *)tsd < me + 1000);
}
UNLOCK();
}
#endif /* GC_ASSERTIONS */
-# else /* !THREAD_LOCAL_ALLOC && !DBG_HDRS_ALL */
+# else /* !THREAD_LOCAL_ALLOC */
# define GC_destroy_thread_local(t)
"-Wl,--wrap -Wl,pthread_sigmask -Wl,--wrap -Wl,sleep\n");
# endif
# if defined(GC_LINUX_THREADS) || defined(GC_IRIX_THREADS) \
- || defined(GC_SOLARIS_PTHREADS) \
|| defined(GC_DARWIN_THREADS) || defined(GC_AIX_THREADS)
# ifdef GC_USE_DLOPEN_WRAP
printf("-ldl ");
# if defined(GC_HPUX_THREADS) || defined(GC_OSF1_THREADS)
printf("-lpthread -lrt\n");
# endif
-# if defined(GC_SOLARIS_THREADS) && !defined(GC_SOLARIS_PTHREADS)
- printf("-lthread -ldl\n");
+# if defined(GC_SOLARIS_THREADS) || defined(GC_SOLARIS_PTHREADS)
+ printf("-lthread -lposix4\n");
+ /* Is this right for recent versions? */
# endif
# if defined(GC_WIN32_THREADS) && defined(CYGWIN32)
printf("-lpthread\n");
if(SMALL_OBJ(lb)) {
lg = GC_size_map[lb];
opp = &(GC_eobjfreelist[lg]);
- FASTLOCK();
- if( !FASTLOCK_SUCCEEDED() || (op = *opp) == 0 ) {
- FASTUNLOCK();
+ LOCK();
+ if( (op = *opp) == 0 ) {
+ UNLOCK();
op = (ptr_t)GENERAL_MALLOC((word)lb, GC_explicit_kind);
if (0 == op) return 0;
lg = GC_size_map[lb]; /* May have been uninitialized. */
*opp = obj_link(op);
obj_link(op) = 0;
GC_bytes_allocd += GRANULES_TO_BYTES(lg);
- FASTUNLOCK();
+ UNLOCK();
}
} else {
op = (ptr_t)GENERAL_MALLOC((word)lb, GC_explicit_kind);
if( SMALL_OBJ(lb) ) {
lg = GC_size_map[lb];
opp = &(GC_eobjfreelist[lg]);
- FASTLOCK();
- if( !FASTLOCK_SUCCEEDED() || (op = *opp) == 0 ) {
- FASTUNLOCK();
+ LOCK();
+ if( (op = *opp) == 0 ) {
+ UNLOCK();
op = (ptr_t)GENERAL_MALLOC_IOP(lb, GC_explicit_kind);
lg = GC_size_map[lb]; /* May have been uninitialized. */
} else {
*opp = obj_link(op);
obj_link(op) = 0;
GC_bytes_allocd += GRANULES_TO_BYTES(lg);
- FASTUNLOCK();
+ UNLOCK();
}
} else {
op = (ptr_t)GENERAL_MALLOC_IOP(lb, GC_explicit_kind);
if( SMALL_OBJ(lb) ) {
lg = GC_size_map[lb];
opp = &(GC_arobjfreelist[lg]);
- FASTLOCK();
- if( !FASTLOCK_SUCCEEDED() || (op = *opp) == 0 ) {
- FASTUNLOCK();
+ LOCK();
+ if( (op = *opp) == 0 ) {
+ UNLOCK();
op = (ptr_t)GENERAL_MALLOC((word)lb, GC_array_kind);
if (0 == op) return(0);
lg = GC_size_map[lb]; /* May have been uninitialized. */
*opp = obj_link(op);
obj_link(op) = 0;
GC_bytes_allocd += GRANULES_TO_BYTES(lg);
- FASTUNLOCK();
+ UNLOCK();
}
} else {
op = (ptr_t)GENERAL_MALLOC((word)lb, GC_array_kind);
/* it to keep the old-style build process working. */
#define GC_TMP_VERSION_MAJOR 7
#define GC_TMP_VERSION_MINOR 0
-#define GC_TMP_ALPHA_VERSION 4
+#define GC_TMP_ALPHA_VERSION 5
#ifndef GC_NOT_ALPHA
# define GC_NOT_ALPHA 0xff
#include <windows.h>
+#ifdef THREAD_LOCAL_ALLOC
+# include "private/thread_local_alloc.h"
+#endif /* THREAD_LOCAL_ALLOC */
+
+/* Allocation lock declarations. */
+#if !defined(USE_PTHREAD_LOCKS)
+# if defined(GC_DLL)
+ __declspec(dllexport) CRITICAL_SECTION GC_allocate_ml;
+# else
+ CRITICAL_SECTION GC_allocate_ml;
+# endif
+ DWORD GC_lock_holder = NO_THREAD;
+ /* Thread id for current holder of allocation lock */
+#else
+ pthread_mutex_t GC_allocate_ml = PTHREAD_MUTEX_INITIALIZER;
+ pthread_t GC_lock_holder = NO_THREAD;
+#endif
+
#ifdef CYGWIN32
# include <errno.h>
# define DEBUG_CYGWIN_THREADS 0
- void * GC_start_routine(void * arg);
+ void * GC_pthread_start(void * arg);
void GC_thread_exit_proc(void *arg);
# include <pthread.h>
#endif
+#if defined(GC_DLL) && !defined(MSWINCE)
+ static GC_bool GC_win32_dll_threads = FALSE;
+ /* This code operates in two distinct modes, depending on */
+ /* the setting of GC_win32_dll_threads. If */
+ /* GC_win32_dll_threads is set, all threads in the process */
+ /* are implicitly registered with the GC by DllMain. */
+ /* No explicit registration is required, and attempts at */
+ /* explicit registration are ignored. This mode is */
+ /* very different from the Posix operation of the collector. */
+ /* In this mode access to the thread table is lock-free. */
+ /* Hence there is a static limit on the number of threads. */
+
+ /* If GC_win32_dll_threads is FALSE, or the collector is */
+ /* built without GC_DLL defined, things operate in a way */
+ /* that is very similar to Posix platforms, and new threads */
+ /* must be registered with the collector, e.g. by using */
+ /* preprocessor-based interception of the thread primitives. */
+ /* In this case, we use a real data structure for the thread */
+ /* table. Note that there is no equivalent of linker-based */
+ /* call interception, since we don't have ELF-like */
+ /* facilities. The Windows analog appears to be "API */
+ /* hooking", which really seems to be a standard way to */
+ /* do minor binary rewriting (?). I'd prefer not to have */
+ /* the basic collector rely on such facilities, but an */
+ /* optional package that intercepts thread calls this way */
+ /* would probably be nice. */
+
+ /* GC_win32_dll_threads must be set at initialization time, */
+ /* i.e. before any collector or thread calls. We make it a */
+ /* "dynamic" option only to avoid multiple library versions. */
+#else
+# define GC_win32_dll_threads FALSE
+#endif
+
/* The type of the first argument to InterlockedExchange. */
/* Documented to be LONG volatile *, but at least gcc likes */
/* this better. */
typedef LONG * IE_t;
-#ifndef MAX_THREADS
-# define MAX_THREADS 256
- /* FIXME: */
- /* Things may get quite slow for large numbers of threads, */
- /* since we look them up with sequential search. */
-#endif
-
GC_bool GC_thr_initialized = FALSE;
+GC_bool GC_need_to_lock = FALSE;
+
+static GC_bool parallel_initialized = FALSE;
+
+void GC_init_parallel(void);
+
#ifdef GC_DLL
- GC_API GC_bool GC_need_to_lock = TRUE;
+ /* Turn on GC_win32_dll_threads */
+ GC_API void GC_use_DllMain(void)
+ {
+# ifdef THREAD_LOCAL_ALLOC
+ ABORT("Cannot use thread local allocation with DllMain-based "
+ "thread registration.");
+ /* Thread-local allocation really wants to lock at thread */
+ /* entry and exit. */
+# endif
+ GC_need_to_lock = TRUE;
/* Cannot intercept thread creation. */
+ GC_ASSERT(GC_gc_no == 0);
+ GC_win32_dll_threads = TRUE;
+ }
#else
- GC_bool GC_need_to_lock = FALSE;
+ GC_API void GC_use_DllMain(void)
+ {
+ ABORT("GC not configured as DLL");
+ }
#endif
DWORD GC_main_thread = 0;
-struct GC_thread_Rep {
- AO_t in_use; /* Updated without lock. */
- /* We assert that unused */
- /* entries have invalid ids of */
- /* zero and zero stack fields. */
+struct GC_Thread_Rep {
+ union {
+ AO_t tm_in_use; /* Updated without lock. */
+ /* We assert that unused */
+ /* entries have invalid ids of */
+ /* zero and zero stack fields. */
+ /* Used only with GC_win32_dll_threads. */
+ struct GC_Thread_Rep * tm_next;
+ /* Hash table link without */
+ /* GC_win32_dll_threads. */
+ /* More recently allocated threads */
+ /* with a given pthread id come */
+ /* first. (All but the first are */
+ /* guaranteed to be dead, but we may */
+ /* not yet have registered the join.) */
+ } table_management;
+# define in_use table_management.tm_in_use
+# define next table_management.tm_next
DWORD id;
HANDLE handle;
ptr_t stack_base; /* The cold end of the stack. */
# define FINISHED 1 /* Thread has exited. */
# define DETACHED 2 /* Thread is intended to be detached. */
# endif
+# ifdef THREAD_LOCAL_ALLOC
+ struct thread_local_freelists tlfs;
+# endif
};
-typedef volatile struct GC_thread_Rep * GC_thread;
+typedef struct GC_Thread_Rep * GC_thread;
+
/*
* We assumed that volatile ==> memory ordering, at least among
volatile GC_bool GC_please_stop = FALSE;
-/*
- * FIXME: At initialization time we should perhaps chose
- * between two different thread table representations. This simple
- * linear representation may be the best we can reliably do if we use
- * DllMain. By default we should probably rely on thread registration
- * as with the other platforms, and use a hash table or other real
- * data structure.
- */
-volatile struct GC_thread_Rep thread_table[MAX_THREADS];
+/* We have two versions of the thread table. Which one */
+/* we use depends on whether or not GC_win32_dll_threads */
+/* is set. The one complication is that at process */
+/* startup, we use both, since the client hasn't yet */
+/* had a chance to tell us which one (s)he wants. */
+static GC_bool client_has_run = FALSE;
+
+/* Thread table used if GC_win32_dll_threads is set. */
+/* This is a fixed size array. */
+/* Since we use runtime conditionals, both versions */
+/* are always defined. */
+# ifndef MAX_THREADS
+# define MAX_THREADS 512
+# endif
+ /* Things may get quite slow for large numbers of threads, */
+ /* since we look them up with sequential search. */
+
+ volatile struct GC_Thread_Rep dll_thread_table[MAX_THREADS];
+
+ volatile LONG GC_max_thread_index = 0;
+ /* Largest index in dll_thread_table */
+ /* that was ever used. */
+
+/* And now the version used if GC_win32_dll_threads is not set. */
+/* This is a chained hash table, with much of the code borrowed */
+/* from the Posix implementation. */
+# define THREAD_TABLE_SZ 256 /* Must be power of 2 */
+ volatile GC_thread GC_threads[THREAD_TABLE_SZ];
+
-volatile LONG GC_max_thread_index = 0; /* Largest index in thread_table */
- /* that was ever used. */
+/* Add a thread to GC_threads. We assume it wasn't already there. */
+/* Caller holds allocation lock. */
+/* Unlike the pthreads version, the id field is set by the caller. */
+GC_thread GC_new_thread(DWORD id)
+{
+ int hv = ((word)id) % THREAD_TABLE_SZ;
+ GC_thread result;
+ /* It may not be safe to allocate when we register the first thread. */
+ static struct GC_Thread_Rep first_thread;
+ static GC_bool first_thread_used = FALSE;
+
+ GC_ASSERT(I_HOLD_LOCK());
+ if (!first_thread_used) {
+ result = &first_thread;
+ first_thread_used = TRUE;
+ } else {
+ GC_ASSERT(!GC_win32_dll_threads);
+ result = (struct GC_Thread_Rep *)
+ GC_INTERNAL_MALLOC(sizeof(struct GC_Thread_Rep), NORMAL);
+ GC_ASSERT(result -> flags == 0);
+ }
+ if (result == 0) return(0);
+ /* result -> id = id; Done by caller. */
+ result -> next = GC_threads[hv];
+ GC_threads[hv] = result;
+ GC_ASSERT(result -> flags == 0 /* && result -> thread_blocked == 0 */);
+ return(result);
+}
extern LONG WINAPI GC_write_fault_handler(struct _EXCEPTION_POINTERS *exc_info);
/*
* This may be called from DllMain, and hence operates under unusual
- * constraints. In particular, it must be lock-free.
- * Always called from the thread being added.
+ * constraints. In particular, it must be lock-free if GC_win32_dll_threads
+ * is set. Always called from the thread being added.
+ * If GC_win32_dll_threads is not set, we already hold the allocation lock,
+ * except possibly during single-threaded start-up code.
*/
static GC_thread GC_register_my_thread_inner(struct GC_stack_base *sb,
DWORD thread_id)
{
- int i;
- /* It appears to be unsafe to acquire a lock here, since this */
- /* code is apparently not preeemptible on some systems. */
- /* (This is based on complaints, not on Microsoft's official */
- /* documentation, which says this should perform "only simple */
- /* initialization tasks".) */
- /* Hence we make do with nonblocking synchronization. */
+ volatile struct GC_Thread_Rep * me;
/* The following should be a noop according to the win32 */
/* documentation. There is empirical evidence that it */
# if defined(MPROTECT_VDB)
if (GC_incremental) SetUnhandledExceptionFilter(GC_write_fault_handler);
# endif
+
+ if (GC_win32_dll_threads || !client_has_run) {
+ int i;
+ /* It appears to be unsafe to acquire a lock here, since this */
+ /* code is apparently not preemptible on some systems. */
+ /* (This is based on complaints, not on Microsoft's official */
+ /* documentation, which says this should perform "only simple */
+ /* initialization tasks".) */
+ /* Hence we make do with nonblocking synchronization. */
+ /* It has been claimed that DllMain is really only executed with */
+ /* a particular system lock held, and thus careful use of locking */
+ /* around code that doesn't call back into the system libraries */
+ /* might be OK. But this hasn't been tested across all win32 */
+ /* variants. */
/* cast away volatile qualifier */
- for (i = 0; InterlockedExchange((IE_t)&thread_table[i].in_use,1) != 0; i++) {
- /* Compare-and-swap would make this cleaner, but that's not */
- /* supported before Windows 98 and NT 4.0. In Windows 2000, */
- /* InterlockedExchange is supposed to be replaced by */
- /* InterlockedExchangePointer, but that's not really what I */
- /* want here. */
- /* FIXME: We should eventually declare Win95 dead and use AO_ */
- /* primitives here. */
- if (i == MAX_THREADS - 1)
- ABORT("too many threads");
- }
- /* Update GC_max_thread_index if necessary. The following is safe, */
- /* and unlike CompareExchange-based solutions seems to work on all */
- /* Windows95 and later platforms. */
- /* Unfortunately, GC_max_thread_index may be temporarily out of */
- /* bounds, so readers have to compensate. */
- while (i > GC_max_thread_index) {
- InterlockedIncrement((IE_t)&GC_max_thread_index);
+ for (i = 0; InterlockedExchange((IE_t)&dll_thread_table[i].in_use,1) != 0;
+ i++) {
+ /* Compare-and-swap would make this cleaner, but that's not */
+ /* supported before Windows 98 and NT 4.0. In Windows 2000, */
+ /* InterlockedExchange is supposed to be replaced by */
+ /* InterlockedExchangePointer, but that's not really what I */
+ /* want here. */
+ /* FIXME: We should eventually declare Win95 dead and use AO_ */
+ /* primitives here. */
+ if (i == MAX_THREADS - 1)
+ ABORT("too many threads");
+ }
+ /* Update GC_max_thread_index if necessary. The following is safe, */
+ /* and unlike CompareExchange-based solutions seems to work on all */
+ /* Windows95 and later platforms. */
+ /* Unfortunately, GC_max_thread_index may be temporarily out of */
+ /* bounds, so readers have to compensate. */
+ while (i > GC_max_thread_index) {
+ InterlockedIncrement((IE_t)&GC_max_thread_index);
+ }
+ if (GC_max_thread_index >= MAX_THREADS) {
+ /* We overshot due to simultaneous increments. */
+ /* Setting it to MAX_THREADS-1 is always safe. */
+ GC_max_thread_index = MAX_THREADS - 1;
+ }
+ me = dll_thread_table + i;
}
- if (GC_max_thread_index >= MAX_THREADS) {
- /* We overshot due to simultaneous increments. */
- /* Setting it to MAX_THREADS-1 is always safe. */
- GC_max_thread_index = MAX_THREADS - 1;
+ if (!GC_win32_dll_threads || !client_has_run) {
+ GC_ASSERT(I_HOLD_LOCK() || !client_has_run);
+ me = GC_new_thread(thread_id);
}
# ifdef CYGWIN32
- thread_table[i].pthread_id = pthread_self();
+ me -> pthread_id = pthread_self();
# endif
if (!DuplicateHandle(GetCurrentProcess(),
GetCurrentThread(),
GetCurrentProcess(),
- (HANDLE*)&thread_table[i].handle,
+ (HANDLE*)&(me -> handle),
0,
0,
DUPLICATE_SAME_ACCESS)) {
GC_err_printf("Last error code: %d\n", last_error);
ABORT("DuplicateHandle failed");
}
- thread_table[i].stack_base = sb -> mem_base;
+ me -> stack_base = sb -> mem_base;
/* Up until this point, GC_push_all_stacks considers this thread */
/* invalid. */
- if (thread_table[i].stack_base == NULL)
- ABORT("Bad stack base in GC_register_my_thread");
+ if (me -> stack_base == NULL)
+ ABORT("Bad stack base in GC_register_my_thread_inner");
/* Up until this point, this entry is viewed as reserved but invalid */
/* by GC_delete_thread. */
- thread_table[i].id = thread_id;
+ me -> id = thread_id;
+# if defined(THREAD_LOCAL_ALLOC)
+ GC_init_thread_local((GC_tlfs)(&(me->tlfs)));
+# endif
+ GC_ASSERT(!GC_please_stop || GC_win32_dll_threads);
+ /* Otherwise both we and the thread stopping code would be */
+ /* holding the allocation lock. */
/* If this thread is being created while we are trying to stop */
/* the world, wait here. Hopefully this can't happen on any */
/* systems that don't allow us to block here. */
while (GC_please_stop) Sleep(20);
- return thread_table + i;
+ return (GC_thread)(me);
}
/*
/* without a lock, but should be called in contexts in which the */
/* requested thread cannot be asynchronously deleted, e.g. from the */
/* thread itself. */
-static GC_thread GC_lookup_thread(DWORD thread_id) {
- int i;
- LONG my_max = GC_get_max_thread_index();
-
- for (i = 0;
+/* This version assumes that either GC_win32_dll_threads is set, or */
+/* we hold the allocator lock. */
+/* Also used (for assertion checking only) from thread_local_alloc.c. */
+GC_thread GC_lookup_thread_inner(DWORD thread_id) {
+ if (GC_win32_dll_threads) {
+ int i;
+ LONG my_max = GC_get_max_thread_index();
+ for (i = 0;
i <= my_max &&
- (!AO_load_acquire(&(thread_table[i].in_use))
- || thread_table[i].id != thread_id);
+ (!AO_load_acquire(&(dll_thread_table[i].in_use))
+ || dll_thread_table[i].id != thread_id);
/* Must still be in_use, since nobody else can store our thread_id. */
i++) {}
- if (i > my_max) {
- return 0;
+ if (i > my_max) {
+ return 0;
+ } else {
+ return (GC_thread)(dll_thread_table + i);
+ }
} else {
- return thread_table + i;
+ int hv = ((word)thread_id) % THREAD_TABLE_SZ;
+ register GC_thread p = GC_threads[hv];
+
+ GC_ASSERT(I_HOLD_LOCK());
+ while (p != 0 && p -> id != thread_id) p = p -> next;
+ return(p);
}
}
-int GC_register_my_thread(struct GC_stack_base *sb) {
- DWORD t = GetCurrentThreadId();
-
- if (0 == GC_lookup_thread(t)) {
- /* We lock here, since we want to wait for an ongoing GC. */
+/* A version of the above that acquires the lock if necessary. Note */
+/* that the identically named function for pthreads is different, and */
+/* just assumes we hold the lock. */
+/* Also used (for assertion checking only) from thread_local_alloc.c. */
+static GC_thread GC_lookup_thread(DWORD thread_id)
+{
+ if (GC_win32_dll_threads) {
+ return GC_lookup_thread_inner(thread_id);
+ } else {
+ GC_thread result;
LOCK();
- GC_register_my_thread_inner(sb, t);
+ result = GC_lookup_thread_inner(thread_id);
UNLOCK();
- return GC_SUCCESS;
- } else {
- return GC_DUPLICATE;
+ return result;
}
}
-/* This is intended to be lock-free. */
-/* It is either called synchronously from the thread being deleted, */
-/* or by the joining thread. */
-static void GC_delete_gc_thread(GC_thread thr)
+/* If a thread has been joined, but we have not yet */
+/* been notified, then there may be more than one thread */
+/* in the table with the same win32 id. */
+/* This is OK, but we need a way to delete a specific one. */
+/* Assumes we hold the allocation lock unless */
+/* GC_win32_dll_threads is set. */
+/* If GC_win32_dll_threads is set it should be called from the */
+/* thread being deleted. */
+void GC_delete_gc_thread(GC_thread gc_id)
{
- CloseHandle(thr->handle);
+ if (GC_win32_dll_threads) {
+ /* This is intended to be lock-free. */
+ /* It is either called synchronously from the thread being deleted, */
+ /* or by the joining thread. */
+ CloseHandle(gc_id->handle);
/* cast away volatile qualifier */
- thr->stack_base = 0;
- thr->id = 0;
+ gc_id -> stack_base = 0;
+ gc_id -> id = 0;
# ifdef CYGWIN32
- thr->pthread_id = 0;
+ gc_id -> pthread_id = 0;
# endif /* CYGWIN32 */
- AO_store_release(&(thr->in_use), FALSE);
+ AO_store_release(&(gc_id->in_use), FALSE);
+ } else {
+ DWORD id = gc_id -> id;
+ int hv = ((word)id) % THREAD_TABLE_SZ;
+ register GC_thread p = GC_threads[hv];
+ register GC_thread prev = 0;
+
+ GC_ASSERT(I_HOLD_LOCK());
+ while (p != gc_id) {
+ prev = p;
+ p = p -> next;
+ }
+ if (prev == 0) {
+ GC_threads[hv] = p -> next;
+ } else {
+ prev -> next = p -> next;
+ }
+ GC_INTERNAL_FREE(p);
+ }
}
+/* Delete a thread from GC_threads. We assume it is there. */
+/* (The code intentionally traps if it wasn't.) */
+/* Assumes we hold the allocation lock unless */
+/* GC_win32_dll_threads is set. */
+/* If GC_win32_dll_threads is set it should be called from the */
+/* thread being deleted. */
+void GC_delete_thread(DWORD id)
+{
+ if (GC_win32_dll_threads) {
+ GC_thread t = GC_lookup_thread_inner(id);
-static void GC_delete_thread(DWORD thread_id) {
- GC_thread t = GC_lookup_thread(thread_id);
+ if (0 == t) {
+ WARN("Removing nonexistent thread %ld\n", (GC_word)id);
+ } else {
+ GC_delete_gc_thread(t);
+ }
+ } else {
+ int hv = ((word)id) % THREAD_TABLE_SZ;
+ register GC_thread p = GC_threads[hv];
+ register GC_thread prev = 0;
+
+ GC_ASSERT(I_HOLD_LOCK());
+ while (p -> id != id) {
+ prev = p;
+ p = p -> next;
+ }
+ if (prev == 0) {
+ GC_threads[hv] = p -> next;
+ } else {
+ prev -> next = p -> next;
+ }
+ GC_INTERNAL_FREE(p);
+ }
+}
+
+int GC_register_my_thread(struct GC_stack_base *sb) {
+ DWORD t = GetCurrentThreadId();
- if (0 == t) {
- WARN("Removing nonexistent thread %ld\n", (GC_word)thread_id);
+ if (0 == GC_lookup_thread(t)) {
+ /* We lock here, since we want to wait for an ongoing GC. */
+ LOCK();
+ GC_register_my_thread_inner(sb, t);
+ UNLOCK();
+ return GC_SUCCESS;
} else {
- GC_delete_gc_thread(t);
+ return GC_DUPLICATE;
}
}
int GC_unregister_my_thread(void)
{
- GC_delete_thread(GetCurrentThreadId());
+# if defined(THREAD_LOCAL_ALLOC)
+    /* Destroy thread-local free lists BEFORE deleting the thread  */
+    /* entry; after deletion the lookup below would return 0.      */
+    LOCK();
+    {
+      GC_thread me = GC_lookup_thread_inner(GetCurrentThreadId());
+      GC_destroy_thread_local(&(me->tlfs));
+    }
+    UNLOCK();
+# endif
+  if (GC_win32_dll_threads) {
+    /* Should we just ignore this? */
+    GC_delete_thread(GetCurrentThreadId());
+  } else {
+    LOCK();
+    GC_delete_thread(GetCurrentThreadId());
+    UNLOCK();
+  }
return GC_SUCCESS;
}
#ifdef CYGWIN32
+/* A quick-and-dirty cache of the mapping between pthread_t */
+/* and win32 thread id. */
+#define PTHREAD_MAP_SIZE 512
+DWORD GC_pthread_map_cache[PTHREAD_MAP_SIZE];
+#define HASH(pthread_id) ((((word)(pthread_id) >> 5)) % PTHREAD_MAP_SIZE)
+ /* It appears pthread_t is really a pointer type ... */
+#define SET_PTHREAD_MAP_CACHE(pthread_id, win32_id) \
+ GC_pthread_map_cache[HASH(pthread_id)] = (win32_id);
+#define GET_PTHREAD_MAP_CACHE(pthread_id) \
+ GC_pthread_map_cache[HASH(pthread_id)]
+
/* Return a GC_thread corresponding to a given pthread_t. */
/* Returns 0 if it's not there. */
/* We assume that this is only called for pthread ids that */
/* have not yet terminated or are still joinable, and */
/* cannot be concurrently terminated. */
+/* Assumes we do NOT hold the allocation lock. */
static GC_thread GC_lookup_pthread(pthread_t id)
{
- int i;
- LONG my_max = GC_get_max_thread_index();
+ if (GC_win32_dll_threads) {
+ int i;
+ LONG my_max = GC_get_max_thread_index();
- for (i = 0;
- i <= my_max &&
- (!AO_load_acquire(&(thread_table[i].in_use))
- || thread_table[i].pthread_id != id);
+ for (i = 0;
+ i <= my_max &&
+ (!AO_load_acquire(&(dll_thread_table[i].in_use))
+ || dll_thread_table[i].pthread_id != id);
/* Must still be in_use, since nobody else can store our thread_id. */
i++);
- if (i > my_max) return 0;
- return thread_table + i;
+ if (i > my_max) return 0;
+ return (GC_thread)(dll_thread_table + i);
+ } else {
+ /* We first try the cache. If that fails, we use a very slow */
+ /* approach. */
+ int hv_guess = GET_PTHREAD_MAP_CACHE(id) % THREAD_TABLE_SZ;
+ int hv;
+ GC_thread p;
+
+ LOCK();
+ for (p = GC_threads[hv_guess]; 0 != p; p = p -> next) {
+ if (pthread_equal(p -> pthread_id, id))
+ goto foundit;
+ }
+ for (hv = 0; hv < THREAD_TABLE_SZ; ++hv) {
+ for (p = GC_threads[hv]; 0 != p; p = p -> next) {
+ if (pthread_equal(p -> pthread_id, id))
+ goto foundit;
+ }
+ }
+ p = 0;
+ foundit:
+ UNLOCK();
+ return p;
+ }
}
#endif /* CYGWIN32 */
void GC_push_thread_structures(void)
{
+ GC_ASSERT(I_HOLD_LOCK());
+ if (GC_win32_dll_threads) {
/* Unlike the other threads implementations, the thread table here */
/* contains no pointers to the collectable heap. Thus we have */
/* no private structures we need to preserve. */
-# ifdef CYGWIN32
- { int i; /* pthreads may keep a pointer in the thread exit value */
- LONG my_max = GC_get_max_thread_index();
+# ifdef CYGWIN32
+ { int i; /* pthreads may keep a pointer in the thread exit value */
+ LONG my_max = GC_get_max_thread_index();
- for (i = 0; i <= my_max; i++)
- if (thread_table[i].in_use)
- GC_push_all((ptr_t)&(thread_table[i].status),
- (ptr_t)(&(thread_table[i].status)+1));
+ for (i = 0; i <= my_max; i++)
+ if (dll_thread_table[i].in_use)
+ GC_push_all((ptr_t)&(dll_thread_table[i].status),
+ (ptr_t)(&(dll_thread_table[i].status)+1));
+ }
+# endif
+ } else {
+ GC_push_all((ptr_t)(GC_threads), (ptr_t)(GC_threads)+sizeof(GC_threads));
}
+# if defined(THREAD_LOCAL_ALLOC)
+ GC_push_all((ptr_t)(&GC_thread_key),
+ (ptr_t)(&GC_thread_key)+sizeof(&GC_thread_key));
+ /* Just in case we ever use our own TLS implementation. */
# endif
}
+/* Suspend the given thread, if it's still active. */
+void GC_suspend(GC_thread t)
+{
+# ifdef MSWINCE
+ /* SuspendThread will fail if thread is running kernel code */
+ while (SuspendThread(t -> handle) == (DWORD)-1)
+ Sleep(10);
+# else
+ /* Apparently the Windows 95 GetOpenFileName call creates */
+ /* a thread that does not properly get cleaned up, and */
+ /* SuspendThread on its descriptor may provoke a crash. */
+ /* This reduces the probability of that event, though it still */
+ /* appears there's a race here. */
+ DWORD exitCode;
+ if (GetExitCodeThread(t -> handle, &exitCode) &&
+ exitCode != STILL_ACTIVE) {
+ t -> stack_base = 0; /* prevent stack from being pushed */
+# ifndef CYGWIN32
+ /* this breaks pthread_join on Cygwin, which is guaranteed to */
+ /* only see user pthreads */
+ AO_store(&(t -> in_use), FALSE);
+ CloseHandle(t -> handle);
+# endif
+ return;
+ }
+ if (SuspendThread(t -> handle) == (DWORD)-1)
+ ABORT("SuspendThread failed");
+# endif
+ t -> suspended = TRUE;
+}
+
+/* Defined in misc.c */
+#ifndef CYGWIN32
+ extern CRITICAL_SECTION GC_write_cs;
+#endif
+
void GC_stop_world(void)
{
DWORD thread_id = GetCurrentThreadId();
int i;
if (!GC_thr_initialized) ABORT("GC_stop_world() called before GC_thr_init()");
+ GC_ASSERT(I_HOLD_LOCK());
GC_please_stop = TRUE;
- for (i = 0; i <= GC_get_max_thread_index(); i++)
- if (thread_table[i].stack_base != 0
- && thread_table[i].id != thread_id) {
-# ifdef MSWINCE
- /* SuspendThread will fail if thread is running kernel code */
- while (SuspendThread(thread_table[i].handle) == (DWORD)-1)
- Sleep(10);
-# else
- /* Apparently the Windows 95 GetOpenFileName call creates */
- /* a thread that does not properly get cleaned up, and */
- /* SuspendThread on its descriptor may provoke a crash. */
- /* This reduces the probability of that event, though it still */
- /* appears there's a race here. */
- DWORD exitCode;
- if (GetExitCodeThread(thread_table[i].handle,&exitCode) &&
- exitCode != STILL_ACTIVE) {
- thread_table[i].stack_base = 0; /* prevent stack from being pushed */
-# ifndef CYGWIN32
- /* this breaks pthread_join on Cygwin, which is guaranteed to */
- /* only see user pthreads */
- AO_store(&(thread_table[i].in_use), FALSE);
- CloseHandle(thread_table[i].handle);
-# endif
- continue;
- }
- if (SuspendThread(thread_table[i].handle) == (DWORD)-1)
- ABORT("SuspendThread failed");
-# endif
- thread_table[i].suspended = TRUE;
+# ifndef CYGWIN32
+ EnterCriticalSection(&GC_write_cs);
+# endif
+ if (GC_win32_dll_threads) {
+ /* Any threads being created during this loop will end up sleeping */
+ /* in the thread registration code until GC_please_stop becomes */
+ /* false. This is not ideal, but hopefully correct. */
+ for (i = 0; i <= GC_get_max_thread_index(); i++) {
+ volatile struct GC_Thread_Rep * t = dll_thread_table + i;
+ if (t -> stack_base != 0
+ && t -> id != thread_id) {
+ GC_suspend((GC_thread)t);
+ }
}
+ } else {
+ GC_thread t;
+ int i;
+
+ for (i = 0; i < THREAD_TABLE_SZ; i++) {
+ for (t = GC_threads[i]; t != 0; t = t -> next) {
+ if (t -> stack_base != 0
+ && t -> id != thread_id) {
+ GC_suspend(t);
+ }
+ }
+ }
+ }
+# ifndef CYGWIN32
+ LeaveCriticalSection(&GC_write_cs);
+# endif
}
void GC_start_world(void)
int i;
LONG my_max = GC_get_max_thread_index();
- for (i = 0; i <= my_max; i++)
- if (thread_table[i].stack_base != 0 && thread_table[i].suspended
- && thread_table[i].id != thread_id) {
- if (ResumeThread(thread_table[i].handle) == (DWORD)-1)
- ABORT("ResumeThread failed");
- thread_table[i].suspended = FALSE;
+ GC_ASSERT(I_HOLD_LOCK());
+ if (GC_win32_dll_threads) {
+ for (i = 0; i <= my_max; i++) {
+ GC_thread t = (GC_thread)(dll_thread_table + i);
+ if (t -> stack_base != 0 && t -> suspended
+ && t -> id != thread_id) {
+ if (ResumeThread(t -> handle) == (DWORD)-1)
+ ABORT("ResumeThread failed");
+ t -> suspended = FALSE;
+ }
}
- GC_please_stop = FALSE;
-}
-
-# ifdef _MSC_VER
-# pragma warning(disable:4715)
-# endif
-ptr_t GC_current_stackbottom(void)
-{
- DWORD thread_id = GetCurrentThreadId();
- int i;
- LONG my_max = GC_get_max_thread_index();
+ } else {
+ GC_thread t;
+ int i;
- for (i = 0; i <= my_max; i++)
- if (thread_table[i].stack_base && thread_table[i].id == thread_id)
- return thread_table[i].stack_base;
- ABORT("no thread table entry for current thread");
+ for (i = 0; i < THREAD_TABLE_SZ; i++) {
+ for (t = GC_threads[i]; t != 0; t = t -> next) {
+ if (t -> stack_base != 0 && t -> suspended
+ && t -> id != thread_id) {
+ if (ResumeThread(t -> handle) == (DWORD)-1)
+ ABORT("ResumeThread failed");
+ t -> suspended = FALSE;
+ }
+ }
+ }
+ }
+ GC_please_stop = FALSE;
}
-# ifdef _MSC_VER
-# pragma warning(default:4715)
-# endif
# ifdef MSWINCE
/* The VirtualQuery calls below won't work properly on WinCE, but */
}
# endif
-void GC_push_all_stacks(void)
+void GC_push_stack_for(GC_thread thread)
{
- DWORD thread_id = GetCurrentThreadId();
- GC_bool found_me = FALSE;
- int i;
- int dummy;
- ptr_t sp, stack_min;
- GC_thread thread;
- LONG my_max = GC_get_max_thread_index();
-
- for (i = 0; i <= my_max; i++) {
- thread = thread_table + i;
- if (thread -> in_use && thread -> stack_base) {
- if (thread -> id == thread_id) {
+ int dummy;
+ ptr_t sp, stack_min;
+ DWORD me = GetCurrentThreadId();
+
+ if (thread -> stack_base) {
+ if (thread -> id == me) {
sp = (ptr_t) &dummy;
- found_me = TRUE;
} else {
CONTEXT context;
context.ContextFlags = CONTEXT_INTEGER|CONTEXT_CONTROL;
- if (!GetThreadContext(thread_table[i].handle, &context))
+ if (!GetThreadContext(thread -> handle, &context))
ABORT("GetThreadContext failed");
/* Push all registers that might point into the heap. Frame */
# else
# error "architecture is not supported"
# endif
- }
+ } /* ! current thread */
stack_min = GC_get_stack_min(thread->stack_base);
- if (sp >= stack_min && sp < thread->stack_base)
+ if (sp >= stack_min && sp < thread->stack_base) {
+# if DEBUG_CYGWIN_THREADS
+ GC_printf("Pushing thread from %p to %p for %d from %d\n",
+ sp, thread -> stack_base, thread -> id, me);
+# endif
GC_push_all_stack(sp, thread->stack_base);
- else {
+ } else {
WARN("Thread stack pointer 0x%lx out of range, pushing everything\n",
(unsigned long)sp);
GC_push_all_stack(stack_min, thread->stack_base);
}
+ } /* thread looks live */
+}
+
+void GC_push_all_stacks(void)
+{
+ DWORD me = GetCurrentThreadId();
+ GC_bool found_me = FALSE;
+
+ if (GC_win32_dll_threads) {
+ int i;
+ LONG my_max = GC_get_max_thread_index();
+
+ for (i = 0; i <= my_max; i++) {
+ GC_thread t = (GC_thread)(dll_thread_table + i);
+ if (t -> in_use) {
+ GC_push_stack_for(t);
+ if (t -> id == me) found_me = TRUE;
+ }
+ }
+ } else {
+ GC_thread t;
+ int i;
+
+ for (i = 0; i < THREAD_TABLE_SZ; i++) {
+ for (t = GC_threads[i]; t != 0; t = t -> next) {
+ GC_push_stack_for(t);
+ if (t -> id == me) found_me = TRUE;
+ }
}
}
if (!found_me) ABORT("Collecting from unknown thread.");
int i;
# define ADDR_LIMIT (char *)(-1L)
char * current_min = ADDR_LIMIT;
- LONG my_max = GC_get_max_thread_index();
+
+ if (GC_win32_dll_threads) {
+ LONG my_max = GC_get_max_thread_index();
- for (i = 0; i <= my_max; i++) {
- char * s = (char *)thread_table[i].stack_base;
+ for (i = 0; i <= my_max; i++) {
+ ptr_t s = (ptr_t)(dll_thread_table[i].stack_base);
if (0 != s && s > start && s < current_min) {
current_min = s;
}
+ }
+ } else {
+ for (i = 0; i < THREAD_TABLE_SZ; i++) {
+ GC_thread t;
+
+ for (t = GC_threads[i]; t != 0; t = t -> next) {
+ ptr_t s = (ptr_t)(t -> stack_base);
+
+ if (0 != s && s > start && s < current_min) {
+ current_min = s;
+ }
+ }
+ }
}
*hi = current_min;
if (current_min == ADDR_LIMIT) {
#if !defined(CYGWIN32)
-#if !defined(MSWINCE) && defined(GC_DLL)
-
-/* We register threads from DllMain */
-
-GC_API HANDLE WINAPI GC_CreateThread(
- LPSECURITY_ATTRIBUTES lpThreadAttributes,
- DWORD dwStackSize, LPTHREAD_START_ROUTINE lpStartAddress,
- LPVOID lpParameter, DWORD dwCreationFlags, LPDWORD lpThreadId )
-{
- return CreateThread(lpThreadAttributes, dwStackSize, lpStartAddress,
- lpParameter, dwCreationFlags, lpThreadId);
-}
-
-#else /* defined(MSWINCE) || !defined(GC_DLL)) */
-
/* We have no DllMain to take care of new threads. Thus we */
/* must properly intercept thread creation. */
thread_args *args;
- if (!GC_is_initialized) GC_init();
- /* make sure GC is initialized (i.e. main thread is attached) */
-
- args = GC_malloc_uncollectable(sizeof(thread_args));
+ if (!parallel_initialized) GC_init_parallel();
+ /* make sure GC is initialized (i.e. main thread is attached,
+ tls initialized) */
+
+ client_has_run = TRUE;
+ if (GC_win32_dll_threads) {
+ return CreateThread(lpThreadAttributes, dwStackSize, lpStartAddress,
+ lpParameter, dwCreationFlags, lpThreadId);
+ } else {
+ args = GC_malloc_uncollectable(sizeof(thread_args));
/* Handed off to and deallocated by child thread. */
- if (0 == args) {
+ if (0 == args) {
SetLastError(ERROR_NOT_ENOUGH_MEMORY);
return NULL;
- }
+ }
- /* set up thread arguments */
+ /* set up thread arguments */
args -> start = lpStartAddress;
args -> param = lpParameter;
- GC_need_to_lock = TRUE;
- thread_h = CreateThread(lpThreadAttributes,
- dwStackSize, thread_start,
- args, dwCreationFlags,
- lpThreadId);
+ GC_need_to_lock = TRUE;
+ thread_h = CreateThread(lpThreadAttributes,
+ dwStackSize, GC_win32_start,
+ args, dwCreationFlags,
+ lpThreadId);
- return thread_h;
+ return thread_h;
+ }
}
-static DWORD WINAPI thread_start(LPVOID arg)
+void * GC_win32_start_inner(struct GC_stack_base *sb, LPVOID arg)
{
- DWORD ret = 0;
+ void * ret;
thread_args *args = (thread_args *)arg;
- struct GC_stack_base *sb;
- GC_get_stack_base(&sb);
- GC_register_my_thread(&sb); /* This waits for an in-progress GC. */
+ GC_register_my_thread(sb); /* This waits for an in-progress GC. */
/* Clear the thread entry even if we exit with an exception. */
/* This is probably pointless, since an uncaught exception is */
#ifndef __GNUC__
__try {
#endif /* __GNUC__ */
- ret = args->start (args->param);
+ ret = (void *)args->start (args->param);
#ifndef __GNUC__
} __finally {
#endif /* __GNUC__ */
+#   if defined(THREAD_LOCAL_ALLOC)
+      LOCK();
+      GC_destroy_thread_local(
+		&(GC_lookup_thread_inner(GetCurrentThreadId())->tlfs));
+      UNLOCK();
+#   endif
GC_free(args);
GC_delete_thread(GetCurrentThreadId());
#ifndef __GNUC__
return ret;
}
-#endif /* !defined(MSWINCE) && !(defined(__MINGW32__) && !defined(_DLL)) */
+DWORD WINAPI GC_win32_start(LPVOID arg)
+{
+ return (DWORD)GC_call_with_stack_base(GC_win32_start_inner, arg);
+}
#endif /* !CYGWIN32 */
#ifdef MSWINCE
/* Called by GC_init() - we hold the allocation lock. */
void GC_thr_init(void) {
struct GC_stack_base sb;
+ int sb_result;
+ GC_ASSERT(I_HOLD_LOCK());
if (GC_thr_initialized) return;
GC_main_thread = GetCurrentThreadId();
GC_thr_initialized = TRUE;
/* Add the initial thread, so we can stop it. */
- GC_get_stack_base(&sb);
+ sb_result = GC_get_stack_base(&sb);
+ GC_ASSERT(sb_result == GC_SUCCESS);
GC_register_my_thread(&sb);
}
(int)pthread_self(), GetCurrentThreadId(), (int)pthread_id);
# endif
+ client_has_run = TRUE;
/* Thread being joined might not have registered itself yet. */
/* After the join,thread id may have been recycled. */
/* FIXME: It would be better if this worked more like */
result = pthread_join(pthread_id, retval);
- /* FIXME: This is an asynchronous deletion, which we said can't */
- /* happen? */
- GC_delete_gc_thread(joinee);
+ if (!GC_win32_dll_threads) {
+ LOCK();
+ GC_delete_gc_thread(joinee);
+ UNLOCK();
+ } /* otherwise dllmain handles it. */
# if DEBUG_CYGWIN_THREADS
GC_printf("thread 0x%x(0x%x) completed join with thread 0x%x.\n",
int result;
struct start_info * si;
- if (!GC_is_initialized) GC_init();
+ if (!parallel_initialized) GC_init_parallel();
/* make sure GC is initialized (i.e. main thread is attached) */
+ client_has_run = TRUE;
+ if (GC_win32_dll_threads) {
+ return pthread_create(new_thread, attr, start_routine, arg);
+ }
/* This is otherwise saved only in an area mmapped by the thread */
/* library, which isn't visible to the collector. */
(int)pthread_self(), GetCurrentThreadId);
# endif
GC_need_to_lock = TRUE;
- result = pthread_create(new_thread, attr, GC_start_routine, si);
+ result = pthread_create(new_thread, attr, GC_pthread_start, si);
if (result) { /* failure */
GC_free(si);
return(result);
}
-void * GC_start_routine(void * arg)
+void * GC_pthread_start_inner(struct GC_stack_base *sb, void * arg)
{
struct start_info * si = arg;
void * result;
void *(*start)(void *);
void *start_arg;
- pthread_t pthread_id;
DWORD thread_id = GetCurrentThreadId();
+ pthread_t pthread_id = pthread_self();
GC_thread me;
GC_bool detached;
int i;
- struct GC_stack_base sb;
# if DEBUG_CYGWIN_THREADS
- GC_printf("thread 0x%x(0x%x) starting...\n",(int)pthread_self(),
+ GC_printf("thread 0x%x(0x%x) starting...\n",(int)pthread_id,
thread_id);
# endif
+ GC_ASSERT(!GC_win32_dll_threads);
/* If a GC occurs before the thread is registered, that GC will */
/* ignore this thread. That's fine, since it will block trying to */
/* acquire the allocation lock, and won't yet hold interesting */
LOCK();
/* We register the thread here instead of in the parent, so that */
/* we don't need to hold the allocation lock during pthread_create. */
- GC_get_stack_base(&sb);
- me = GC_register_my_thread_inner(&sb, thread_id);
+ me = GC_register_my_thread_inner(sb, thread_id);
+ SET_PTHREAD_MAP_CACHE(pthread_id, thread_id);
UNLOCK();
start = si -> start_routine;
start_arg = si -> arg;
if (si-> detached) me -> flags |= DETACHED;
- me -> pthread_id = pthread_id = pthread_self();
+ me -> pthread_id = pthread_id;
GC_free(si); /* was allocated uncollectable */
return(result);
}
+void * GC_pthread_start(void * arg)
+{
+ return GC_call_with_stack_base(GC_pthread_start_inner, arg);
+}
+
void GC_thread_exit_proc(void *arg)
{
GC_thread me = (GC_thread)arg;
int i;
+ GC_ASSERT(!GC_win32_dll_threads);
# if DEBUG_CYGWIN_THREADS
GC_printf("thread 0x%x(0x%x) called pthread_exit().\n",
(int)pthread_self(),GetCurrentThreadId());
# endif
LOCK();
+# if defined(THREAD_LOCAL_ALLOC)
+ GC_destroy_thread_local(&(me->tlfs));
+# endif
if (me -> flags & DETACHED) {
GC_delete_thread(GetCurrentThreadId());
} else {
/* nothing required here... */
int GC_pthread_sigmask(int how, const sigset_t *set, sigset_t *oset) {
+ client_has_run = TRUE;
return pthread_sigmask(how, set, oset);
}
int result;
GC_thread thread_gc_id;
+ client_has_run = TRUE;
LOCK();
thread_gc_id = GC_lookup_pthread(thread);
UNLOCK();
{
struct GC_stack_base sb;
DWORD thread_id;
+ int sb_result;
+
+ if (client_has_run && !GC_win32_dll_threads) return TRUE;
switch (reason) {
case DLL_PROCESS_ATTACH:
thread_id = GetCurrentThreadId();
if (GC_main_thread != thread_id) {
/* Don't lock here. */
- GC_get_stack_base(&sb);
+ sb_result = GC_get_stack_base(&sb);
+ GC_ASSERT(sb_result == GC_SUCCESS);
+# ifdef THREAD_LOCAL_ALLOC
+ ABORT("Cannot initialize thread local cache from DllMain");
+# endif
GC_register_my_thread_inner(&sb, thread_id);
} /* o.w. we already did it during GC_thr_init(), called by GC_init() */
break;
case DLL_THREAD_DETACH:
- LOCK(); /* Safe? DllMain description is ambiguous. */
+ /* We are hopefully running in the context of the exiting thread. */
+ client_has_run = TRUE;
+ if (!GC_win32_dll_threads) return TRUE;
GC_delete_thread(GetCurrentThreadId());
- UNLOCK();
break;
case DLL_PROCESS_DETACH:
{
int i;
- LOCK();
+ if (!GC_win32_dll_threads) return TRUE;
for (i = 0; i <= GC_get_max_thread_index(); ++i)
{
- if (AO_load(&(thread_table[i].in_use)))
- GC_delete_gc_thread(thread_table + i);
+ if (AO_load(&(dll_thread_table[i].in_use)))
+ GC_delete_gc_thread(dll_thread_table + i);
}
- UNLOCK();
GC_deinit();
DeleteCriticalSection(&GC_allocate_ml);
# endif /* !MSWINCE */
-# if defined(THREAD_LOCAL_ALLOC) && !defined(DBG_HDRS_ALL)
+/* Perform all initializations, including those that */
+/* may require allocation. */
+/* Called without allocation lock. */
+/* Must be called before a second thread is created. */
+void GC_init_parallel(void)
+{
+ if (parallel_initialized) return;
+ parallel_initialized = TRUE;
+
+ /* GC_init() calls us back, so set flag first. */
+ if (!GC_is_initialized) GC_init();
+ /* Initialize thread local free lists if used. */
+# if defined(THREAD_LOCAL_ALLOC)
+ LOCK();
+ GC_init_thread_local(&(GC_lookup_thread(GetCurrentThreadId())->tlfs));
+ UNLOCK();
+# endif
+}
+
+#if defined(USE_PTHREAD_LOCKS)
+ /* Support for pthread locking code. */
+ /* Pthread_mutex_try_lock may not win here, */
+ /* due to builtin support for spinning first? */
+
+volatile GC_bool GC_collecting = 0;
+ /* A hint that we're in the collector and */
+ /* holding the allocation lock for an */
+ /* extended period. */
+
+void GC_lock(void)
+{
+ pthread_mutex_lock(&GC_allocate_ml);
+}
+#endif /* USE_PTHREAD ... */
-/* We don't really support thread-local allocation with DBG_HDRS_ALL */
+# if defined(THREAD_LOCAL_ALLOC)
/* Add thread-local allocation support. Microsoft uses __declspec(thread) */
+/* We must explicitly mark ptrfree and gcj free lists, since the free */
+/* list links wouldn't otherwise be found. We also set them in the */
+/* normal free lists, since that involves touching less memory than if */
+/* we scanned them normally. */
+void GC_mark_thread_local_free_lists(void)
+{
+ int i;
+ GC_thread p;
+
+ for (i = 0; i < THREAD_TABLE_SZ; ++i) {
+ for (p = GC_threads[i]; 0 != p; p = p -> next) {
+ GC_mark_thread_local_fls_for(&(p->tlfs));
+ }
+ }
+}
+
+#if defined(GC_ASSERTIONS)
+ /* Check that all thread-local free-lists are completely marked. */
+ /* also check that thread-specific-data structures are marked. */
+ void GC_check_tls(void) {
+ int i;
+ GC_thread p;
+
+ for (i = 0; i < THREAD_TABLE_SZ; ++i) {
+ for (p = GC_threads[i]; 0 != p; p = p -> next) {
+ GC_check_tls_for(&(p->tlfs));
+ }
+ }
+# if defined(USE_CUSTOM_SPECIFIC)
+ if (GC_thread_key != 0)
+ GC_check_tsd_marks(GC_thread_key);
+# endif
+ }
+#endif /* GC_ASSERTIONS */
+
#endif /* THREAD_LOCAL_ALLOC ... */
#endif /* GC_WIN32_THREADS */