From 3c50a689ca85f4fe56afbc8da9e894c4cc3af845 Mon Sep 17 00:00:00 2001 From: Ivan Maidanski Date: Tue, 26 Jul 2011 20:30:36 +0400 Subject: [PATCH] gc7.0alpha5 tarball import --- Makefile | 27 +- Makefile.direct | 27 +- Makefile.in | 1 + NT_STATIC_THREADS_MAKEFILE | 4 +- allchblk.c | 12 +- alloc.c | 10 +- configure | 36 +- configure.ac | 18 +- doc/README | 2 +- doc/README.changes | 56 +- doc/README.linux | 8 +- doc/doc.am | 1 + doc/gcdescr.html | 63 ++- doc/overview.html | 446 ++++++++++++++++ headers.c | 2 +- include/gc.h | 29 +- include/gc_config_macros.h | 21 +- include/gc_inline.h | 15 +- include/private/gc_locks.h | 104 ++-- include/private/gc_pmark.h | 45 +- include/private/gc_priv.h | 78 ++- include/private/gcconfig.h | 22 +- include/private/pthread_support.h | 5 +- include/private/thread_local_alloc.h | 58 ++- mach_dep.c | 80 +-- malloc.c | 16 +- mallocx.c | 79 +-- mark.c | 168 +++--- mark_rts.c | 2 +- misc.c | 35 +- os_dep.c | 126 +---- pthread_stop_world.c | 12 +- pthread_support.c | 54 +- reclaim.c | 11 +- setjmp_t.c | 26 +- sparc_mach_dep.S | 4 + tests/leak_test.c | 4 + tests/test.c | 100 +--- tests/test_cpp.cc | 8 +- tests/thread_leak_test.c | 1 + thread_local_alloc.c | 47 +- threadlibs.c | 6 +- typd_mlc.c | 24 +- version.h | 2 +- win32_threads.c | 954 ++++++++++++++++++++++++++--------- 45 files changed, 1851 insertions(+), 998 deletions(-) create mode 100644 doc/overview.html diff --git a/Makefile b/Makefile index 24326c0..78e8159 100644 --- a/Makefile +++ b/Makefile @@ -32,7 +32,7 @@ VPATH= $(srcdir) # Atomic_ops installation directory. If this doesn't exist, we create # it from the included libatomic_ops distribution. 
-AO_VERSION=1.0 +AO_VERSION=1.1 AO_SRC_DIR=$(srcdir)/libatomic_ops-$(AO_VERSION) AO_INSTALL_DIR=$(srcdir)/libatomic_ops-install @@ -349,7 +349,7 @@ SRCS= $(CSRCS) mips_sgi_mach_dep.s rs6000_mach_dep.s alpha_mach_dep.S \ DOC_FILES= README.QUICK doc/README.Mac doc/README.MacOSX doc/README.OS2 \ doc/README.amiga doc/README.cords doc/debugging.html \ - doc/porting.html \ + doc/porting.html doc/overview.html \ doc/README.dj doc/README.hp doc/README.linux doc/README.rs6000 \ doc/README.sgi doc/README.solaris2 doc/README.uts \ doc/README.win32 doc/barrett_diagram doc/README \ @@ -450,8 +450,9 @@ $(OBJS) tests/test.o dyn_load.o dyn_load_sunos53.o: \ mark.o typd_mlc.o finalize.o ptr_chck.o: $(srcdir)/include/gc_mark.h \ $(srcdir)/include/private/gc_pmark.h -specific.o pthread_support.o: $(srcdir)/include/private/specific.h \ - $(srcdir)/include/gc_inline.h +specific.o pthread_support.o thread_local_alloc.o win32_threads.o: \ + $(srcdir)/include/private/specific.h $(srcdir)/include/gc_inline.h \ + $(srcdir)/include/private/thread_local_alloc.h dbg_mlc.o gcj_mlc.o: $(srcdir)/include/private/dbg_mlc.h @@ -465,6 +466,7 @@ tests: base_lib gc.a: $(OBJS) dyn_load.o $(UTILS) echo > base_lib rm -f dont_ar_1 + cp $(AO_INSTALL_DIR)/lib/libatomic_ops.a gc.a ./if_mach SPARC SUNOS5 touch dont_ar_1 ./if_mach SPARC SUNOS5 $(AR) rus gc.a $(OBJS) dyn_load.o ./if_mach M68K AMIGA touch dont_ar_1 @@ -513,7 +515,7 @@ dyn_load_sunos53.o: dyn_load.c # SunOS5 shared library version of the collector sunos5gc.so: $(OBJS) dyn_load_sunos53.o - $(CC) -G -o sunos5gc.so $(OBJS) dyn_load_sunos53.o -ldl + $(CC) -G -o sunos5gc.so $(OBJS) dyn_load_sunos53.o $(AO_INSTALL_DIR)/lib/libatomic_ops.a -ldl ln sunos5gc.so libgc.so # Alpha/OSF shared library version of the collector @@ -556,14 +558,11 @@ mach_dep.o: $(srcdir)/mach_dep.c $(srcdir)/mips_sgi_mach_dep.s \ $(srcdir)/ia64_save_regs_in_stack.s \ $(srcdir)/sparc_netbsd_mach_dep.s $(UTILS) rm -f mach_dep.o - ./if_mach MIPS IRIX5 $(CC) -c -o mach_dep.o 
$(srcdir)/mips_sgi_mach_dep.s - ./if_mach MIPS RISCOS $(AS) -o mach_dep.o $(srcdir)/mips_ultrix_mach_dep.s - ./if_mach MIPS ULTRIX $(AS) -o mach_dep.o $(srcdir)/mips_ultrix_mach_dep.s - ./if_mach POWERPC DARWIN $(AS) -o mach_dep.o $(srcdir)/powerpc_darwin_mach_dep.s - ./if_mach ALPHA LINUX $(CC) -c -o mach_dep.o $(srcdir)/alpha_mach_dep.S - ./if_mach SPARC SUNOS5 $(CC) -c -o mach_dep.o $(srcdir)/sparc_mach_dep.S - ./if_mach SPARC OPENBSD $(AS) -o mach_dep.o $(srcdir)/sparc_sunos4_mach_dep.s - ./if_mach SPARC NETBSD $(AS) -o mach_dep.o $(srcdir)/sparc_netbsd_mach_dep.s + ./if_mach SPARC SUNOS5 $(CC) -c -o mach_dep2.o $(srcdir)/sparc_mach_dep.S + ./if_mach SPARC OPENBSD $(AS) -o mach_dep2.o $(srcdir)/sparc_sunos4_mach_dep.s + ./if_mach SPARC NETBSD $(AS) -o mach_dep2.o $(srcdir)/sparc_netbsd_mach_dep.s + ./if_mach SPARC "" $(CC) -c -o mach_dep1.o $(SPECIALCFLAGS) $(srcdir)/mach_dep.c + ./if_mach SPARC "" ld -r -o mach_dep.o mach_dep1.o mach_dep2.o ./if_mach IA64 "" as $(AS_ABI_FLAG) -o ia64_save_regs_in_stack.o $(srcdir)/ia64_save_regs_in_stack.s ./if_mach IA64 "" $(CC) -c -o mach_dep1.o $(SPECIALCFLAGS) $(srcdir)/mach_dep.c ./if_mach IA64 "" ld -r -o mach_dep.o mach_dep1.o ia64_save_regs_in_stack.o @@ -638,7 +637,7 @@ gctest: tests/test.o gc.a $(UTILS) # If an optimized setjmp_test generates a segmentation fault, # odds are your compiler is broken. Gctest may still work. # Try compiling setjmp_t.c unoptimized. -setjmp_test: $(srcdir)/setjmp_t.c $(srcdir)/include/gc.h $(UTILS) +setjmp_test: $(srcdir)/setjmp_t.c $(srcdir)/include/gc.h $(UTILS) $(AO_INSTALL_DIR) $(CC) $(CFLAGS) -o setjmp_test $(srcdir)/setjmp_t.c test: KandRtest cord/cordtest diff --git a/Makefile.direct b/Makefile.direct index 24326c0..78e8159 100644 --- a/Makefile.direct +++ b/Makefile.direct @@ -32,7 +32,7 @@ VPATH= $(srcdir) # Atomic_ops installation directory. If this doesn't exist, we create # it from the included libatomic_ops distribution. 
-AO_VERSION=1.0 +AO_VERSION=1.1 AO_SRC_DIR=$(srcdir)/libatomic_ops-$(AO_VERSION) AO_INSTALL_DIR=$(srcdir)/libatomic_ops-install @@ -349,7 +349,7 @@ SRCS= $(CSRCS) mips_sgi_mach_dep.s rs6000_mach_dep.s alpha_mach_dep.S \ DOC_FILES= README.QUICK doc/README.Mac doc/README.MacOSX doc/README.OS2 \ doc/README.amiga doc/README.cords doc/debugging.html \ - doc/porting.html \ + doc/porting.html doc/overview.html \ doc/README.dj doc/README.hp doc/README.linux doc/README.rs6000 \ doc/README.sgi doc/README.solaris2 doc/README.uts \ doc/README.win32 doc/barrett_diagram doc/README \ @@ -450,8 +450,9 @@ $(OBJS) tests/test.o dyn_load.o dyn_load_sunos53.o: \ mark.o typd_mlc.o finalize.o ptr_chck.o: $(srcdir)/include/gc_mark.h \ $(srcdir)/include/private/gc_pmark.h -specific.o pthread_support.o: $(srcdir)/include/private/specific.h \ - $(srcdir)/include/gc_inline.h +specific.o pthread_support.o thread_local_alloc.o win32_threads.o: \ + $(srcdir)/include/private/specific.h $(srcdir)/include/gc_inline.h \ + $(srcdir)/include/private/thread_local_alloc.h dbg_mlc.o gcj_mlc.o: $(srcdir)/include/private/dbg_mlc.h @@ -465,6 +466,7 @@ tests: base_lib gc.a: $(OBJS) dyn_load.o $(UTILS) echo > base_lib rm -f dont_ar_1 + cp $(AO_INSTALL_DIR)/lib/libatomic_ops.a gc.a ./if_mach SPARC SUNOS5 touch dont_ar_1 ./if_mach SPARC SUNOS5 $(AR) rus gc.a $(OBJS) dyn_load.o ./if_mach M68K AMIGA touch dont_ar_1 @@ -513,7 +515,7 @@ dyn_load_sunos53.o: dyn_load.c # SunOS5 shared library version of the collector sunos5gc.so: $(OBJS) dyn_load_sunos53.o - $(CC) -G -o sunos5gc.so $(OBJS) dyn_load_sunos53.o -ldl + $(CC) -G -o sunos5gc.so $(OBJS) dyn_load_sunos53.o $(AO_INSTALL_DIR)/lib/libatomic_ops.a -ldl ln sunos5gc.so libgc.so # Alpha/OSF shared library version of the collector @@ -556,14 +558,11 @@ mach_dep.o: $(srcdir)/mach_dep.c $(srcdir)/mips_sgi_mach_dep.s \ $(srcdir)/ia64_save_regs_in_stack.s \ $(srcdir)/sparc_netbsd_mach_dep.s $(UTILS) rm -f mach_dep.o - ./if_mach MIPS IRIX5 $(CC) -c -o mach_dep.o 
$(srcdir)/mips_sgi_mach_dep.s - ./if_mach MIPS RISCOS $(AS) -o mach_dep.o $(srcdir)/mips_ultrix_mach_dep.s - ./if_mach MIPS ULTRIX $(AS) -o mach_dep.o $(srcdir)/mips_ultrix_mach_dep.s - ./if_mach POWERPC DARWIN $(AS) -o mach_dep.o $(srcdir)/powerpc_darwin_mach_dep.s - ./if_mach ALPHA LINUX $(CC) -c -o mach_dep.o $(srcdir)/alpha_mach_dep.S - ./if_mach SPARC SUNOS5 $(CC) -c -o mach_dep.o $(srcdir)/sparc_mach_dep.S - ./if_mach SPARC OPENBSD $(AS) -o mach_dep.o $(srcdir)/sparc_sunos4_mach_dep.s - ./if_mach SPARC NETBSD $(AS) -o mach_dep.o $(srcdir)/sparc_netbsd_mach_dep.s + ./if_mach SPARC SUNOS5 $(CC) -c -o mach_dep2.o $(srcdir)/sparc_mach_dep.S + ./if_mach SPARC OPENBSD $(AS) -o mach_dep2.o $(srcdir)/sparc_sunos4_mach_dep.s + ./if_mach SPARC NETBSD $(AS) -o mach_dep2.o $(srcdir)/sparc_netbsd_mach_dep.s + ./if_mach SPARC "" $(CC) -c -o mach_dep1.o $(SPECIALCFLAGS) $(srcdir)/mach_dep.c + ./if_mach SPARC "" ld -r -o mach_dep.o mach_dep1.o mach_dep2.o ./if_mach IA64 "" as $(AS_ABI_FLAG) -o ia64_save_regs_in_stack.o $(srcdir)/ia64_save_regs_in_stack.s ./if_mach IA64 "" $(CC) -c -o mach_dep1.o $(SPECIALCFLAGS) $(srcdir)/mach_dep.c ./if_mach IA64 "" ld -r -o mach_dep.o mach_dep1.o ia64_save_regs_in_stack.o @@ -638,7 +637,7 @@ gctest: tests/test.o gc.a $(UTILS) # If an optimized setjmp_test generates a segmentation fault, # odds are your compiler is broken. Gctest may still work. # Try compiling setjmp_t.c unoptimized. 
-setjmp_test: $(srcdir)/setjmp_t.c $(srcdir)/include/gc.h $(UTILS) +setjmp_test: $(srcdir)/setjmp_t.c $(srcdir)/include/gc.h $(UTILS) $(AO_INSTALL_DIR) $(CC) $(CFLAGS) -o setjmp_test $(srcdir)/setjmp_t.c test: KandRtest cord/cordtest diff --git a/Makefile.in b/Makefile.in index 0c881db..68708ef 100644 --- a/Makefile.in +++ b/Makefile.in @@ -527,6 +527,7 @@ dist_pkgdata_DATA = \ doc/README.solaris2 \ doc/README.uts \ doc/README.win32 \ + doc/overview.html \ doc/tree.html \ doc/leak.html \ doc/gcinterface.html \ diff --git a/NT_STATIC_THREADS_MAKEFILE b/NT_STATIC_THREADS_MAKEFILE index 91fb7f6..f37d6d1 100644 --- a/NT_STATIC_THREADS_MAKEFILE +++ b/NT_STATIC_THREADS_MAKEFILE @@ -10,8 +10,8 @@ CPU=$(MY_CPU) # should do, since we only need the headers. # We assume this was manually unpacked, since I'm not sure there is # a Windows standard command line tool to do this. -AO_VERSION=0.6 -AO_SRC_DIR=$(srcdir)/atomic_ops-$(AO_VERSION) +AO_VERSION=1.1 +AO_SRC_DIR=libatomic_ops-$(AO_VERSION)/src AO_INCLUDE_DIR=$(AO_SRC_DIR) OBJS= alloc.obj reclaim.obj allchblk.obj misc.obj mach_dep.obj os_dep.obj mark_rts.obj headers.obj mark.obj obj_map.obj blacklst.obj finalize.obj new_hblk.obj dbg_mlc.obj malloc.obj stubborn.obj dyn_load.obj typd_mlc.obj ptr_chck.obj gc_cpp.obj mallocx.obj win32_threads.obj diff --git a/allchblk.c b/allchblk.c index ad55bed..997580a 100644 --- a/allchblk.c +++ b/allchblk.c @@ -560,7 +560,7 @@ int index; /* Index of free list */ /* free blocks in GC_add_to_fl. 
*/ # endif # ifdef USE_MUNMAP - hhdr -> hb_last_reclaimed = GC_gc_no; + hhdr -> hb_last_reclaimed = (unsigned short)GC_gc_no; # endif hhdr -> hb_sz = h_size; GC_add_to_fl(h, hhdr); @@ -568,7 +568,7 @@ int index; /* Index of free list */ } struct hblk * -GC_allochblk_nth(word sz/* bytes */, int kind, unsigned char flags, int n); +GC_allochblk_nth(size_t sz/* bytes */, int kind, unsigned char flags, int n); /* * Allocate (and return pointer to) a heap block @@ -580,7 +580,7 @@ GC_allochblk_nth(word sz/* bytes */, int kind, unsigned char flags, int n); * The client is responsible for clearing the block, if necessary. */ struct hblk * -GC_allochblk(size_t sz, int kind, unsigned flags/* IGNORE_OFF_PAGE or 0 */) +GC_allochblk(size_t sz, int kind, unsigned char flags/* IGNORE_OFF_PAGE or 0 */) { word blocks; int start_list; @@ -603,7 +603,7 @@ GC_allochblk(size_t sz, int kind, unsigned flags/* IGNORE_OFF_PAGE or 0 */) * Unlike the above, sz is in bytes. */ struct hblk * -GC_allochblk_nth(word sz, int kind, unsigned char flags, int n) +GC_allochblk_nth(size_t sz, int kind, unsigned char flags, int n) { struct hblk *hbp; hdr * hhdr; /* Header corr. 
to hbp */ @@ -822,7 +822,7 @@ signed_word size; GC_remove_counts(hbp, (word)size); hhdr->hb_sz = size; # ifdef USE_MUNMAP - hhdr -> hb_last_reclaimed = GC_gc_no; + hhdr -> hb_last_reclaimed = (unsigned short)GC_gc_no; # endif /* Check for duplicate deallocation in the easy case */ @@ -849,7 +849,7 @@ signed_word size; GC_remove_from_fl(prevhdr, FL_UNKNOWN); prevhdr -> hb_sz += hhdr -> hb_sz; # ifdef USE_MUNMAP - prevhdr -> hb_last_reclaimed = GC_gc_no; + prevhdr -> hb_last_reclaimed = (unsigned short)GC_gc_no; # endif GC_remove_header(hbp); hbp = prev; diff --git a/alloc.c b/alloc.c index 1be4516..0e292f7 100644 --- a/alloc.c +++ b/alloc.c @@ -569,8 +569,16 @@ void GC_clear_fl_marks(ptr_t q) } bit_no = MARK_BIT_NO((ptr_t)p - (ptr_t)h, sz); if (mark_bit_from_hdr(hhdr, bit_no)) { + int n_marks = hhdr -> hb_n_marks - 1; clear_mark_bit_from_hdr(hhdr, bit_no); - --hhdr -> hb_n_marks; +# ifdef PARALLEL_MARK + /* Appr. count, don't decrement to zero! */ + if (0 != n_marks) { + hhdr -> hb_n_marks = n_marks; + } +# else + hhdr -> hb_n_marks = n_marks; +# endif } GC_bytes_found -= sz; } diff --git a/configure b/configure index 36c8a6f..7e58904 100755 --- a/configure +++ b/configure @@ -1,7 +1,7 @@ #! /bin/sh # From configure.ac Revision: 1.2 . # Guess values for system-dependent variables and create Makefiles. -# Generated by GNU Autoconf 2.59 for gc 7.0alpha4. +# Generated by GNU Autoconf 2.59 for gc 7.0alpha5. # # Report bugs to . # @@ -429,8 +429,8 @@ SHELL=${CONFIG_SHELL-/bin/sh} # Identity of this package. PACKAGE_NAME='gc' PACKAGE_TARNAME='gc' -PACKAGE_VERSION='7.0alpha4' -PACKAGE_STRING='gc 7.0alpha4' +PACKAGE_VERSION='7.0alpha5' +PACKAGE_STRING='gc 7.0alpha5' PACKAGE_BUGREPORT='Hans.Boehm@hp.com' ac_unique_file="gcj_mlc.c" @@ -957,7 +957,7 @@ if test "$ac_init_help" = "long"; then # Omit some internal or obsolete options to make the list less imposing. # This message is too long to be a string in the A/UX 3.1 sh. 
cat <<_ACEOF -\`configure' configures gc 7.0alpha4 to adapt to many kinds of systems. +\`configure' configures gc 7.0alpha5 to adapt to many kinds of systems. Usage: $0 [OPTION]... [VAR=VALUE]... @@ -1024,7 +1024,7 @@ fi if test -n "$ac_init_help"; then case $ac_init_help in - short | recursive ) echo "Configuration of gc 7.0alpha4:";; + short | recursive ) echo "Configuration of gc 7.0alpha5:";; esac cat <<\_ACEOF @@ -1174,7 +1174,7 @@ fi test -n "$ac_init_help" && exit 0 if $ac_init_version; then cat <<\_ACEOF -gc configure 7.0alpha4 +gc configure 7.0alpha5 generated by GNU Autoconf 2.59 Copyright (C) 2003 Free Software Foundation, Inc. @@ -1188,7 +1188,7 @@ cat >&5 <<_ACEOF This file contains any messages produced by compilers while running configure, to aid debugging if configure makes a mistake. -It was created by gc $as_me 7.0alpha4, which was +It was created by gc $as_me 7.0alpha5, which was generated by GNU Autoconf 2.59. Invocation command line was $ $0 $@ @@ -1960,7 +1960,7 @@ fi # Define the identity of the package. PACKAGE='gc' - VERSION='7.0alpha4' + VERSION='7.0alpha5' cat >>confdefs.h <<_ACEOF @@ -4595,14 +4595,14 @@ echo $ECHO_N "checking which machine-dependent code should be used... $ECHO_C" > machdep= case "$host" in alpha-*-openbsd*) - machdep="alpha_mach_dep.lo" + machdep="mach_dep.lo" if test x"${ac_cv_lib_dl_dlopen}" != xyes ; then { echo "$as_me:$LINENO: WARNING: OpenBSD/Alpha without dlopen(). Shared library support is disabled" >&5 echo "$as_me: WARNING: OpenBSD/Alpha without dlopen(). Shared library support is disabled" >&2;} fi ;; alpha*-*-linux*) - machdep="alpha_mach_dep.lo" + machdep="mach_dep.lo" ;; i?86-*-solaris2.[89] | i?86-*-solaris2.1?) 
cat >>confdefs.h <<\_ACEOF @@ -4611,7 +4611,7 @@ _ACEOF ;; mipstx39-*-elf*) - machdep="mips_ultrix_mach_dep.lo" + machdep="mach_dep.lo" cat >>confdefs.h <<\_ACEOF #define STACKBASE __stackbase _ACEOF @@ -4622,31 +4622,31 @@ _ACEOF ;; mips-dec-ultrix*) - machdep="mips_ultrix_mach-dep.lo" + machdep="mach-dep.lo" ;; mips-nec-sysv*|mips-unknown-sysv*) ;; mips*-*-linux*) ;; mips-*-*) - machdep="mips_sgi_mach_dep.lo" + machdep="mach_dep.lo" cat >>confdefs.h <<\_ACEOF #define NO_EXECUTE_PERMISSION 1 _ACEOF ;; sparc-*-netbsd*) - machdep="sparc_netbsd_mach_dep.lo" + machdep="mach_dep.lo sparc_netbsd_mach_dep.lo" ;; sparc-sun-solaris2.3) - machdep="sparc_mach_dep.lo" + machdep="mach_dep.lo sparc_mach_dep.lo" cat >>confdefs.h <<\_ACEOF #define SUNOS53_SHARED_LIB 1 _ACEOF ;; sparc*-sun-solaris2.*) - machdep="sparc_mach_dep.lo" + machdep="mach_dep.lo sparc_mach_dep.lo" ;; ia64-*-*) machdep="mach_dep.lo ia64_save_regs_in_stack.lo" @@ -10714,7 +10714,7 @@ _ASBOX } >&5 cat >&5 <<_CSEOF -This file was extended by gc $as_me 7.0alpha4, which was +This file was extended by gc $as_me 7.0alpha5, which was generated by GNU Autoconf 2.59. Invocation command line was CONFIG_FILES = $CONFIG_FILES @@ -10772,7 +10772,7 @@ _ACEOF cat >>$CONFIG_STATUS <<_ACEOF ac_cs_version="\\ -gc config.status 7.0alpha4 +gc config.status 7.0alpha5 configured by $0, generated by GNU Autoconf 2.59, with options \\"`echo "$ac_configure_args" | sed 's/[\\""\`\$]/\\\\&/g'`\\" diff --git a/configure.ac b/configure.ac index 9becbad..3410804 100644 --- a/configure.ac +++ b/configure.ac @@ -17,7 +17,7 @@ dnl Process this file with autoconf to produce configure. # Initialization # ============== -AC_INIT(gc,7.0alpha4,Hans.Boehm@hp.com) +AC_INIT(gc,7.0alpha5,Hans.Boehm@hp.com) ## version must conform to [0-9]+[.][0-9]+(alpha[0-9]+)? 
AC_CONFIG_SRCDIR(gcj_mlc.c) AC_CANONICAL_TARGET @@ -276,42 +276,42 @@ AC_MSG_CHECKING(which machine-dependent code should be used) machdep= case "$host" in alpha-*-openbsd*) - machdep="alpha_mach_dep.lo" + machdep="mach_dep.lo" if test x"${ac_cv_lib_dl_dlopen}" != xyes ; then AC_MSG_WARN(OpenBSD/Alpha without dlopen(). Shared library support is disabled) fi ;; alpha*-*-linux*) - machdep="alpha_mach_dep.lo" + machdep="mach_dep.lo" ;; i?86-*-solaris2.[[89]] | i?86-*-solaris2.1?) AC_DEFINE(SOLARIS25_PROC_VDB_BUG_FIXED) ;; mipstx39-*-elf*) - machdep="mips_ultrix_mach_dep.lo" + machdep="mach_dep.lo" AC_DEFINE(STACKBASE, __stackbase) AC_DEFINE(DATASTART_IS_ETEXT) ;; mips-dec-ultrix*) - machdep="mips_ultrix_mach-dep.lo" + machdep="mach-dep.lo" ;; mips-nec-sysv*|mips-unknown-sysv*) ;; mips*-*-linux*) ;; mips-*-*) - machdep="mips_sgi_mach_dep.lo" + machdep="mach_dep.lo" AC_DEFINE(NO_EXECUTE_PERMISSION) ;; sparc-*-netbsd*) - machdep="sparc_netbsd_mach_dep.lo" + machdep="mach_dep.lo sparc_netbsd_mach_dep.lo" ;; sparc-sun-solaris2.3) - machdep="sparc_mach_dep.lo" + machdep="mach_dep.lo sparc_mach_dep.lo" AC_DEFINE(SUNOS53_SHARED_LIB) ;; sparc*-sun-solaris2.*) - machdep="sparc_mach_dep.lo" + machdep="mach_dep.lo sparc_mach_dep.lo" ;; ia64-*-*) machdep="mach_dep.lo ia64_save_regs_in_stack.lo" diff --git a/doc/README b/doc/README index 2230f41..159fa89 100644 --- a/doc/README +++ b/doc/README @@ -31,7 +31,7 @@ are GPL'ed, but with an exception that should cover all uses in the collector. (If you are concerned about such things, I recommend you look at the notice in config.guess or ltmain.sh.) -This is version 7.0alpha2 of a conservative garbage collector for C and C++. +This is version 7.0alpha5 of a conservative garbage collector for C and C++. You might find a more recent version of this at diff --git a/doc/README.changes b/doc/README.changes index da79786..25b61c4 100644 --- a/doc/README.changes +++ b/doc/README.changes @@ -2236,8 +2236,21 @@ Since gc6.5: there. 
- More consistently define HBLKSIZE to 4096 on 64 bit architectures with 4K pages. (Thanks to Andrew Haley.) - -Since gc6.6: + - With win32 threads, GC_stop_world needs to acquire GC_write_cs. (Thanks + to Ben Hutchings for the observation and patch.) + - Move up struct callinfo declaration to make gcc 4.0.2. happy. + +Since 6.6: + - Add "int" to Solaris "end" and "etext" declaration in gc.h. Declared + the symbols with underscores and as arrays, since that's what's actually + used. Perhaps this could all just be removed? (Thanks to John Bowman.) + - Fixed ARM GC_test_and_set code. (Thanks to Kazu Hirata and Paul Brook.) + - Added casts for assignments to hb_last_reclaimed, which truncate the + value. Added a cast to GC_adj_words_allocd. Use GetModuleHandleA + when retrieving a handle to kernel32.dll under win32. (Thanks to the + Visual Prolog developers.) + +Since gc6.7: - Remove GC_PROTO, VOLATILE, GC_PTR, and GC_CONST. Assume ANSI C compiler and use ANSI constructs unconditionally. - Introduce #elif and #error in some of the appropriate places. @@ -2383,9 +2396,45 @@ Since gc7.0alpha3 - Added GC_getattr_np-based GC_get_stack_base (untested). - Separated thread local allocation into a separate file and added the beginning of win32 support for that. + +Since gc7.0alpha4 + (more 6.6, 6.7 changes) + - Some Solaris fixes, including some more general changes in how + the assembly pieces of mach_dep.c are handled. + - Removed a lot of SOLARIS_THREADS-specific code that was only + needed with the old implementation. This included many (mostly no-op) + versions of GC_is_fresh. + - Don't use atomic_ops in gc_locks.h unless we need threads. + - Fixed USE_MARK_BITS, which is once againthe default without PARALLEL_MARK. + - Removed Solaris GC_INIT hack. It's a workaround for a long dead bug, + and it seemed to be wrong anyway. + - Changed win32_threads.c to require preprocessor-based interception + of thread routines by default. 
A client call to GC_use_DllMain is + now required to get the old behavior in which DllMain is used to implicitly + register threads. This was done for uniformity with other platforms, and + because the DllMain solution seemed to require very tricky code which, + at least in the past, imposed hard bounds on the number of threads. + - Many small changes to make thread support work again on Cygwin. + - Moved definition of allocator lock etc. to pthread_support.c and + win32_threads.c for those two cases. + - Got rid of the FASTLOCK() machinery. It doesn't seem useful on modern + platforms. + - Cleaned up the uncollectable allocation routines, speeding up the + slower paths. The code did enough unnecessary work off the critical path + that the underlying logic was getting hard to extract. + - No longer turn off THREAD_LOCAL_ALLOC with DBG_HDRS_ALL. Indications + are it just works, and I think the reasons for it not working disappeared + a while ago. + - Fixed bugs in hb_n_marks calculation and assertion. + - Don't use __builtin_expect for pre-3.0 gcc. + - Define GWW_VDB only for recent Microsoft tool chains. + - Add overview.html to doc directory. + - Fix NT_STATIC_THREADS_MAKEFILE, various compiler warnings. + - Made thread local allocation sort of work with Cygwin. The code should + be there to deal with other Windows variants, but non-Cygwin Windows + threads need more bug fixes. To do: - - Fix USE_MARK_BITS. - REDIRECT_MALLOC and threads combination is getting closer, but currently usually fails because the DTV (dynamic thread vector) used to access thread-local storage is referenced only from the base of a thread stack, @@ -2394,7 +2443,6 @@ To do: Typically large heap sections end up cleared. - Clone marker inner loop to support arch-dependent prefetching, and counting of objects marked for finalization. - - function wrapping?? - The USE_MUNMAP code should really use a separate data structure indexed by physical page to keep track of time since last use of a page. 
Using hblk headers means we lose track of ages when diff --git a/doc/README.linux b/doc/README.linux index 1d0fd4c..99f4bbc 100644 --- a/doc/README.linux +++ b/doc/README.linux @@ -19,15 +19,15 @@ Linux threads. These should not be touched by the client program. To use threads, you need to abide by the following requirements: -1) You need to use LinuxThreads (which are included in libc6). +1) You need to use LinuxThreads or NPTL (which are included in libc6). The collector relies on some implementation details of the LinuxThreads - package. It is unlikely that this code will work on other + package. This code may not work on other pthread implementations (in particular it will *not* work with MIT pthreads). -2) You must compile the collector with -DGC_LINUX_THREADS and -D_REENTRANT - specified in the Makefile. +2) You must compile the collector with -DGC_LINUX_THREADS (or + just -DGC_THREADS) and -D_REENTRANT specified in the Makefile. 3a) Every file that makes thread calls should define GC_LINUX_THREADS and _REENTRANT and then include gc.h. Gc.h redefines some of the diff --git a/doc/doc.am b/doc/doc.am index a90e05d..d95fa17 100644 --- a/doc/doc.am +++ b/doc/doc.am @@ -43,6 +43,7 @@ dist_pkgdata_DATA = \ doc/README.solaris2 \ doc/README.uts \ doc/README.win32 \ + doc/overview.html \ doc/tree.html \ doc/leak.html \ doc/gcinterface.html \ diff --git a/doc/gcdescr.html b/doc/gcdescr.html index cab6bde..dc08470 100644 --- a/doc/gcdescr.html +++ b/doc/gcdescr.html @@ -1,7 +1,7 @@ Conservative GC Algorithmic Overview - Hans-J. Boehm, HP Labs (Much of this was written at SGI) + Hans-J. Boehm, HP Labs (Some of this was written at SGI)

This is under construction, and may always be.

@@ -549,6 +549,67 @@ by using ld's function call wrapping mechanism under Linux. Recent versions of the collector support several facilites to enhance the processor-scalability and thread performance of the collector. These are discussed in more detail here. +We briefly outline the data approach to thread-local allocation in the +next section. +

Thread-local allocation

+If thread-local allocation is enabled, the collector keeps separate +arrays of free lists for each thread. Thread-local allocation +is currently only supported on a few platforms. +

+The free list arrays associated +with each thread are only used to satisfy requests for objects that +are both very small, and belong to one of a small number of well-known +kinds. These currently include "normal" and pointer-free objects. +Depending on the configuration, "gcj" objects may also be included. +

+Thread-local free list entries contain either a pointer to the first +element of a free list, or they contain a counter of the number of +allocation "granules" allocated so far. Initially they contain the +value one, i.e. a small counter value. +

+Thread-local allocation allocates directly through the global +allocator, if the object is of a size or kind not covered by the +local free lists. +

+If there is an appropriate local free list, the allocator checks whether it +contains a sufficiently small counter value. If so, the counter is simply +incremented by the counter value, and the global allocator is used. +In this way, the initial few allocations of a given size bypass the local +allocator. A thread that only allocates a handful of objects of a given +size will not build up its own free list for that size. This avoids +wasting space for unpopular object sizes or kinds. +

+Once the counter passes a threshold, GC_malloc_many is called +to allocate roughly HBLKSIZE space and put it on the corresponding +local free list. Further allocations of that size and kind then use +this free list, and no longer need to acquire the allocation lock. +The allocation procedure is otherwise similar to the global free lists. +The local free lists are also linked using the first word in the object. +In most cases this means they require considerably less time. +

+Local free lists are treated by most of the rest of the collector +as though they were in-use reachable data. This requires some care, +since pointer-free objects are not normally traced, and hence a special +tracing procedure is required to mark all objects on pointer-free and +gcj local free lists. +

+On thread exit, any remaining thread-local free list entries are +transferred back to the global free list. +

+Note that if the collector is configured for thread-local allocation, +GC versions before 7 do not invoke the thread-local allocator by default. +GC_malloc only uses thread-local allocation in version 7 and later. +In earlier versions, GC_MALLOC (all caps) may be directed +to use thread-local allocation by defining GC_REDIRECT_TO_LOCAL +and then including gc_local_alloc.h. +

+For some more details see here, and the +technical report entitled + +``Fast Multiprocessor Memory Allocation and Garbage Collection'' + +

+


Comments are appreciated. Please send mail to boehm@acm.org or diff --git a/doc/overview.html b/doc/overview.html new file mode 100644 index 0000000..d31f937 --- /dev/null +++ b/doc/overview.html @@ -0,0 +1,446 @@ + +A garbage collector for C and C++ + + + + + + + + + + +
Interface OverviewTutorial SlidesFAQExampleDownloadLicense
+

A garbage collector for C and C++

+ +[ This is an updated version of the page formerly at +http://reality.sgi.com/boehm/gc.html +and before that at + +ftp://parcftp.xerox.com/pub/gc/gc.html.] +

+The Boehm-Demers-Weiser +conservative garbage collector can +be used as a garbage collecting +replacement for C malloc or C++ new. +It allows you to allocate memory basically as you normally would, +without explicitly deallocating memory that is no longer useful. +The collector automatically recycles memory when it determines +that it can no longer be otherwise accessed. +A simple example of such a use is given +here. +

+The collector is also used by a number of programming language +implementations that either use C as intermediate code, want +to facilitate easier interoperation with C libraries, or +just prefer the simple collector interface. +For a more detailed description of the interface, see +here. +

+Alternatively, the garbage collector may be used as +a leak detector +for C or C++ programs, though that is not its primary goal. +

+Typically several versions will be available. +Usually you should first try to use +gc_source/gc.tar.gz, +which is normally an older, more stable version. +

+If that fails, try the latest explicitly numbered version +in +gc_source/. +Later versions may contain additional features, platform support, +or bug fixes, but are likely to be less well tested. +Note that versions containing the letters alpha are even less +well tested than others, especially on non-HP platforms. +

+A slightly older version of the garbage collector is now also +included as part of the +GNU compiler +distribution. The source +code for that version is available for browsing +here. +

+The arguments for and against conservative garbage collection +in C and C++ are briefly +discussed in +issues.html. The beginnings of +a frequently-asked-questions list are here. +

+The garbage collector code is copyrighted by +Hans-J. Boehm, +Alan J. Demers, +Xerox Corporation, +Silicon Graphics, +and +Hewlett-Packard Company. +It may be used and copied without payment of a fee under minimal restrictions. +See the README file in the distribution or the +license for more details. +IT IS PROVIDED AS IS, +WITH ABSOLUTELY NO WARRANTY EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK. +

+Empirically, this collector works with most unmodified C programs, +simply by replacing +malloc with GC_malloc calls, +replacing realloc with GC_realloc calls, and removing +free calls. Exceptions are discussed +in issues.html. +

Platforms

+The collector is not completely portable, but the distribution +includes ports to most standard PC and UNIX/Linux platforms. +The collector should work on Linux, *BSD, recent Windows versions, +MacOS X, HP/UX, Solaris, +Tru64, Irix and a few other operating systems. +Some ports are more polished than others. +

+Irix pthreads, Linux threads, Win32 threads, Solaris threads +(old style and pthreads), +HP/UX 11 pthreads, Tru64 pthreads, and MacOS X threads are supported +in recent versions. +

Separately distributed ports

+For MacOS 9/Classic use, Patrick Beard's latest port is available from + +http://homepage.mac.com/pcbeard/gc/. +(Unfortunately, that's now quite dated. +I'm not in a position to test under MacOS. Although I try to +incorporate changes, it is impossible for +me to update the project file.) +

+Precompiled versions of the collector for NetBSD are available +here +or +here. +

+Debian Linux includes prepackaged +versions of the collector. +

Scalable multiprocessor versions

+Kenjiro Taura, Toshio Endo, and Akinori Yonezawa have made available +a parallel collector +based on this one. Their collector takes advantage of multiple processors +during a collection. Starting with collector version 6.0alpha1 +we also do this, though with more modest processor scalability goals. +Our approach is discussed briefly in +scale.html. +

Some Collector Details

+The collector uses a mark-sweep algorithm. +It provides incremental and generational +collection under operating systems which provide the right kind of +virtual memory support. (Currently this includes SunOS[45], IRIX, +OSF/1, Linux, and Windows, with varying restrictions.) +It allows finalization code +to be invoked when an object is collected. +It can take advantage of type information to locate pointers if such +information is provided, but it is usually used without such information. +See the README and +gc.h files in the distribution for more details. +

+For an overview of the implementation, see here. +

+The garbage collector distribution includes a C string +(cord) package that provides +for fast concatenation and substring operations on long strings. +A simple curses- and win32-based editor that represents the entire file +as a cord is included as a +sample application. +

+Performance of the nonincremental collector is typically competitive +with malloc/free implementations. Both space and time overhead are +likely to be only slightly higher +for programs written for malloc/free +(see Detlefs, Dosser and Zorn's +Memory Allocation Costs in Large C and C++ Programs.) +For programs allocating primarily very small objects, the collector +may be faster; for programs allocating primarily large objects it will +be slower. If the collector is used in a multithreaded environment +and configured for thread-local allocation, it may in some cases +significantly outperform malloc/free allocation in time. +

+We also expect that in many cases any additional overhead +will be more than compensated for by decreased copying etc. +if programs are written +and tuned for garbage collection. +

Further Reading:

+The beginnings of a frequently asked questions list for this +collector are here. +

+The following provide information on garbage collection in general: +

+Paul Wilson's garbage collection ftp archive and GC survey. +

+The Ravenbrook +Memory Management Reference. +

+David Chase's +GC FAQ. +

+Richard Jones' + +GC page and + +his book. +

+The following papers describe the collector algorithms we use +and the underlying design decisions at +a higher level. +

+(Some of the lower level details can be found +here.) +

+The first one is not available +electronically due to copyright considerations. Most of the others are +subject to ACM copyright. +

+Boehm, H., "Dynamic Memory Allocation and Garbage Collection", Computers in Physics +9, 3, May/June 1995, pp. 297-303. This is directed at an otherwise sophisticated +audience unfamiliar with memory allocation issues. The algorithmic details differ +from those in the implementation. There is a related letter to the editor and a minor +correction in the next issue. +

+Boehm, H., and M. Weiser, +"Garbage Collection in an Uncooperative Environment", +Software Practice & Experience, September 1988, pp. 807-820. +

+Boehm, H., A. Demers, and S. Shenker, "Mostly Parallel Garbage Collection", Proceedings +of the ACM SIGPLAN '91 Conference on Programming Language Design and Implementation, +SIGPLAN Notices 26, 6 (June 1991), pp. 157-164. +

+Boehm, H., "Space Efficient Conservative Garbage Collection", Proceedings of the ACM +SIGPLAN '93 Conference on Programming Language Design and Implementation, SIGPLAN +Notices 28, 6 (June 1993), pp. 197-206. +

+Boehm, H., "Reducing Garbage Collector Cache Misses", + Proceedings of the 2000 International Symposium on Memory Management . + +Official version. + +Technical report version. Describes the prefetch strategy +incorporated into the collector for some platforms. Explains why +the sweep phase of a "mark-sweep" collector should not really be +a distinct phase. +

+M. Serrano, H. Boehm, +"Understanding Memory Allocation of Scheme Programs", +Proceedings of the Fifth ACM SIGPLAN International Conference on +Functional Programming, 2000, Montreal, Canada, pp. 245-256. + +Official version. + +Earlier Technical Report version. Includes some discussion of the +collector debugging facilities for identifying causes of memory retention. +

+Boehm, H., +"Fast Multiprocessor Memory Allocation and Garbage Collection", + +HP Labs Technical Report HPL 2000-165. Discusses the parallel +collection algorithms, and presents some performance results. +

+Boehm, H., "Bounding Space Usage of Conservative Garbage Collectors", +Proceedings of the 2002 ACM SIGPLAN-SIGACT Symposium on Principles of +Programming Languages, Jan. 2002, pp. 93-100. + +Official version. + +Technical report version. +Includes a discussion of a collector facility to much more reliably test for +the potential of unbounded heap growth. +

+The following papers discuss language and compiler restrictions necessary to guarantee +safety of conservative garbage collection. +

+We thank John Levine and JCLT for allowing +us to make the second paper available electronically, and providing PostScript for the final +version. +

+Boehm, H., ``Simple +Garbage-Collector-Safety'', Proceedings +of the ACM SIGPLAN '96 Conference on Programming Language Design +and Implementation. +

+Boehm, H., and D. Chase, +``A Proposal for Garbage-Collector-Safe C Compilation'', +Journal of C Language Translation 4, 2 (December 1992), pp. 126-141. +

+Other related information: +

+The Detlefs, Dosser and Zorn's Memory Allocation Costs in Large C and C++ Programs. + This is a performance comparison of the Boehm-Demers-Weiser collector to malloc/free, +using programs written for malloc/free. +

+Joel Bartlett's mostly copying conservative garbage collector for C++. +

+John Ellis and David Detlefs' Safe Efficient Garbage Collection for C++ proposal. +

+Henry Baker's paper collection. +

+Slides for Hans Boehm's Allocation and GC Myths talk. +

Current users:

+Known current users of some variant of this collector include: +

+The runtime system for GCJ, +the static GNU Java compiler. +

+W3m, a text-based web browser. +

+Some versions of the Xerox DocuPrint printer software. +

+The Mozilla project, as leak +detector. +

+The Mono project, +an open source implementation of the .NET development framework. +

+The DotGNU Portable.NET +project, another open source .NET implementation. +

+The Irssi IRC client. +

+The Berkeley Titanium project. +

+The NAGWare f90 Fortran 90 compiler. +

+Elwood Corporation's +Eclipse Common Lisp system, C library, and translator. +

+The Bigloo +Scheme +and Camloo ML +compilers +written by Manuel Serrano and others. +

+Brent Benson's libscheme. +

+The MzScheme Scheme implementation. +

+The University of Washington Cecil Implementation. +

+The Berkeley Sather implementation. +

+The Berkeley Harmonia Project. +

+The Toba Java Virtual +Machine to C translator. +

+The Gwydion Dylan compiler. +

+The +GNU Objective C runtime. +

+Macaulay 2, a system to support +research in algebraic geometry and commutative algebra. +

+The Vesta configuration management +system. +

+Visual Prolog 6. +

+Asymptote LaTeX-compatible +vector graphics language. + +

More collector information at this site

+A simple illustration of how to build and +use the collector. +

+Description of alternate interfaces to the +garbage collector. +

+Slides from an ISMM 2004 tutorial about the GC. +

+A FAQ (frequently asked questions) list. +

+How to use the garbage collector as a leak detector. +

+Some hints on debugging garbage collected +applications. +

+An overview of the implementation of the +garbage collector. +

+The data structure used for fast pointer lookups. +

+Scalability of the collector to multiprocessors. +

+Directory containing garbage collector source. + +

More background information at this site

+An attempt to establish a bound on space usage of +conservative garbage collectors. +

+Mark-sweep versus copying garbage collectors +and their complexity. +

+Pros and cons of conservative garbage collectors, +in comparison to other collectors. + +

+Issues related to garbage collection vs. +manual memory management in C/C++. +

+An example of a case in which garbage collection +results in a much faster implementation as a result of reduced +synchronization. +

+Slide set discussing performance of nonmoving +garbage collectors. +

+ +Slide set discussing Destructors, Finalizers, and Synchronization +(POPL 2003). +

+ +Paper corresponding to above slide set. +( +Technical Report version.) +

+A Java/Scheme/C/C++ garbage collection benchmark. +

+Slides for talk on memory allocation myths. +

+Slides for OOPSLA 98 garbage collection talk. +

+Related papers. +

Contacts and Mailing List

+We have recently set up two mailing lists for collector announcements +and discussions: + +To subscribe to these lists, send a mail message containing the +word "subscribe" to +gc-announce-request@linux.hpl.hp.com +or to +gc-request@linux.hpl.hp.com. +(Please ignore the instructions about web-based subscription. +The listed web site is behind the HP firewall.) +

+The archives for these lists appear +here. +The gc list archive may also be read at +gmane.org. +

+Some prior discussion of the collector has taken place on the gcc +java mailing list, whose archives appear +here, and also on +gclist@iecc.com. +

+Comments and bug reports may also be sent to +(Hans.Boehm@hp.com) or +(boehm@acm.org), but the gc +mailing list is usually preferred. + +

diff --git a/headers.c b/headers.c index 8b14b4b..1a0ce88 100644 --- a/headers.c +++ b/headers.c @@ -254,7 +254,7 @@ struct hblkhdr * GC_install_header(struct hblk *h) result = alloc_hdr(); SET_HDR(h, result); # ifdef USE_MUNMAP - result -> hb_last_reclaimed = GC_gc_no; + result -> hb_last_reclaimed = (unsigned short)GC_gc_no; # endif return(result); } diff --git a/include/gc.h b/include/gc.h index a47dc4a..5f049c5 100644 --- a/include/gc.h +++ b/include/gc.h @@ -962,17 +962,7 @@ extern void GC_thr_init(void); /* Needed for Solaris/X86 */ * A GC_INIT call is required if the collector is built with THREAD_LOCAL_ALLOC * defined and the initial allocation call is not to GC_malloc(). */ -#if (defined(sparc) || defined(__sparc)) && defined(sun) - /* - * If you are planning on putting - * the collector in a SunOS 5 dynamic library, you need to call GC_INIT() - * from the statically loaded program section. - * This circumvents a Solaris 2.X (X<=4) linker bug. - */ -# define GC_INIT() { extern end, etext; \ - GC_noop(&end, &etext); \ - GC_init();} -#elif defined(__CYGWIN32__) && defined(GC_DLL) || defined (_AIX) +#if defined(__CYGWIN32__) || defined (_AIX) /* * Similarly gnu-win32 DLLs need explicit initialization from * the main program, as does AIX. @@ -984,15 +974,22 @@ extern void GC_thr_init(void); /* Needed for Solaris/X86 */ extern int _bss_end__[]; # define GC_MAX(x,y) ((x) > (y) ? (x) : (y)) # define GC_MIN(x,y) ((x) < (y) ? (x) : (y)) -# define GC_DATASTART ((GC_PTR) GC_MIN(_data_start__, _bss_start__)) -# define GC_DATAEND ((GC_PTR) GC_MAX(_data_end__, _bss_end__)) +# define GC_DATASTART ((void *) GC_MIN(_data_start__, _bss_start__)) +# define GC_DATAEND ((void *) GC_MAX(_data_end__, _bss_end__)) +# if defined(GC_DLL) +# define GC_INIT() { GC_add_roots(GC_DATASTART, GC_DATAEND); GC_init(); } +# else + /* Main program init not required, but other defined needed for */ + /* uniformity. 
*/ +# define GC_INIT() { GC_init(); } +# endif # endif # if defined(_AIX) extern int _data[], _end[]; -# define GC_DATASTART ((GC_PTR)((ulong)_data)) -# define GC_DATAEND ((GC_PTR)((ulong)_end)) +# define GC_DATASTART ((void *)((ulong)_data)) +# define GC_DATAEND ((void *)((ulong)_end)) +# define GC_INIT() { GC_add_roots(GC_DATASTART, GC_DATAEND); GC_init(); } # endif -# define GC_INIT() { GC_add_roots(GC_DATASTART, GC_DATAEND); GC_init(); } #else # define GC_INIT() { GC_init(); } #endif diff --git a/include/gc_config_macros.h b/include/gc_config_macros.h index 2cfa6c2..f3b5ef4 100644 --- a/include/gc_config_macros.h +++ b/include/gc_config_macros.h @@ -5,12 +5,12 @@ * Some tests for old macros. These violate our namespace rules and will * disappear shortly. Use the GC_ names. */ -#if defined(SOLARIS_THREADS) || defined(_SOLARIS_THREADS) +#if defined(SOLARIS_THREADS) || defined(_SOLARIS_THREADS) \ + || defined(_SOLARIS_PTHREADS) || defined(GC_SOLARIS_PTHREADS) + /* We no longer support old style Solaris threads. */ + /* GC_SOLARIS_THREADS now means pthreads. 
*/ # define GC_SOLARIS_THREADS #endif -#if defined(_SOLARIS_PTHREADS) -# define GC_SOLARIS_PTHREADS -#endif #if defined(IRIX_THREADS) # define GC_IRIX_THREADS #endif @@ -39,7 +39,6 @@ #endif #if !defined(_REENTRANT) && (defined(GC_SOLARIS_THREADS) \ - || defined(GC_SOLARIS_PTHREADS) \ || defined(GC_HPUX_THREADS) \ || defined(GC_AIX_THREADS) \ || defined(GC_LINUX_THREADS)) @@ -52,7 +51,7 @@ # define _POSIX4A_DRAFT10_SOURCE 1 #endif -# if defined(GC_SOLARIS_PTHREADS) || defined(GC_FREEBSD_THREADS) || \ +# if defined(GC_SOLARIS_THREADS) || defined(GC_FREEBSD_THREADS) || \ defined(GC_IRIX_THREADS) || defined(GC_LINUX_THREADS) || \ defined(GC_HPUX_THREADS) || defined(GC_OSF1_THREADS) || \ defined(GC_DGUX386_THREADS) || defined(GC_DARWIN_THREADS) || \ @@ -79,10 +78,12 @@ # define GC_IRIX_THREADS # define GC_PTHREADS # endif -# if defined(__sparc) && !defined(__linux__) -# define GC_SOLARIS_PTHREADS +# if defined(__sparc) && !defined(__linux__) \ + || defined(sun) && (defined(i386) || defined(__i386__)) +# define GC_SOLARIS_THREADS # define GC_PTHREADS # endif + # if defined(__APPLE__) && defined(__MACH__) && defined(__ppc__) # define GC_DARWIN_THREADS # define GC_PTHREADS @@ -111,10 +112,6 @@ # endif #endif -#if defined(GC_SOLARIS_PTHREADS) && !defined(GC_SOLARIS_THREADS) -# define GC_SOLARIS_THREADS -#endif - # define __GC # ifndef _WIN32_WCE # include diff --git a/include/gc_inline.h b/include/gc_inline.h index d2008cf..5f6b6bb 100644 --- a/include/gc_inline.h +++ b/include/gc_inline.h @@ -26,9 +26,12 @@ #include "gc.h" #include "gc_tiny_fl.h" -#ifndef __GNUC__ -# define __builtin_expect(x, y) (x) -#endif +#if __GNUC__ >= 3 +# define GC_EXPECT(expr, outcome) __builtin_expect(expr,outcome) + /* Equivalent to (expr), but predict that usually (expr)==outcome. */ +#else +# define GC_EXPECT(expr, outcome) (expr) +#endif /* __GNUC__ */ /* The ultimately general inline allocation macro. Allocate an object */ /* of size bytes, putting the resulting pointer in result. 
Tiny_fl is */ @@ -49,14 +52,14 @@ # define GC_FAST_MALLOC_GRANS(result,granules,tiny_fl,num_direct,\ kind,default_expr,init) \ { \ - if (__builtin_expect(granules >= GC_TINY_FREELISTS,0)) { \ + if (GC_EXPECT(granules >= GC_TINY_FREELISTS,0)) { \ result = default_expr; \ } else { \ void **my_fl = tiny_fl + granules; \ void *my_entry=*my_fl; \ void *next; \ \ - while (__builtin_expect((word)my_entry \ + while (GC_EXPECT((word)my_entry \ <= num_direct + GC_TINY_FREELISTS + 1, 0)) { \ /* Entry contains counter or NULL */ \ if ((word)my_entry - 1 < num_direct) { \ @@ -81,7 +84,7 @@ init; \ PREFETCH_FOR_WRITE(next); \ GC_ASSERT(GC_size(result) >= bytes + EXTRA_BYTES); \ - GC_ASSERT(((word *)result)[1] == 0); \ + GC_ASSERT((kind) == PTRFREE || ((word *)result)[1] == 0); \ out: ; \ } \ } diff --git a/include/private/gc_locks.h b/include/private/gc_locks.h index 4dcba2b..5eecc50 100644 --- a/include/private/gc_locks.h +++ b/include/private/gc_locks.h @@ -18,22 +18,10 @@ #ifndef GC_LOCKS_H #define GC_LOCKS_H -#include - /* * Mutual exclusion between allocator/collector routines. * Needed if there is more than one allocator thread. - * FASTLOCK() is assumed to try to acquire the lock in a cheap and - * dirty way that is acceptable for a few instructions, e.g. by - * inhibiting preemption. This is assumed to have succeeded only - * if a subsequent call to FASTLOCK_SUCCEEDED() returns TRUE. - * FASTUNLOCK() is called whether or not FASTLOCK_SUCCEEDED(). - * If signals cannot be tolerated with the FASTLOCK held, then - * FASTLOCK should disable signals. The code executed under - * FASTLOCK is otherwise immune to interruption, provided it is - * not restarted. - * DCL_LOCK_STATE declares any local variables needed by LOCK and UNLOCK - * and/or FASTLOCK. + * DCL_LOCK_STATE declares any local variables needed by LOCK and UNLOCK. 
* * In the PARALLEL_MARK case, we also need to define a number of * other inline finctions here: @@ -44,21 +32,9 @@ * */ # ifdef THREADS +# include + void GC_noop1(word); -# ifdef PCR_OBSOLETE /* Faster, but broken with multiple lwp's */ -# include "th/PCR_Th.h" -# include "th/PCR_ThCrSec.h" - extern struct PCR_Th_MLRep GC_allocate_ml; -# define DCL_LOCK_STATE PCR_sigset_t GC_old_sig_mask -# define LOCK() PCR_Th_ML_Acquire(&GC_allocate_ml) -# define UNLOCK() PCR_Th_ML_Release(&GC_allocate_ml) -# define UNLOCK() PCR_Th_ML_Release(&GC_allocate_ml) -# define FASTLOCK() PCR_ThCrSec_EnterSys() - /* Here we cheat (a lot): */ -# define FASTLOCK_SUCCEEDED() (*(int *)(&GC_allocate_ml) == 0) - /* TRUE if nobody currently holds the lock */ -# define FASTUNLOCK() PCR_ThCrSec_ExitSys() -# endif # ifdef PCR # include # include @@ -67,18 +43,37 @@ PCR_ERes GC_fastLockRes; PCR_sigset_t GC_old_sig_mask # define LOCK() PCR_Th_ML_Acquire(&GC_allocate_ml) # define UNLOCK() PCR_Th_ML_Release(&GC_allocate_ml) -# define FASTLOCK() (GC_fastLockRes = PCR_Th_ML_Try(&GC_allocate_ml)) -# define FASTLOCK_SUCCEEDED() (GC_fastLockRes == PCR_ERes_okay) -# define FASTUNLOCK() {\ - if( FASTLOCK_SUCCEEDED() ) PCR_Th_ML_Release(&GC_allocate_ml); } # endif # if !defined(AO_have_test_and_set_acquire) # define USE_PTHREAD_LOCKS # endif +# if defined(GC_WIN32_THREADS) && defined(GC_PTHREADS) +# define USE_PTHREAD_LOCKS +# endif -# if defined(GC_PTHREADS) && !defined(GC_WIN32_THREADS) +# if defined(GC_WIN32_THREADS) && !defined(USE_PTHREAD_LOCKS) +# include +# define NO_THREAD (DWORD)(-1) + extern DWORD GC_lock_holder; + extern CRITICAL_SECTION GC_allocate_ml; +# ifdef GC_ASSERTIONS +# define UNCOND_LOCK() \ + { EnterCriticalSection(&GC_allocate_ml); \ + SET_LOCK_HOLDER(); } +# define UNCOND_UNLOCK() \ + { GC_ASSERT(I_HOLD_LOCK()); UNSET_LOCK_HOLDER(); \ + LeaveCriticalSection(&GC_allocate_ml); } +# else +# define UNCOND_LOCK() EnterCriticalSection(&GC_allocate_ml); +# define UNCOND_UNLOCK() 
LeaveCriticalSection(&GC_allocate_ml); +# endif /* !GC_ASSERTIONS */ +# define SET_LOCK_HOLDER() GC_lock_holder = GetCurrentThreadId() +# define UNSET_LOCK_HOLDER() GC_lock_holder = NO_THREAD +# define I_HOLD_LOCK() (!GC_need_to_lock \ + || GC_lock_holder == GetCurrentThreadId()) +# elif defined(GC_PTHREADS) # define NO_THREAD (pthread_t)(-1) # include @@ -144,29 +139,16 @@ # endif # endif /* GC_PTHREADS with linux_threads.c implementation */ -# if defined(GC_WIN32_THREADS) -# if defined(GC_PTHREADS) -# include - extern pthread_mutex_t GC_allocate_ml; -# define UNCOND_LOCK() pthread_mutex_lock(&GC_allocate_ml) -# define UNCOND_UNLOCK() pthread_mutex_unlock(&GC_allocate_ml) -# else -# include - GC_API CRITICAL_SECTION GC_allocate_ml; -# define UNCOND_LOCK() EnterCriticalSection(&GC_allocate_ml); -# define UNCOND_UNLOCK() LeaveCriticalSection(&GC_allocate_ml); -# endif -# endif -# ifndef SET_LOCK_HOLDER -# define SET_LOCK_HOLDER() -# define UNSET_LOCK_HOLDER() -# define I_HOLD_LOCK() FALSE - /* Used on platforms were locks can be reacquired, */ - /* so it doesn't matter if we lie. */ -# endif + # else /* !THREADS */ -# define LOCK() -# define UNLOCK() +# define LOCK() +# define UNLOCK() +# define SET_LOCK_HOLDER() +# define UNSET_LOCK_HOLDER() +# define I_HOLD_LOCK() TRUE + /* Used only in positive assertions or to test whether */ + /* we still need to acaquire the lock. TRUE works in */ + /* either case. */ # endif /* !THREADS */ #if defined(UNCOND_LOCK) && !defined(LOCK) @@ -176,14 +158,6 @@ # define UNLOCK() if (GC_need_to_lock) { UNCOND_UNLOCK(); } #endif -# ifndef SET_LOCK_HOLDER -# define SET_LOCK_HOLDER() -# define UNSET_LOCK_HOLDER() -# define I_HOLD_LOCK() FALSE - /* Used on platforms were locks can be reacquired, */ - /* so it doesn't matter if we lie. 
*/ -# endif - # ifndef ENTER_GC # define ENTER_GC() # define EXIT_GC() @@ -193,10 +167,4 @@ # define DCL_LOCK_STATE # endif -# ifndef FASTLOCK -# define FASTLOCK() LOCK() -# define FASTLOCK_SUCCEEDED() TRUE -# define FASTUNLOCK() UNLOCK() -# endif - #endif /* GC_LOCKS_H */ diff --git a/include/private/gc_pmark.h b/include/private/gc_pmark.h index 8a79b9d..1e96f18 100644 --- a/include/private/gc_pmark.h +++ b/include/private/gc_pmark.h @@ -167,23 +167,26 @@ exit_label: ; \ /* Set mark bit, exit if it was already set. */ # ifdef USE_MARK_BITS -/* FIXME: untested */ -# if defined(THREADS) - /* Introduces a benign race as in the byte case. */ -# define OR_WORD_EXIT_IF_SET(addr, mask, label) \ - if (!(*(addr) & (mask))) { \ - AO_or((AO_t *)(addr), (mask); \ - } else { \ - goto label; \ - } -# else /* !THREADS */ -# define OR_WORD_EXIT_IF_SET(addr, mask, label) \ - if (!(*(addr) & (mask))) { \ - *(addr) |= (mask); \ - } else { \ - goto label; \ - } -# endif +# ifdef PARALLEL_MARK + /* The following may fail to exit even if the bit was already set. 
*/ + /* For our uses, that's benign: */ +# define OR_WORD_EXIT_IF_SET(addr, bits, exit_label) \ + { \ + if (!(*(addr) & (mask))) { \ + AO_or((AO_t *)(addr), (mask); \ + } else { \ + goto label; \ + } \ + } +# else +# define OR_WORD_EXIT_IF_SET(addr, bits, exit_label) \ + { \ + word old = *(addr); \ + word my_bits = (bits); \ + if (old & my_bits) goto exit_label; \ + *(addr) = (old | my_bits); \ + } +# endif /* !PARALLEL_MARK */ # define SET_MARK_BIT_EXIT_IF_SET(hhdr,bit_no,exit_label) \ { \ word * mark_word_addr = hhdr -> hb_marks + divWORDSZ(bit_no); \ @@ -194,18 +197,19 @@ exit_label: ; \ # endif -#if defined(I386) && defined(__GNUC__) +#ifdef USE_MARK_BYTES +# if defined(I386) && defined(__GNUC__) # define LONG_MULT(hprod, lprod, x, y) { \ asm("mull %2" : "=a"(lprod), "=d"(hprod) : "g"(y), "0"(x)); \ } -#else /* No in-line X86 assembly code */ +# else /* No in-line X86 assembly code */ # define LONG_MULT(hprod, lprod, x, y) { \ unsigned long long prod = (unsigned long long)x \ * (unsigned long long)y; \ hprod = prod >> 32; \ lprod = (unsigned32)prod; \ } -#endif +# endif /* There is a race here, and we may set */ /* the bit twice in the concurrent case. 
This can result in the */ @@ -218,6 +222,7 @@ exit_label: ; \ if (mark_byte) goto exit_label; \ *mark_byte_addr = 1; \ } +#endif /* USE_MARK_BYTES */ #ifdef PARALLEL_MARK # define INCR_MARKS(hhdr) \ diff --git a/include/private/gc_priv.h b/include/private/gc_priv.h index d65a393..b55a673 100644 --- a/include/private/gc_priv.h +++ b/include/private/gc_priv.h @@ -63,9 +63,6 @@ typedef char * ptr_t; /* A generic pointer to which we can add */ # ifndef GCCONFIG_H # include "gcconfig.h" -# ifndef USE_MARK_BYTES -# define USE_MARK_BYTES -# endif # endif # ifndef HEADERS_H @@ -74,8 +71,8 @@ typedef char * ptr_t; /* A generic pointer to which we can add */ #if __GNUC__ >= 3 # define EXPECT(expr, outcome) __builtin_expect(expr,outcome) -# define INLINE inline /* Equivalent to (expr), but predict that usually (expr)==outcome. */ +# define INLINE inline #else # define EXPECT(expr, outcome) (expr) # define INLINE @@ -192,17 +189,6 @@ typedef char * ptr_t; /* A generic pointer to which we can add */ /* */ /*********************************/ -#ifdef SAVE_CALL_CHAIN - -/* Fill in the pc and argument information for up to NFRAMES of my */ -/* callers. Ignore my frame and my callers frame. */ -struct callinfo; -void GC_save_callers(struct callinfo info[NFRAMES]); - -void GC_print_callers(struct callinfo info[NFRAMES]); - -#endif - #ifdef NEED_CALLINFO struct callinfo { word ci_pc; /* Caller, not callee, pc */ @@ -216,6 +202,16 @@ void GC_print_callers(struct callinfo info[NFRAMES]); }; #endif +#ifdef SAVE_CALL_CHAIN + +/* Fill in the pc and argument information for up to NFRAMES of my */ +/* callers. Ignore my frame and my callers frame. 
*/ +void GC_save_callers(struct callinfo info[NFRAMES]); + +void GC_print_callers(struct callinfo info[NFRAMES]); + +#endif + /*********************************/ /* */ @@ -331,10 +327,10 @@ void GC_print_callers(struct callinfo info[NFRAMES]); # define ABORT(s) PCR_Base_Panic(s) # else # ifdef SMALL_CONFIG -# define ABORT(msg) abort(); +# define ABORT(msg) abort() # else GC_API void GC_abort(const char * msg); -# define ABORT(msg) GC_abort(msg); +# define ABORT(msg) GC_abort(msg) # endif # endif @@ -660,10 +656,20 @@ struct hblkhdr { counter_t hb_n_marks; /* Number of set mark bits, excluding */ /* the one always set at the end. */ /* Currently it is concurrently */ - /* updated and hence only a lower bound.*/ - /* But a zero value does gurantee that */ + /* updated and hence only approximate. */ + /* But a zero value does guarantee that */ /* the block contains no marked */ /* objects. */ + /* Ensuring this property means that we */ + /* never decrement it to zero during a */ + /* collection, and hence the count may */ + /* be one too high. Due to concurrent */ + /* updates, and arbitrary number of */ + /* increments, but not all of them (!) */ + /* may be lost, hence it may in theory */ + /* be much too low. */ + /* Without parallel marking, the count */ + /* is accurate. */ # ifdef USE_MARK_BYTES union { char _hb_marks[MARK_BITS_SZ]; @@ -676,12 +682,13 @@ struct hblkhdr { word dummy; /* Force word alignment of mark bytes. */ } _mark_byte_union; # define hb_marks _mark_byte_union._hb_marks -# define ANY_INDEX 23 /* Random mark bit index for assertions */ # else word hb_marks[MARK_BITS_SZ]; # endif /* !USE_MARK_BYTES */ }; +# define ANY_INDEX 23 /* "Random" mark bit index for assertions */ + /* heap block body */ # define HBLK_WORDS (HBLKSIZE/sizeof(word)) @@ -1156,28 +1163,9 @@ extern long GC_large_alloc_warn_suppressed; /* accessed. 
*/ #ifdef PARALLEL_MARK # define OR_WORD(addr, bits) \ - { word old; \ - do { \ - old = *((volatile word *)addr); \ - } while (!GC_compare_and_exchange((addr), old, old | (bits))); \ - } -# define OR_WORD_EXIT_IF_SET(addr, bits, exit_label) \ - { word old; \ - word my_bits = (bits); \ - do { \ - old = *((volatile word *)addr); \ - if (old & my_bits) goto exit_label; \ - } while (!GC_compare_and_exchange((addr), old, old | my_bits)); \ - } + { AO_or((volatile AO_t *)(addr), (AO_t)bits); } #else # define OR_WORD(addr, bits) *(addr) |= (bits) -# define OR_WORD_EXIT_IF_SET(addr, bits, exit_label) \ - { \ - word old = *(addr); \ - word my_bits = (bits); \ - if (old & my_bits) goto exit_label; \ - *(addr) = (old | my_bits); \ - } #endif /* Mark bit operations */ @@ -1338,7 +1326,7 @@ void GC_with_callee_saves_pushed(void (*fn)(ptr_t, void *), # if defined(SPARC) || defined(IA64) /* Cause all stacked registers to be saved in memory. Return a */ /* pointer to the top of the corresponding memory stack. */ - word GC_save_regs_in_stack(void); + ptr_t GC_save_regs_in_stack(void); # endif /* Push register contents onto mark stack. */ /* If NURSERY is defined, the default push */ @@ -1504,7 +1492,8 @@ ptr_t GC_build_fl(struct hblk *h, size_t words, GC_bool clear, ptr_t list); /* called by GC_new_hblk, but also */ /* called explicitly without GC lock. */ -struct hblk * GC_allochblk (size_t size_in_bytes, int kind, unsigned flags); +struct hblk * GC_allochblk (size_t size_in_bytes, int kind, + unsigned char flags); /* Allocate a heap block, inform */ /* the marker that block is valid */ /* for objects of indicated size. */ @@ -1766,9 +1755,6 @@ GC_bool GC_page_was_dirty(struct hblk *h); /* Read retrieved dirty bits. */ GC_bool GC_page_was_ever_dirty(struct hblk *h); /* Could the page contain valid heap pointers? */ -void GC_is_fresh(struct hblk *h, word n); - /* Assert the region currently contains no */ - /* valid pointers. 
*/ void GC_remove_protection(struct hblk *h, word nblocks, GC_bool pointerfree); /* h is about to be writteni or allocated. Ensure */ @@ -1896,7 +1882,7 @@ void GC_err_puts(const char *s); /* some other reason. */ # endif /* PARALLEL_MARK */ -# if defined(GC_PTHREADS) && !defined(GC_SOLARIS_THREADS) +# if defined(GC_PTHREADS) /* We define the thread suspension signal here, so that we can refer */ /* to it in the dirty bit implementation, if necessary. Ideally we */ /* would allocate a (real-time ?) signal using the standard mechanism.*/ diff --git a/include/private/gcconfig.h b/include/private/gcconfig.h index 9fe0419..9b80cbe 100644 --- a/include/private/gcconfig.h +++ b/include/private/gcconfig.h @@ -854,6 +854,7 @@ # define ALIGNMENT 4 /* Required by hardware */ # define CPP_WORDSZ 32 # endif +# define USE_ASM_PUSH_REGS # ifdef SUNOS5 # define OS_TYPE "SUNOS5" extern int _etext[]; @@ -1146,7 +1147,11 @@ # if !defined(__WATCOMC__) && !defined(GC_WIN32_THREADS) # define MPROTECT_VDB # endif -# define GWW_VDB +# if _MSC_VER >= 1300 /* .NET, i.e. > VisualStudio 6 */ +# define GWW_VDB +# else +# define MPROTECT_VDB +# endif # define DATAEND /* not needed */ # endif # ifdef MSWINCE @@ -1490,13 +1495,6 @@ # ifdef IA64 # define MACH_TYPE "IA64" - /* We need to get preserved registers in addition to register */ - /* windows. That's easiest to do with setjmp. */ -# ifdef PARALLEL_MARK -# define USE_MARK_BYTES - /* Compare-and-exchange is too expensive to use for */ - /* setting mark bits. 
*/ -# endif # ifdef HPUX # ifdef _ILP32 # define CPP_WORDSZ 32 @@ -1992,6 +1990,14 @@ # define THREADS # endif +# if !defined(USE_MARK_BITS) && !defined(USE_MARK_BYTES) +# if defined(THREADS) && defined(PARALLEL_MARK) +# define USE_MARK_BYTES +# else +# define USE_MARK_BITS +# endif +# endif + # if defined(MSWINCE) # define NO_GETENV # endif diff --git a/include/private/pthread_support.h b/include/private/pthread_support.h index b2ef68e..77f1ad1 100644 --- a/include/private/pthread_support.h +++ b/include/private/pthread_support.h @@ -3,8 +3,7 @@ # include "private/gc_priv.h" -# if defined(GC_PTHREADS) && !defined(GC_SOLARIS_THREADS) \ - && !defined(GC_WIN32_THREADS) +# if defined(GC_PTHREADS) && !defined(GC_WIN32_THREADS) #if defined(GC_DARWIN_THREADS) # include "private/darwin_stop_world.h" @@ -67,7 +66,7 @@ typedef struct GC_Thread_Rep { # endif } * GC_thread; -# define THREAD_TABLE_SZ 128 /* Must be power of 2 */ +# define THREAD_TABLE_SZ 256 /* Must be power of 2 */ extern volatile GC_thread GC_threads[THREAD_TABLE_SZ]; extern GC_bool GC_thr_initialized; diff --git a/include/private/thread_local_alloc.h b/include/private/thread_local_alloc.h index 32cbb08..3416931 100644 --- a/include/private/thread_local_alloc.h +++ b/include/private/thread_local_alloc.h @@ -19,6 +19,45 @@ /* implementation also exports GC_malloc and friends, which */ /* are declared in gc.h. */ +#include "private/gc_priv.h" + +#if defined(THREAD_LOCAL_ALLOC) + +#include "gc_inline.h" + + +# if defined USE_HPUX_TLS +# error USE_HPUX_TLS macro was replaced by USE_COMPILER_TLS +# endif + +# if !defined(USE_PTHREAD_SPECIFIC) && !defined(USE_WIN32_SPECIFIC) && \ + !defined(USE_WIN32_COMPILER_TLS) && !defined(USE_COMPILER_TLS) && \ + !defined(USE_CUSTOM_SPECIFIC) +# if defined(MSWIN32) || defined(MSWINCE) || defined(CYGWIN32) +# if defined(__GNUC__) /* Fixed for versions past 2.95? 
*/ +# define USE_WIN32_SPECIFIC +# else +# define USE_WIN32_COMPILER_TLS +# endif /* !GNU */ +# elif defined(LINUX) && defined(__GNUC__) +# define USE_COMPILER_TLS +# elif (defined(GC_DGUX386_THREADS) || defined(GC_OSF1_THREADS) || \ + defined(GC_DARWIN_THREADS) || defined(GC_AIX_THREADS)) +# define USE_PTHREAD_SPECIFIC +# elif defined(GC_HPUX_THREADS) +# ifdef __GNUC__ +# define USE_PTHREAD_SPECIFIC + /* Empirically, as of gcc 3.3, USE_COMPILER_TLS doesn't work. */ +# else +# define USE_COMPILER_TLS +# endif +# else +# define USE_CUSTOM_SPECIFIC /* Use our own. */ +# endif +# endif + +# include + /* One of these should be declared as the tlfs field in the */ /* structure pointed to by a GC_thread. */ typedef struct thread_local_freelists { @@ -52,22 +91,27 @@ typedef struct thread_local_freelists { # define GC_key_create pthread_key_create # define GC_remove_specific() /* No need for cleanup on exit. */ typedef pthread_key_t GC_key_t; -# elif defined(USE_COMPILER_TLS) +# elif defined(USE_COMPILER_TLS) || defined(USE_WIN32_COMPILER_TLS) # define GC_getspecific(x) (x) # define GC_setspecific(key, v) ((key) = (v), 0) # define GC_key_create(key, d) 0 # define GC_remove_specific() /* No need for cleanup on exit. */ typedef void * GC_key_t; # elif defined(USE_WIN32_SPECIFIC) +# include # define GC_getspecific TlsGetValue -# define GC_setspecific TlsSetValue +# define GC_setspecific(key, v) !TlsSetValue(key, v) + /* We assume 0 == success, msft does the opposite. */ # define GC_key_create(key, d) \ ((d) != 0? (ABORT("Destructor unsupported by TlsAlloc"),0) \ : (*(key) = TlsAlloc(), 0)) # define GC_remove_specific() /* No need for cleanup on thread exit. */ /* Need TlsFree on process exit/detach ? */ -# else + typedef DWORD GC_key_t; +# elif defined(USE_CUSTOM_SPECIFIC) # include "private/specific.h" +# else +# error implement me # endif @@ -86,14 +130,18 @@ void GC_destroy_thread_local(GC_tlfs p); /* we take care of an individual thread freelist structure. 
*/ void GC_mark_thread_local_fls_for(GC_tlfs p); -#ifdef USE_COMPILER_TLS +extern +#if defined(USE_COMPILER_TLS) __thread +#elif defined(USE_WIN32_COMPILER_TLS) + declspec(thread) #endif GC_key_t GC_thread_key; + /* This is set up by the thread_local_alloc implementation. But the */ /* thread support layer calls GC_remove_specific(GC_thread_key) */ /* before a thread exits. */ /* And the thread support layer makes sure that GC_thread_key is traced,*/ /* if necessary. */ - +#endif /* THREAD_LOCAL_ALLOC */ diff --git a/mach_dep.c b/mach_dep.c index 50b5665..ca1ace1 100644 --- a/mach_dep.c +++ b/mach_dep.c @@ -65,7 +65,7 @@ asm static void PushMacRegisters() # if defined(SPARC) || defined(IA64) /* Value returned from register flushing routine; either sp (SPARC) */ /* or ar.bsp (IA64) */ - word GC_save_regs_ret_val; + ptr_t GC_save_regs_ret_val; # endif /* Routine to mark from registers that are preserved by the C compiler. */ @@ -265,88 +265,12 @@ ptr_t cold_gc_frame; GC_with_callee_saves_pushed(GC_push_current_stack, cold_gc_frame); } -/* On register window machines, we need a way to force registers into */ -/* the stack. Return sp. */ -# ifdef SPARC - asm(" .seg \"text\""); -# if defined(SVR4) || defined(NETBSD) || defined(FREEBSD) - asm(" .globl GC_save_regs_in_stack"); - asm("GC_save_regs_in_stack:"); - asm(" .type GC_save_regs_in_stack,#function"); -# else - asm(" .globl _GC_save_regs_in_stack"); - asm("_GC_save_regs_in_stack:"); -# endif -# if defined(__arch64__) || defined(__sparcv9) - asm(" save %sp,-128,%sp"); - asm(" flushw"); - asm(" ret"); - asm(" restore %sp,2047+128,%o0"); -# else - asm(" ta 0x3 ! 
ST_FLUSH_WINDOWS"); - asm(" retl"); - asm(" mov %sp,%o0"); -# endif -# ifdef SVR4 - asm(" .GC_save_regs_in_stack_end:"); - asm(" .size GC_save_regs_in_stack,.GC_save_regs_in_stack_end-GC_save_regs_in_stack"); -# endif -# ifdef LINT - word GC_save_regs_in_stack() { return(0 /* sp really */);} -# endif -# endif - -/* GC_clear_stack_inner(arg, limit) clears stack area up to limit and */ -/* returns arg. Stack clearing is crucial on SPARC, so we supply */ -/* an assembly version that's more careful. Assumes limit is hotter */ -/* than sp, and limit is 8 byte aligned. */ #if defined(ASM_CLEAR_CODE) -#ifndef SPARC - --> fix it -#endif - asm(".globl GC_clear_stack_inner"); - asm("GC_clear_stack_inner:"); - asm(".type GC_save_regs_in_stack,#function"); -#if defined(__arch64__) || defined(__sparcv9) - asm("mov %sp,%o2"); /* Save sp */ - asm("add %sp,2047-8,%o3"); /* p = sp+bias-8 */ - asm("add %o1,-2047-192,%sp"); /* Move sp out of the way, */ - /* so that traps still work. */ - /* Includes some extra words */ - /* so we can be sloppy below. */ - asm("loop:"); - asm("stx %g0,[%o3]"); /* *(long *)p = 0 */ - asm("cmp %o3,%o1"); - asm("bgu,pt %xcc, loop"); /* if (p > limit) goto loop */ - asm("add %o3,-8,%o3"); /* p -= 8 (delay slot) */ - asm("retl"); - asm("mov %o2,%sp"); /* Restore sp., delay slot */ -#else - asm("mov %sp,%o2"); /* Save sp */ - asm("add %sp,-8,%o3"); /* p = sp-8 */ - asm("clr %g1"); /* [g0,g1] = 0 */ - asm("add %o1,-0x60,%sp"); /* Move sp out of the way, */ - /* so that traps still work. */ - /* Includes some extra words */ - /* so we can be sloppy below. 
*/ - asm("loop:"); - asm("std %g0,[%o3]"); /* *(long long *)p = 0 */ - asm("cmp %o3,%o1"); - asm("bgu loop "); /* if (p > limit) goto loop */ - asm("add %o3,-8,%o3"); /* p -= 8 (delay slot) */ - asm("retl"); - asm("mov %o2,%sp"); /* Restore sp., delay slot */ -#endif /* old SPARC */ - /* First argument = %o0 = return value */ -# ifdef SVR4 - asm(" .GC_clear_stack_inner_end:"); - asm(" .size GC_clear_stack_inner,.GC_clear_stack_inner_end-GC_clear_stack_inner"); -# endif - # ifdef LINT /*ARGSUSED*/ ptr_t GC_clear_stack_inner(arg, limit) ptr_t arg; word limit; { return(arg); } + /* The real version is in a .S file */ # endif #endif /* ASM_CLEAR_CODE */ diff --git a/malloc.c b/malloc.c index a36956a..1513735 100644 --- a/malloc.c +++ b/malloc.c @@ -215,14 +215,14 @@ void * GC_generic_malloc(size_t lb, int k) if(SMALL_OBJ(lb)) { lg = GC_size_map[lb]; opp = &(GC_aobjfreelist[lg]); - FASTLOCK(); - if( EXPECT(!FASTLOCK_SUCCEEDED() || (op = *opp) == 0, 0) ) { - FASTUNLOCK(); + LOCK(); + if( EXPECT((op = *opp) == 0, 0) ) { + UNLOCK(); return(GENERAL_MALLOC((word)lb, PTRFREE)); } *opp = obj_link(op); GC_bytes_allocd += GRANULES_TO_BYTES(lg); - FASTUNLOCK(); + UNLOCK(); return((void *) op); } else { return(GENERAL_MALLOC((word)lb, PTRFREE)); @@ -244,9 +244,9 @@ void * GC_generic_malloc(size_t lb, int k) if(SMALL_OBJ(lb)) { lg = GC_size_map[lb]; opp = (void **)&(GC_objfreelist[lg]); - FASTLOCK(); - if( EXPECT(!FASTLOCK_SUCCEEDED() || (op = *opp) == 0, 0) ) { - FASTUNLOCK(); + LOCK(); + if( EXPECT((op = *opp) == 0, 0) ) { + UNLOCK(); return(GENERAL_MALLOC((word)lb, NORMAL)); } /* See above comment on signals. 
*/ @@ -258,7 +258,7 @@ void * GC_generic_malloc(size_t lb, int k) *opp = obj_link(op); obj_link(op) = 0; GC_bytes_allocd += GRANULES_TO_BYTES(lg); - FASTUNLOCK(); + UNLOCK(); return op; } else { return(GENERAL_MALLOC(lb, NORMAL)); diff --git a/mallocx.c b/mallocx.c index 761514d..91e41d5 100644 --- a/mallocx.c +++ b/mallocx.c @@ -451,8 +451,8 @@ void * GC_malloc_uncollectable(size_t lb) /* collected anyway. */ lg = GC_size_map[lb]; opp = &(GC_uobjfreelist[lg]); - FASTLOCK(); - if( FASTLOCK_SUCCEEDED() && (op = *opp) != 0 ) { + LOCK(); + if( (op = *opp) != 0 ) { /* See above comment on signals. */ *opp = obj_link(op); obj_link(op) = 0; @@ -461,28 +461,31 @@ void * GC_malloc_uncollectable(size_t lb) /* cleared only temporarily during a collection, as a */ /* result of the normal free list mark bit clearing. */ GC_non_gc_bytes += GRANULES_TO_BYTES(lg); - FASTUNLOCK(); - return((void *) op); - } - FASTUNLOCK(); - op = (ptr_t)GC_generic_malloc((word)lb, UNCOLLECTABLE); + UNLOCK(); + } else { + UNLOCK(); + op = (ptr_t)GC_generic_malloc((word)lb, UNCOLLECTABLE); + /* For small objects, the free lists are completely marked. */ + } + GC_ASSERT(0 == op || GC_is_marked(op)); + return((void *) op); } else { - op = (ptr_t)GC_generic_malloc((word)lb, UNCOLLECTABLE); - } - if (0 == op) return(0); - /* We don't need the lock here, since we have an undisguised */ - /* pointer. We do need to hold the lock while we adjust */ - /* mark bits. */ - { - register struct hblk * h; size_t lb; + hdr * hhdr; - h = HBLKPTR(op); - lb = HDR(h) -> hb_sz; + op = (ptr_t)GC_generic_malloc((word)lb, UNCOLLECTABLE); + if (0 == op) return(0); + GC_ASSERT(((word)op & (HBLKSIZE - 1)) == 0); /* large block */ + hhdr = HDR((struct hbklk *)op); + /* We don't need the lock here, since we have an undisguised */ + /* pointer. We do need to hold the lock while we adjust */ + /* mark bits. 
*/ + lb = hhdr -> hb_sz; LOCK(); - GC_set_mark_bit(op); - GC_non_gc_bytes += lb; + set_mark_bit_from_hdr(hhdr, 0); /* Only object. */ + GC_ASSERT(hhdr -> hb_n_marks == 0); + hhdr -> hb_n_marks = 1; UNLOCK(); return((void *) op); } @@ -538,36 +541,36 @@ void * GC_malloc_atomic_uncollectable(size_t lb) /* collected anyway. */ lg = GC_size_map[lg]; opp = &(GC_auobjfreelist[lg]); - FASTLOCK(); - if( FASTLOCK_SUCCEEDED() && (op = *opp) != 0 ) { + LOCK(); + if( (op = *opp) != 0 ) { /* See above comment on signals. */ *opp = obj_link(op); obj_link(op) = 0; GC_bytes_allocd += GRANULES_TO_BYTES(lg); /* Mark bit was already set while object was on free list. */ GC_non_gc_bytes += GRANULES_TO_BYTES(lg); - FASTUNLOCK(); - return((void *) op); - } - FASTUNLOCK(); - op = (ptr_t)GC_generic_malloc(lb, AUNCOLLECTABLE); + UNLOCK(); + } else { + UNLOCK(); + op = (ptr_t)GC_generic_malloc(lb, AUNCOLLECTABLE); + } + GC_ASSERT(0 == op || GC_is_marked(op)); + return((void *) op); } else { - op = (ptr_t)GC_generic_malloc(lb, AUNCOLLECTABLE); - } - if (0 == op) return(0); - /* We don't need the lock here, since we have an undisguised */ - /* pointer. We do need to hold the lock while we adjust */ - /* mark bits. */ - { - struct hblk * h; size_t lb; + hdr * hhdr; - h = HBLKPTR(op); - lb = HDR(h) -> hb_sz; + op = (ptr_t)GC_generic_malloc(lb, AUNCOLLECTABLE); + if (0 == op) return(0); + + GC_ASSERT(((word)op & (HBLKSIZE - 1)) == 0); + hhdr = HDR((struct hbklk *)op); + lb = hhdr -> hb_sz; LOCK(); - GC_set_mark_bit(op); - GC_non_gc_bytes += lb; + set_mark_bit_from_hdr(hhdr, 0); /* Only object. */ + GC_ASSERT(hhdr -> hb_n_marks == 0); + hhdr -> hb_n_marks = 1; UNLOCK(); return((void *) op); } diff --git a/mark.c b/mark.c index 641f0d6..d46c1ac 100644 --- a/mark.c +++ b/mark.c @@ -156,7 +156,7 @@ void GC_clear_hdr_marks(hdr *hhdr) /* Set all mark bits in the header. Used for uncollectable blocks. 
*/ void GC_set_hdr_marks(hdr *hhdr) { - int i; + unsigned i; size_t sz = hhdr -> hb_sz; int n_marks = FINAL_MARK_BIT(sz); @@ -214,7 +214,7 @@ void GC_clear_mark_bit(ptr_t p) int n_marks; clear_mark_bit_from_hdr(hhdr, bit_no); n_marks = hhdr -> hb_n_marks - 1; -# ifdef THREADS +# ifdef PARALLEL_MARK if (n_marks != 0) hhdr -> hb_n_marks = n_marks; /* Don't decrement to zero. The counts are approximate due to */ @@ -1473,7 +1473,7 @@ void GC_push_all_eager(ptr_t bottom, ptr_t top) word * b = (word *)(((word) bottom + ALIGNMENT-1) & ~(ALIGNMENT-1)); word * t = (word *)(((word) top) & ~(ALIGNMENT-1)); register word *p; - register word q; + register ptr_t q; register word *lim; register ptr_t greatest_ha = GC_greatest_plausible_heap_addr; register ptr_t least_ha = GC_least_plausible_heap_addr; @@ -1485,7 +1485,7 @@ void GC_push_all_eager(ptr_t bottom, ptr_t top) /* to be valid. */ lim = t - 1 /* longword */; for (p = b; p <= lim; p = (word *)(((ptr_t)p) + ALIGNMENT)) { - q = *p; + q = (ptr_t)(*p); GC_PUSH_ONE_STACK((ptr_t)q, p); } # undef GC_greatest_plausible_heap_addr @@ -1508,7 +1508,6 @@ void GC_push_all_stack_partially_eager(ptr_t bottom, ptr_t top, ptr_t cold_gc_frame) { if (!NEED_FIXUP_POINTER && GC_all_interior_pointers) { -# define EAGER_BYTES 1024 /* Push the hot end of the stack eagerly, so that register values */ /* saved inside GC frames are marked before they disappear. */ /* The rest of the marking can be deferred until later. 
*/ @@ -1546,21 +1545,52 @@ void GC_push_all_stack(ptr_t bottom, ptr_t top) # endif } -#if !defined(SMALL_CONFIG) && !defined(USE_MARK_BYTES) +#if !defined(SMALL_CONFIG) && !defined(USE_MARK_BYTES) && \ + defined(MARK_BIT_PER_GRANULE) +# if GC_GRANULE_WORDS == 1 +# define USE_PUSH_MARKED_ACCELERATORS +# define PUSH_GRANULE(q) \ + { ptr_t qcontents = (ptr_t)((q)[0]); \ + GC_PUSH_ONE_HEAP(qcontents, (q)); } +# elif GC_GRANULE_WORDS == 2 +# define USE_PUSH_MARKED_ACCELERATORS +# define PUSH_GRANULE(q) \ + { ptr_t qcontents = (ptr_t)((q)[0]); \ + GC_PUSH_ONE_HEAP(qcontents, (q)); \ + qcontents = (ptr_t)((q)[1]); \ + GC_PUSH_ONE_HEAP(qcontents, (q)+1); } +# elif GC_GRANULE_WORDS == 4 +# define USE_PUSH_MARKED_ACCELERATORS +# define PUSH_GRANULE(q) \ + { ptr_t qcontents = (ptr_t)((q)[0]); \ + GC_PUSH_ONE_HEAP(qcontents, (q)); \ + qcontents = (ptr_t)((q)[1]); \ + GC_PUSH_ONE_HEAP(qcontents, (q)+1); \ + qcontents = (ptr_t)((q)[2]); \ + GC_PUSH_ONE_HEAP(qcontents, (q)+2); \ + qcontents = (ptr_t)((q)[3]); \ + GC_PUSH_ONE_HEAP(qcontents, (q)+3); } +# endif +#endif + +#ifdef USE_PUSH_MARKED_ACCELERATORS /* Push all objects reachable from marked objects in the given block */ -/* of size 1 objects. */ +/* containing objects of size 1 granule. */ void GC_push_marked1(struct hblk *h, hdr *hhdr) { word * mark_word_addr = &(hhdr->hb_marks[0]); - register word *p; + word *p; word *plim; - register int i; - register word q; - register word mark_word; - register ptr_t greatest_ha = GC_greatest_plausible_heap_addr; - register ptr_t least_ha = GC_least_plausible_heap_addr; - register mse * mark_stack_top = GC_mark_stack_top; - register mse * mark_stack_limit = GC_mark_stack_limit; + word *q; + word mark_word; + + /* Allow registers to be used for some frequently acccessed */ + /* global variables. Otherwise aliasing issues are likely */ + /* to prevent that. 
*/ + ptr_t greatest_ha = GC_greatest_plausible_heap_addr; + ptr_t least_ha = GC_least_plausible_heap_addr; + mse * mark_stack_top = GC_mark_stack_top; + mse * mark_stack_limit = GC_mark_stack_limit; # define GC_mark_stack_top mark_stack_top # define GC_mark_stack_limit mark_stack_limit # define GC_greatest_plausible_heap_addr greatest_ha @@ -1572,21 +1602,22 @@ void GC_push_marked1(struct hblk *h, hdr *hhdr) /* go through all words in block */ while( p < plim ) { mark_word = *mark_word_addr++; - i = 0; + q = p; while(mark_word != 0) { if (mark_word & 1) { - q = p[i]; - GC_PUSH_ONE_HEAP(q, p + i); + PUSH_GRANULE(q); } - i++; + q += GC_GRANULE_WORDS; mark_word >>= 1; } - p += WORDSZ; + p += WORDSZ*GC_GRANULE_WORDS; } + # undef GC_greatest_plausible_heap_addr # undef GC_least_plausible_heap_addr # undef GC_mark_stack_top # undef GC_mark_stack_limit + GC_mark_stack_top = mark_stack_top; } @@ -1594,19 +1625,20 @@ void GC_push_marked1(struct hblk *h, hdr *hhdr) #ifndef UNALIGNED /* Push all objects reachable from marked objects in the given block */ -/* of size 2 objects. */ +/* of size 2 (granules) objects. 
*/ void GC_push_marked2(struct hblk *h, hdr *hhdr) { word * mark_word_addr = &(hhdr->hb_marks[0]); - register word *p; + word *p; word *plim; - register int i; - register word q; - register word mark_word; - register ptr_t greatest_ha = GC_greatest_plausible_heap_addr; - register ptr_t least_ha = GC_least_plausible_heap_addr; - register mse * mark_stack_top = GC_mark_stack_top; - register mse * mark_stack_limit = GC_mark_stack_limit; + word *q; + word mark_word; + + ptr_t greatest_ha = GC_greatest_plausible_heap_addr; + ptr_t least_ha = GC_least_plausible_heap_addr; + mse * mark_stack_top = GC_mark_stack_top; + mse * mark_stack_limit = GC_mark_stack_limit; + # define GC_mark_stack_top mark_stack_top # define GC_mark_stack_limit mark_stack_limit # define GC_greatest_plausible_heap_addr greatest_ha @@ -1618,42 +1650,43 @@ void GC_push_marked2(struct hblk *h, hdr *hhdr) /* go through all words in block */ while( p < plim ) { mark_word = *mark_word_addr++; - i = 0; + q = p; while(mark_word != 0) { if (mark_word & 1) { - q = p[i]; - GC_PUSH_ONE_HEAP(q, p + i); - q = p[i+1]; - GC_PUSH_ONE_HEAP(q, p + i); + PUSH_GRANULE(q); + PUSH_GRANULE(q + GC_GRANULE_WORDS); } - i += 2; + q += 2 * GC_GRANULE_WORDS; mark_word >>= 2; } - p += WORDSZ; + p += WORDSZ*GC_GRANULE_WORDS; } + # undef GC_greatest_plausible_heap_addr # undef GC_least_plausible_heap_addr # undef GC_mark_stack_top # undef GC_mark_stack_limit + GC_mark_stack_top = mark_stack_top; } +# if GC_GRANULE_WORDS < 4 /* Push all objects reachable from marked objects in the given block */ -/* of size 4 objects. */ +/* of size 4 (granules) objects. */ /* There is a risk of mark stack overflow here. But we handle that. */ /* And only unmarked objects get pushed, so it's not very likely. 
*/ void GC_push_marked4(struct hblk *h, hdr *hhdr) { word * mark_word_addr = &(hhdr->hb_marks[0]); - register word *p; + word *p; word *plim; - register int i; - register word q; - register word mark_word; - register ptr_t greatest_ha = GC_greatest_plausible_heap_addr; - register ptr_t least_ha = GC_least_plausible_heap_addr; - register mse * mark_stack_top = GC_mark_stack_top; - register mse * mark_stack_limit = GC_mark_stack_limit; + word *q; + word mark_word; + + ptr_t greatest_ha = GC_greatest_plausible_heap_addr; + ptr_t least_ha = GC_least_plausible_heap_addr; + mse * mark_stack_top = GC_mark_stack_top; + mse * mark_stack_limit = GC_mark_stack_limit; # define GC_mark_stack_top mark_stack_top # define GC_mark_stack_limit mark_stack_limit # define GC_greatest_plausible_heap_addr greatest_ha @@ -1665,22 +1698,18 @@ void GC_push_marked4(struct hblk *h, hdr *hhdr) /* go through all words in block */ while( p < plim ) { mark_word = *mark_word_addr++; - i = 0; + q = p; while(mark_word != 0) { if (mark_word & 1) { - q = p[i]; - GC_PUSH_ONE_HEAP(q, p + i); - q = p[i+1]; - GC_PUSH_ONE_HEAP(q, p + i + 1); - q = p[i+2]; - GC_PUSH_ONE_HEAP(q, p + i + 2); - q = p[i+3]; - GC_PUSH_ONE_HEAP(q, p + i + 3); + PUSH_GRANULE(q); + PUSH_GRANULE(q + GC_GRANULE_WORDS); + PUSH_GRANULE(q + 2*GC_GRANULE_WORDS); + PUSH_GRANULE(q + 3*GC_GRANULE_WORDS); } - i += 4; + q += 4 * GC_GRANULE_WORDS; mark_word >>= 4; } - p += WORDSZ; + p += WORDSZ*GC_GRANULE_WORDS; } # undef GC_greatest_plausible_heap_addr # undef GC_least_plausible_heap_addr @@ -1689,9 +1718,11 @@ void GC_push_marked4(struct hblk *h, hdr *hhdr) GC_mark_stack_top = mark_stack_top; } +#endif /* GC_GRANULE_WORDS < 4 */ + #endif /* UNALIGNED */ -#endif /* SMALL_CONFIG */ +#endif /* USE_PUSH_MARKED_ACCELERATORS */ /* Push all objects reachable from marked objects in the given block */ void GC_push_marked(struct hblk *h, hdr *hhdr) @@ -1715,20 +1746,21 @@ void GC_push_marked(struct hblk *h, hdr *hhdr) lim = (h + 1)->hb_body - sz; } - 
switch(BYTES_TO_WORDS(sz)) { -# if !defined(SMALL_CONFIG) && !defined(USE_MARK_BYTES) + switch(BYTES_TO_GRANULES(sz)) { +# if defined(USE_PUSH_MARKED_ACCELERATORS) case 1: GC_push_marked1(h, hhdr); break; -# endif -# if !defined(SMALL_CONFIG) && !defined(UNALIGNED) && \ - !defined(USE_MARK_BYTES) - case 2: - GC_push_marked2(h, hhdr); - break; - case 4: - GC_push_marked4(h, hhdr); - break; +# if !defined(UNALIGNED) + case 2: + GC_push_marked2(h, hhdr); + break; +# if GC_GRANULE_WORDS < 4 + case 4: + GC_push_marked4(h, hhdr); + break; +# endif +# endif # endif default: GC_mark_stack_top_reg = GC_mark_stack_top; diff --git a/mark_rts.c b/mark_rts.c index 19ea80a..bd97c6e 100644 --- a/mark_rts.c +++ b/mark_rts.c @@ -593,7 +593,7 @@ void GC_push_roots(GC_bool all, ptr_t cold_gc_frame) /* If the world is not stopped, this is unsafe. It is */ /* also unnecessary, since we will do this again with the */ /* world stopped. */ -# if defined(THREAD_LOCAL_ALLOC) && !defined(DBG_HDRS_ALL) +# if defined(THREAD_LOCAL_ALLOC) if (GC_world_stopped) GC_mark_thread_local_free_lists(); # endif diff --git a/misc.c b/misc.c index 70e37fb..10bf512 100644 --- a/misc.c +++ b/misc.c @@ -43,31 +43,12 @@ int GC_log; /* Forward decl, so we can set it. */ #endif -# ifdef THREADS -# ifdef PCR -# include "il/PCR_IL.h" - PCR_Th_ML GC_allocate_ml; -# elif defined(GC_WIN32_THREADS) -# if defined(GC_PTHREADS) - pthread_mutex_t GC_allocate_ml = PTHREAD_MUTEX_INITIALIZER; -# elif defined(GC_DLL) - __declspec(dllexport) CRITICAL_SECTION GC_allocate_ml; -# else - CRITICAL_SECTION GC_allocate_ml; -# endif -# elif defined(GC_PTHREADS) -# if defined(USE_SPIN_LOCK) - pthread_t GC_lock_holder = NO_THREAD; -# else - pthread_mutex_t GC_allocate_ml = PTHREAD_MUTEX_INITIALIZER; - pthread_t GC_lock_holder = NO_THREAD; - /* Used only for assertions, and to prevent */ - /* recursive reentry in the system call wrapper. 
*/ -# endif -# else - --> declare allocator lock here -# endif -# endif +#if defined(THREADS) && defined(PCR) +# include "il/PCR_IL.h" + PCR_Th_ML GC_allocate_ml; +#endif +/* For other platforms with threads, the lock and possibly */ +/* GC_lock_holder variables are defined in the thread support code. */ #if defined(NOSYS) || defined(ECOS) #undef STACKBASE @@ -157,7 +138,7 @@ void * GC_project2(void *arg1, void *arg2) /* quantization alogrithm (but we precompute it). */ void GC_init_size_map(void) { - register unsigned i; + int i; /* Map size 0 to something bigger. */ /* This avoids problems at lower levels. */ @@ -423,7 +404,7 @@ void GC_init(void) #if defined(GC_WIN32_THREADS) && !defined(GC_PTHREADS) if (!GC_is_initialized) { BOOL (WINAPI *pfn) (LPCRITICAL_SECTION, DWORD) = NULL; - HMODULE hK32 = GetModuleHandle("kernel32.dll"); + HMODULE hK32 = GetModuleHandleA("kernel32.dll"); if (hK32) (FARPROC) pfn = GetProcAddress(hK32, "InitializeCriticalSectionAndSpinCount"); diff --git a/os_dep.c b/os_dep.c index d78f8e9..e43062f 100644 --- a/os_dep.c +++ b/os_dep.c @@ -80,10 +80,12 @@ # undef GC_AMIGA_DEF #endif -#if defined(MSWIN32) || defined(MSWINCE) +#if defined(MSWIN32) || defined(MSWINCE) || defined(CYGWIN32) # define WIN32_LEAN_AND_MEAN # define NOSERVICE # include + /* It's not clear this is completely kosher under Cygwin. But it */ + /* allows us to get a working GC_get_stack_base. */ #endif #ifdef MACOS @@ -468,7 +470,7 @@ void GC_enable_signals(void) && !defined(MACOS) && !defined(DJGPP) && !defined(DOS4GW) \ && !defined(NOSYS) && !defined(ECOS) -# if defined(sigmask) && !defined(UTS4) && !defined(HURD) +# if 0 /* Use the traditional BSD interface */ # define SIGSET_T int # define SIG_DEL(set, signal) (set) &= ~(sigmask(signal)) @@ -477,14 +479,15 @@ void GC_enable_signals(void) /* longjmp implementations. Most systems appear not to have */ /* a signal 32. 
*/ # define SIGSETMASK(old, new) (old) = sigsetmask(new) -# else - /* Use POSIX/SYSV interface */ -# define SIGSET_T sigset_t -# define SIG_DEL(set, signal) sigdelset(&(set), (signal)) -# define SIG_FILL(set) sigfillset(&set) -# define SIGSETMASK(old, new) sigprocmask(SIG_SETMASK, &(new), &(old)) # endif + /* Use POSIX/SYSV interface */ +# define SIGSET_T sigset_t +# define SIG_DEL(set, signal) sigdelset(&(set), (signal)) +# define SIG_FILL(set) sigfillset(&set) +# define SIGSETMASK(old, new) sigprocmask(SIG_SETMASK, &(new), &(old)) + + static GC_bool mask_initialized = FALSE; static SIGSET_T new_mask; @@ -578,7 +581,7 @@ word GC_page_size; * With threads, GC_mark_roots needs to know how to do this. * Called with allocator lock held. */ -# if defined(MSWIN32) || defined(MSWINCE) +# if defined(MSWIN32) || defined(MSWINCE) || defined(CYGWIN32) # define is_writable(prot) ((prot) == PAGE_READWRITE \ || (prot) == PAGE_WRITECOPY \ || (prot) == PAGE_EXECUTE_READWRITE \ @@ -970,7 +973,8 @@ ptr_t GC_get_main_stack_base(void) #endif /* FREEBSD_STACKBOTTOM */ #if !defined(BEOS) && !defined(AMIGA) && !defined(MSWIN32) \ - && !defined(MSWINCE) && !defined(OS2) && !defined(NOSYS) && !defined(ECOS) + && !defined(MSWINCE) && !defined(OS2) && !defined(NOSYS) && !defined(ECOS) \ + && !defined(CYGWIN32) ptr_t GC_get_main_stack_base(void) { @@ -2021,8 +2025,7 @@ void GC_default_push_other_roots(void) # endif /* PCR */ -# if defined(GC_SOLARIS_THREADS) || defined(GC_PTHREADS) || \ - defined(GC_WIN32_THREADS) +# if defined(GC_PTHREADS) || defined(GC_WIN32_THREADS) extern void GC_push_all_stacks(void); @@ -2031,7 +2034,7 @@ void GC_default_push_other_roots(void) GC_push_all_stacks(); } -# endif /* GC_SOLARIS_THREADS || GC_PTHREADS */ +# endif /* GC_WIN32_THREADS || GC_PTHREADS */ void (*GC_push_other_roots)(void) = GC_default_push_other_roots; @@ -2182,8 +2185,6 @@ void GC_or_pages(page_hash_table pht1, page_hash_table pht2) } # ifndef MPROTECT_VDB - void GC_is_fresh(struct hblk *h, word 
n) - {} void GC_remove_protection(struct hblk *h, word nblocks, GC_bool is_ptrfree) {} # endif @@ -2235,11 +2236,6 @@ GC_bool GC_page_was_ever_dirty(struct hblk *h) return(TRUE); } -/* Reset the n pages starting at h to "was never dirty" status. */ -void GC_is_fresh(struct hblk *h, word n) -{ -} - /* A call that: */ /* I) hints that [h, h+nblocks) is about to be written. */ /* II) guarantees that protection is removed. */ @@ -2302,11 +2298,6 @@ void GC_dirty(ptr_t p) async_set_pht_entry_from_index(GC_dirty_pages, index); } -/* Reset the n pages starting at h to "was never dirty" status. */ -void GC_is_fresh(struct hblk *h, word n) -{ -} - /*ARGSUSED*/ void GC_remove_protection(struct hblk *h, word nblocks, GC_bool is_ptrfree) { @@ -2980,12 +2971,6 @@ GC_bool GC_page_was_ever_dirty(struct hblk *h) return(TRUE); } -/* Reset the n pages starting at h to "was never dirty" status. */ -/*ARGSUSED*/ -void GC_is_fresh(struct hblk *h, word n) -{ -} - # endif /* MPROTECT_VDB */ # ifdef PROC_VDB @@ -3013,23 +2998,6 @@ void GC_is_fresh(struct hblk *h, word n) word GC_proc_buf_size = INITIAL_BUF_SZ; char *GC_proc_buf; -#ifdef GC_SOLARIS_THREADS -/* We don't have exact sp values for threads. So we count on */ -/* occasionally declaring stack pages to be fresh. Thus we */ -/* need a real implementation of GC_is_fresh. We can't clear */ -/* entries in GC_written_pages, since that would declare all */ -/* pages with the given hash address to be fresh. */ -# define MAX_FRESH_PAGES 8*1024 /* Must be power of 2 */ - struct hblk ** GC_fresh_pages; /* A direct mapped cache. */ - /* Collisions are dropped. 
*/ - -# define FRESH_PAGE_SLOT(h) (divHBLKSZ((word)(h)) & (MAX_FRESH_PAGES-1)) -# define ADD_FRESH_PAGE(h) \ - GC_fresh_pages[FRESH_PAGE_SLOT(h)] = (h) -# define PAGE_IS_FRESH(h) \ - (GC_fresh_pages[FRESH_PAGE_SLOT(h)] == (h) && (h) != 0) -#endif - int GC_proc_fd; void GC_dirty_init(void) @@ -3060,15 +3028,6 @@ void GC_dirty_init(void) ABORT("/proc ioctl failed"); } GC_proc_buf = GC_scratch_alloc(GC_proc_buf_size); -# ifdef GC_SOLARIS_THREADS - GC_fresh_pages = (struct hblk **) - GC_scratch_alloc(MAX_FRESH_PAGES * sizeof (struct hblk *)); - if (GC_fresh_pages == 0) { - GC_err_printf("No space for fresh pages\n"); - EXIT(); - } - BZERO(GC_fresh_pages, MAX_FRESH_PAGES * sizeof (struct hblk *)); -# endif } /* Ignore write hints. They don't help us here. */ @@ -3080,11 +3039,7 @@ GC_bool is_ptrfree; { } -#ifdef GC_SOLARIS_THREADS -# define READ(fd,buf,nbytes) syscall(SYS_read, fd, buf, nbytes) -#else -# define READ(fd,buf,nbytes) read(fd, buf, nbytes) -#endif +# define READ(fd,buf,nbytes) read(fd, buf, nbytes) void GC_read_dirty(void) { @@ -3117,10 +3072,6 @@ void GC_read_dirty(void) /* Punt: */ memset(GC_grungy_pages, 0xff, sizeof (page_hash_table)); memset(GC_written_pages, 0xff, sizeof(page_hash_table)); -# ifdef GC_SOLARIS_THREADS - BZERO(GC_fresh_pages, - MAX_FRESH_PAGES * sizeof (struct hblk *)); -# endif return; } } @@ -3147,15 +3098,6 @@ void GC_read_dirty(void) register word index = PHT_HASH(h); set_pht_entry_from_index(GC_grungy_pages, index); -# ifdef GC_SOLARIS_THREADS - { - register int slot = FRESH_PAGE_SLOT(h); - - if (GC_fresh_pages[slot] == h) { - GC_fresh_pages[slot] = 0; - } - } -# endif h++; } } @@ -3165,30 +3107,16 @@ void GC_read_dirty(void) } /* Update GC_written_pages. */ GC_or_pages(GC_written_pages, GC_grungy_pages); -# ifdef GC_SOLARIS_THREADS - /* Make sure that old stacks are considered completely clean */ - /* unless written again. 
*/ - GC_old_stacks_are_fresh(); -# endif } #undef READ GC_bool GC_page_was_dirty(struct hblk *h) -struct hblk *h; { register word index = PHT_HASH(h); register GC_bool result; result = get_pht_entry_from_index(GC_grungy_pages, index); -# ifdef GC_SOLARIS_THREADS - if (result && PAGE_IS_FRESH(h)) result = FALSE; - /* This happens only if page was declared fresh since */ - /* the read_dirty call, e.g. because it's in an unused */ - /* thread stack. It's OK to treat it as clean, in */ - /* that case. And it's consistent with */ - /* GC_page_was_ever_dirty. */ -# endif return(result); } @@ -3198,29 +3126,9 @@ GC_bool GC_page_was_ever_dirty(struct hblk *h) register GC_bool result; result = get_pht_entry_from_index(GC_written_pages, index); -# ifdef GC_SOLARIS_THREADS - if (result && PAGE_IS_FRESH(h)) result = FALSE; -# endif return(result); } -/* Caller holds allocation lock. */ -void GC_is_fresh(struct hblk *h, word n) -{ - - register word index; - -# ifdef GC_SOLARIS_THREADS - register word i; - - if (GC_fresh_pages != 0) { - for (i = 0; i < n; i++) { - ADD_FRESH_PAGE(h + i); - } - } -# endif -} - # endif /* PROC_VDB */ diff --git a/pthread_stop_world.c b/pthread_stop_world.c index bd1f67e..33cc9e0 100644 --- a/pthread_stop_world.c +++ b/pthread_stop_world.c @@ -1,7 +1,7 @@ #include "private/pthread_support.h" -#if defined(GC_PTHREADS) && !defined(GC_SOLARIS_THREADS) \ - && !defined(GC_WIN32_THREADS) && !defined(GC_DARWIN_THREADS) +#if defined(GC_PTHREADS) && !defined(GC_WIN32_THREADS) && \ + !defined(GC_DARWIN_THREADS) #include #include @@ -160,12 +160,12 @@ void GC_suspend_handler_inner(ptr_t sig_arg, void *context) return; } # ifdef SPARC - me -> stop_info.stack_ptr = (ptr_t)GC_save_regs_in_stack(); + me -> stop_info.stack_ptr = GC_save_regs_in_stack(); # else me -> stop_info.stack_ptr = (ptr_t)(&dummy); # endif # ifdef IA64 - me -> backing_store_ptr = (ptr_t)GC_save_regs_in_stack(); + me -> backing_store_ptr = GC_save_regs_in_stack(); # endif /* Tell the thread 
that wants to stop the world that this */ @@ -282,6 +282,8 @@ void GC_push_all_stacks() (unsigned)p -> id, bs_lo, bs_hi); # endif if (pthread_equal(p -> id, me)) { + /* FIXME: This may add an unbounded number of entries, */ + /* and hence overflow the mark stack, which is bad. */ GC_push_all_eager(bs_lo, bs_hi); } else { GC_push_all_stack(bs_lo, bs_hi); @@ -340,13 +342,13 @@ int GC_suspend_all() return n_live_threads; } -/* Caller holds allocation lock. */ void GC_stop_world() { int i; int n_live_threads; int code; + GC_ASSERT(I_HOLD_LOCK()); #if DEBUG_THREADS GC_printf("Stopping the world from 0x%x\n", (unsigned)pthread_self()); #endif diff --git a/pthread_support.c b/pthread_support.c index 5b25525..a8c3c6b 100644 --- a/pthread_support.c +++ b/pthread_support.c @@ -80,7 +80,15 @@ # include #endif /* GC_DARWIN_THREADS */ - +/* Allocator lock definitions. */ +#if defined(USE_SPIN_LOCK) + pthread_t GC_lock_holder = NO_THREAD; +#else + pthread_mutex_t GC_allocate_ml = PTHREAD_MUTEX_INITIALIZER; + pthread_t GC_lock_holder = NO_THREAD; + /* Used only for assertions, and to prevent */ + /* recursive reentry in the system call wrapper. 
*/ +#endif #if defined(GC_DGUX386_THREADS) # include @@ -241,7 +249,7 @@ void GC_mark_thread_local_free_lists(void) GC_check_tls_for(&(p->tlfs)); } } -# if !defined(USE_COMPILER_TLS) && !defined(USE_PTHREAD_SPECIFIC) +# if defined(USE_CUSTOM_SPECIFIC) if (GC_thread_key != 0) GC_check_tsd_marks(GC_thread_key); # endif @@ -346,16 +354,15 @@ volatile GC_thread GC_threads[THREAD_TABLE_SZ]; void GC_push_thread_structures(void) { + GC_ASSERT(I_HOLD_LOCK()); GC_push_all((ptr_t)(GC_threads), (ptr_t)(GC_threads)+sizeof(GC_threads)); -# if defined(THREAD_LOCAL_ALLOC) && !defined(DBG_HDRS_ALL) +# if defined(THREAD_LOCAL_ALLOC) GC_push_all((ptr_t)(&GC_thread_key), (ptr_t)(&GC_thread_key)+sizeof(&GC_thread_key)); # endif } -#if defined(THREAD_LOCAL_ALLOC) && !defined(DBG_HDRS_ALL) -#endif /* THREAD_LOCAL_ALLOC */ - +/* It may not be safe to allocate when we register the first thread. */ static struct GC_Thread_Rep first_thread; /* Add a thread to GC_threads. We assume it wasn't already there. */ @@ -385,13 +392,13 @@ GC_thread GC_new_thread(pthread_t id) /* Delete a thread from GC_threads. We assume it is there. */ /* (The code intentionally traps if it wasn't.) */ -/* Caller holds allocation lock. */ void GC_delete_thread(pthread_t id) { int hv = ((word)id) % THREAD_TABLE_SZ; register GC_thread p = GC_threads[hv]; register GC_thread prev = 0; + GC_ASSERT(I_HOLD_LOCK()); while (!pthread_equal(p -> id, id)) { prev = p; p = p -> next; @@ -408,12 +415,14 @@ void GC_delete_thread(pthread_t id) /* been notified, then there may be more than one thread */ /* in the table with the same pthread id. */ /* This is OK, but we need a way to delete a specific one. 
*/ -void GC_delete_gc_thread(pthread_t id, GC_thread gc_id) +void GC_delete_gc_thread(GC_thread gc_id) { + pthread_t id = gc_id -> id; int hv = ((word)id) % THREAD_TABLE_SZ; register GC_thread p = GC_threads[hv]; register GC_thread prev = 0; + GC_ASSERT(I_HOLD_LOCK()); while (p != gc_id) { prev = p; p = p -> next; @@ -680,7 +689,8 @@ void GC_thr_init(void) # if defined(GC_HPUX_THREADS) GC_nprocs = pthread_num_processors_np(); # endif -# if defined(GC_OSF1_THREADS) || defined(GC_AIX_THREADS) +# if defined(GC_OSF1_THREADS) || defined(GC_AIX_THREADS) \ + || defined(GC_SOLARIS_THREADS) GC_nprocs = sysconf(_SC_NPROCESSORS_ONLN); if (GC_nprocs <= 0) GC_nprocs = 1; # endif @@ -742,7 +752,7 @@ void GC_thr_init(void) /* may require allocation. */ /* Called without allocation lock. */ /* Must be called before a second thread is created. */ -/* Called without allocation lock. */ +/* Did we say it's called without the allocation lock? */ void GC_init_parallel(void) { if (parallel_initialized) return; @@ -751,7 +761,7 @@ void GC_init_parallel(void) /* GC_init() calls us back, so set flag first. */ if (!GC_is_initialized) GC_init(); /* Initialize thread local free lists if used. 
*/ -# if defined(THREAD_LOCAL_ALLOC) && !defined(DBG_HDRS_ALL) +# if defined(THREAD_LOCAL_ALLOC) LOCK(); GC_init_thread_local(&(GC_lookup_thread(pthread_self())->tlfs)); UNLOCK(); @@ -789,12 +799,12 @@ static void GC_do_blocking_inner(ptr_t data, void * context) { me = GC_lookup_thread(pthread_self()); GC_ASSERT(!(me -> thread_blocked)); # ifdef SPARC - me -> stop_info.stack_ptr = (ptr_t)GC_save_regs_in_stack(); + me -> stop_info.stack_ptr = GC_save_regs_in_stack(); # elif !defined(GC_DARWIN_THREADS) - me -> stop_info.stack_ptr = (ptr_t)GC_approx_sp(); + me -> stop_info.stack_ptr = GC_approx_sp(); # endif # ifdef IA64 - me -> backing_store_ptr = (ptr_t)GC_save_regs_in_stack(); + me -> backing_store_ptr = GC_save_regs_in_stack(); # endif me -> thread_blocked = TRUE; /* Save context here if we want to support precise stack marking */ @@ -830,13 +840,17 @@ int GC_unregister_my_thread(void) /* complete before we remove this thread. */ GC_wait_for_gc_completion(FALSE); me = GC_lookup_thread(pthread_self()); - GC_destroy_thread_local(&(me->tlfs)); +# if defined(THREAD_LOCAL_ALLOC) + GC_destroy_thread_local(&(me->tlfs)); +# endif if (me -> flags & DETACHED) { GC_delete_thread(pthread_self()); } else { me -> flags |= FINISHED; } - GC_remove_specific(GC_thread_key); +# if defined(THREAD_LOCAL_ALLOC) + GC_remove_specific(); +# endif UNLOCK(); return GC_SUCCESS; } @@ -877,7 +891,7 @@ int WRAP_FUNC(pthread_join)(pthread_t thread, void **retval) if (result == 0) { LOCK(); /* Here the pthread thread id may have been recycled. */ - GC_delete_gc_thread(thread, thread_gc_id); + GC_delete_gc_thread(thread_gc_id); UNLOCK(); } return result; @@ -899,7 +913,7 @@ WRAP_FUNC(pthread_detach)(pthread_t thread) thread_gc_id -> flags |= DETACHED; /* Here the pthread thread id may have been recycled. 
*/ if (thread_gc_id -> flags & FINISHED) { - GC_delete_gc_thread(thread, thread_gc_id); + GC_delete_gc_thread(thread_gc_id); } UNLOCK(); } @@ -913,7 +927,7 @@ GC_thread GC_register_my_thread_inner(struct GC_stack_base *sb, { GC_thread me; - GC_in_thread_creation = TRUE; /* OK to collect from unknow thread. */ + GC_in_thread_creation = TRUE; /* OK to collect from unknown thread. */ me = GC_new_thread(my_pthread); GC_in_thread_creation = FALSE; # ifdef GC_DARWIN_THREADS @@ -975,7 +989,7 @@ void * GC_inner_start_routine(struct GC_stack_base *sb, void * arg) sem_post(&(si -> registered)); /* Last action on si. */ /* OK to deallocate. */ pthread_cleanup_push(GC_thread_exit_proc, 0); -# if defined(THREAD_LOCAL_ALLOC) && !defined(DBG_HDRS_ALL) +# if defined(THREAD_LOCAL_ALLOC) LOCK(); GC_init_thread_local(&(me->tlfs)); UNLOCK(); diff --git a/reclaim.c b/reclaim.c index 6cb8b47..1149f49 100644 --- a/reclaim.c +++ b/reclaim.c @@ -289,7 +289,12 @@ void GC_reclaim_block(struct hblk *hbp, word report_if_found) } } else { GC_bool empty = GC_block_empty(hhdr); - GC_ASSERT(sz * hhdr -> hb_n_marks <= HBLKSIZE); +# ifdef PARALLEL_MARK + /* Count can be low or one too high. 
*/ + GC_ASSERT(hhdr -> hb_n_marks <= HBLKSIZE/sz + 1); +# else + GC_ASSERT(sz * hhdr -> hb_n_marks <= HBLKSIZE); +# endif if (hhdr -> hb_descr != 0) { GC_composite_in_use += sz * hhdr -> hb_n_marks; } else { @@ -387,7 +392,7 @@ int GC_n_set_marks(hdr *hhdr) #endif /* !USE_MARK_BYTES */ /*ARGSUSED*/ -void GC_print_block_descr(struct hblk *h, word dummy) +void GC_print_block_descr(struct hblk *h, word /* struct PrintStats */ raw_ps) { hdr * hhdr = HDR(h); unsigned bytes = hhdr -> hb_sz; @@ -405,7 +410,7 @@ void GC_print_block_descr(struct hblk *h, word dummy) bytes += HBLKSIZE-1; bytes &= ~(HBLKSIZE-1); - ps = (struct Print_stats *)dummy; + ps = (struct Print_stats *)raw_ps; ps->total_bytes += bytes; ps->number_of_blocks++; } diff --git a/setjmp_t.c b/setjmp_t.c index 648c712..9dc6bfc 100644 --- a/setjmp_t.c +++ b/setjmp_t.c @@ -24,7 +24,7 @@ #include #include #include -#include "private/gcconfig.h" +#include "private/gc_priv.h" #ifdef OS2 /* GETPAGESIZE() is set to getpagesize() by default, but that */ @@ -82,6 +82,9 @@ int main() printf("A good guess for ALIGNMENT on this machine is %ld.\n", (unsigned long)(&(a.a_b))-(unsigned long)(&a)); + printf("The following is a very dubious test of one root marking" + " strategy.\n"); + printf("Results may not be accurate/useful:\n"); /* Encourage the compiler to keep x in a callee-save register */ x = 2*x-1; printf(""); @@ -107,6 +110,27 @@ int main() y++; x = 2; if (y == 1) longjmp(b,1); + printf("Some GC internal configuration stuff: \n"); + printf("\tWORDSZ = %d, ALIGNMENT = %d, GC_GRANULE_BYTES = %d\n", + WORDSZ, ALIGNMENT, GC_GRANULE_BYTES); + printf("\tUsing one mark "); +# if defined(USE_MARK_BYTES) + printf("byte"); +# elif defined(USE_MARK_BITS) + printf("bit"); +# endif + printf(" per "); +# if defined(MARK_BIT_PER_OBJ) + printf("object.\n"); +# elif defined(MARK_BIT_PER_GRANULE) + printf("granule.\n"); +# endif +# ifdef THREAD_LOCAL_ALLOC + printf("Thread local allocation enabled.\n"); +# endif +# ifdef 
PARALLEL_MARK + printf("Parallel marking enabled.\n"); +# endif return(0); } diff --git a/sparc_mach_dep.S b/sparc_mach_dep.S index 06a0f3b..6997fa1 100644 --- a/sparc_mach_dep.S +++ b/sparc_mach_dep.S @@ -24,6 +24,10 @@ GC_push_regs: .size GC_save_regs_in_stack,.GC_save_regs_in_stack_end-GC_save_regs_in_stack +! GC_clear_stack_inner(arg, limit) clears stack area up to limit and +! returns arg. Stack clearing is crucial on SPARC, so we supply +! an assembly version that's more careful. Assumes limit is hotter +! than sp, and limit is 8 byte aligned. .globl GC_clear_stack_inner GC_clear_stack_inner: #if defined(__arch64__) || defined(__sparcv9) diff --git a/tests/leak_test.c b/tests/leak_test.c index 421d0c6..d6a60d4 100644 --- a/tests/leak_test.c +++ b/tests/leak_test.c @@ -5,6 +5,9 @@ main() { int i; GC_find_leak = 1; /* for new collect versions not compiled */ /* with -DFIND_LEAK. */ + + GC_INIT(); /* Needed if thread-local allocation is enabled. */ + /* FIXME: This is not ideal. */ for (i = 0; i < 10; ++i) { p[i] = malloc(sizeof(int)+i); } @@ -18,4 +21,5 @@ main() { CHECK_LEAKS(); CHECK_LEAKS(); CHECK_LEAKS(); + return 0; } diff --git a/tests/test.c b/tests/test.c index 076ce4b..afe62f2 100644 --- a/tests/test.c +++ b/tests/test.c @@ -36,12 +36,10 @@ # else # include /* Not normally used, but handy for debugging. */ # endif -# include /* Not normally used, but handy for debugging. */ # include "gc.h" # include "gc_typed.h" # include "private/gc_priv.h" /* For output, locking, MIN_WORDS, */ - /* and some statistics. */ -# include "private/gcconfig.h" + /* and some statistics, and gcconfig.h. 
*/ # if defined(MSWIN32) || defined(MSWINCE) # include @@ -53,11 +51,6 @@ # define GC_printf printf # endif -# if defined(GC_SOLARIS_THREADS) && !defined(GC_SOLARIS_PTHREADS) -# include -# include -# endif - # if defined(GC_PTHREADS) # include # endif @@ -507,8 +500,6 @@ void check_marks_int_list(sexpr x) } } -/* # elif defined(GC_SOLARIS_THREADS) */ - # else # define fork_a_thread() @@ -674,17 +665,11 @@ volatile int dropped_something = 0; # ifdef PCR PCR_ThCrSec_EnterSys(); # endif -# if defined(GC_SOLARIS_THREADS) && !defined(GC_SOLARIS_PTHREADS) - static mutex_t incr_lock; - mutex_lock(&incr_lock); -# endif -# if defined(GC_PTHREADS) +# if defined(GC_PTHREADS) static pthread_mutex_t incr_lock = PTHREAD_MUTEX_INITIALIZER; pthread_mutex_lock(&incr_lock); -# else -# ifdef GC_WIN32_THREADS - EnterCriticalSection(&incr_cs); -# endif +# elif defined(GC_WIN32_THREADS) + EnterCriticalSection(&incr_cs); # endif if ((int)(GC_word)client_data != t -> level) { (void)GC_printf("Wrong finalization data - collector is broken\n"); @@ -695,15 +680,10 @@ volatile int dropped_something = 0; # ifdef PCR PCR_ThCrSec_ExitSys(); # endif -# if defined(GC_SOLARIS_THREADS) && !defined(GC_SOLARIS_PTHREADS) - mutex_unlock(&incr_lock); -# endif # if defined(GC_PTHREADS) pthread_mutex_unlock(&incr_lock); -# else -# ifdef GC_WIN32_THREADS - LeaveCriticalSection(&incr_cs); -# endif +# elif defined(GC_WIN32_THREADS) + LeaveCriticalSection(&incr_cs); # endif } @@ -757,17 +737,11 @@ int n; # ifdef PCR PCR_ThCrSec_EnterSys(); # endif -# if defined(GC_SOLARIS_THREADS) && !defined(GC_SOLARIS_PTHREADS) - static mutex_t incr_lock; - mutex_lock(&incr_lock); -# endif # if defined(GC_PTHREADS) static pthread_mutex_t incr_lock = PTHREAD_MUTEX_INITIALIZER; pthread_mutex_lock(&incr_lock); -# else -# ifdef GC_WIN32_THREADS - EnterCriticalSection(&incr_cs); -# endif +# elif defined(GC_WIN32_THREADS) + EnterCriticalSection(&incr_cs); # endif /* Losing a count here causes erroneous report of failure. 
*/ finalizable_count++; @@ -775,15 +749,10 @@ int n; # ifdef PCR PCR_ThCrSec_ExitSys(); # endif -# if defined(GC_SOLARIS_THREADS) && !defined(GC_SOLARIS_PTHREADS) - mutex_unlock(&incr_lock); -# endif # if defined(GC_PTHREADS) pthread_mutex_unlock(&incr_lock); -# else -# ifdef GC_WIN32_THREADS - LeaveCriticalSection(&incr_cs); -# endif +# elif defined(GC_WIN32_THREADS) + LeaveCriticalSection(&incr_cs); # endif } @@ -841,48 +810,8 @@ int n; chktree(t -> rchild, n-1); } -# if defined(GC_SOLARIS_THREADS) && !defined(GC_SOLARIS_PTHREADS) -thread_key_t fl_key; -void * alloc8bytes() -{ -# if defined(SMALL_CONFIG) || defined(GC_DEBUG) - collectable_count++; - return(GC_MALLOC(8)); -# else - void ** my_free_list_ptr; - void * my_free_list; - - if (thr_getspecific(fl_key, (void **)(&my_free_list_ptr)) != 0) { - (void)GC_printf("thr_getspecific failed\n"); - FAIL; - } - if (my_free_list_ptr == 0) { - uncollectable_count++; - my_free_list_ptr = GC_NEW_UNCOLLECTABLE(void *); - if (thr_setspecific(fl_key, my_free_list_ptr) != 0) { - (void)GC_printf("thr_setspecific failed\n"); - FAIL; - } - } - my_free_list = *my_free_list_ptr; - if (my_free_list == 0) { - collectable_count++; - my_free_list = GC_malloc_many(8); - if (my_free_list == 0) { - (void)GC_printf("alloc8bytes out of memory\n"); - FAIL; - } - } - *my_free_list_ptr = GC_NEXT(my_free_list); - GC_NEXT(my_free_list) = 0; - return(my_free_list); -# endif -} - -#else - -# if defined(GC_PTHREADS) +#if defined(GC_PTHREADS) pthread_key_t fl_key; void * alloc8bytes() @@ -918,9 +847,8 @@ void * alloc8bytes() # endif } -# else +#else # define alloc8bytes() GC_MALLOC_ATOMIC(8) -# endif #endif void alloc_small(n) @@ -1099,7 +1027,7 @@ static void uniq(void *p, ...) 
{ for (j=0; j= (n = atoi( argv[ 1 ] )))) { - GC_printf0( "usage: test_cpp number-of-iterations\nAssuming 10 iters\n" ); + GC_printf( "usage: test_cpp number-of-iterations\nAssuming 10 iters\n" ); n = 10;} for (iters = 1; iters <= n; iters++) { - GC_printf1( "Starting iteration %d\n", iters ); + GC_printf( "Starting iteration %d\n", iters ); /* Allocate some uncollectable As and disguise their pointers. Later we'll check to see if the objects are still there. We're @@ -282,7 +282,7 @@ int APIENTRY WinMain( x = *xptr; # endif my_assert (29 == x[0]); - GC_printf0( "The test appears to have succeeded.\n" ); + GC_printf( "The test appears to have succeeded.\n" ); return( 0 );} diff --git a/tests/thread_leak_test.c b/tests/thread_leak_test.c index 1174705..5f183cf 100644 --- a/tests/thread_leak_test.c +++ b/tests/thread_leak_test.c @@ -37,4 +37,5 @@ main() { CHECK_LEAKS(); CHECK_LEAKS(); CHECK_LEAKS(); + return 0; } diff --git a/thread_local_alloc.c b/thread_local_alloc.c index 0961794..b3fe28c 100644 --- a/thread_local_alloc.c +++ b/thread_local_alloc.c @@ -12,37 +12,17 @@ */ #include "private/gc_priv.h" -# if defined(THREAD_LOCAL_ALLOC) && !defined(DBG_HDRS_ALL) +# if defined(THREAD_LOCAL_ALLOC) #include "private/thread_local_alloc.h" #include "gc_inline.h" -# if defined(GC_HPUX_THREADS) && !defined(USE_PTHREAD_SPECIFIC) \ - && !defined(USE_COMPILER_TLS) -# ifdef __GNUC__ -# define USE_PTHREAD_SPECIFIC - /* Empirically, as of gcc 3.3, USE_COMPILER_TLS doesn't work. 
*/ -# else -# define USE_COMPILER_TLS -# endif -# endif - -# if defined USE_HPUX_TLS -# error USE_HPUX_TLS macro was replaced by USE_COMPILER_TLS -# endif - -# if (defined(GC_DGUX386_THREADS) || defined(GC_OSF1_THREADS) || \ - defined(GC_DARWIN_THREADS) || defined(GC_AIX_THREADS)) \ - && !defined(USE_PTHREAD_SPECIFIC) -# define USE_PTHREAD_SPECIFIC -# endif - # include -/* We don't really support thread-local allocation with DBG_HDRS_ALL */ - -#ifdef USE_COMPILER_TLS +#if defined(USE_COMPILER_TLS) __thread +#elif defined(USE_WIN32_COMPILER_TLS) + declspec(thread) #endif GC_key_t GC_thread_key; @@ -56,7 +36,6 @@ static void return_freelists(void **fl, void **gfl) void *q, **qptr; for (i = 1; i < TINY_FREELISTS; ++i) { -#if 0 if ((word)(fl[i]) >= HBLKSIZE) { if (gfl[i] == 0) { gfl[i] = fl[i]; @@ -71,7 +50,6 @@ static void return_freelists(void **fl, void **gfl) gfl[i] = fl[i]; } } -#endif /* Clear fl[i], since the thread structure may hang around. */ /* Do it in a way that is likely to trap if we access it. */ fl[i] = (ptr_t)HBLKSIZE; @@ -134,11 +112,16 @@ void GC_destroy_thread_local(GC_tlfs p) # endif } -#if defined(GC_ASSERTIONS) && defined(GC_LINUX_THREADS) +#if defined(GC_ASSERTIONS) && defined(GC_PTHREADS) && !defined(CYGWIN32) # include extern char * GC_lookup_thread(pthread_t id); #endif +#if defined(GC_ASSERTIONS) && defined(GC_WIN32_THREADS) +# include + extern char * GC_lookup_thread(int id); +#endif + void * GC_malloc(size_t bytes) { size_t granules = ROUNDED_UP_GRANULES(bytes); @@ -164,10 +147,14 @@ void * GC_malloc(size_t bytes) # endif # ifdef GC_ASSERTIONS /* We can't check tsd correctly, since we don't have access to */ - /* the right declarations. But we cna check that it's close. */ + /* the right declarations. But we can check that it's close. 
*/ LOCK(); { - char * me = GC_lookup_thread(pthread_self()); +# if defined(GC_WIN32_THREADS) + char * me = (char *)GC_lookup_thread_inner(GetCurrentThreadId()); +# else + char * me = GC_lookup_thread(pthread_self()); +# endif GC_ASSERT((char *)tsd > me && (char *)tsd < me + 1000); } UNLOCK(); @@ -273,7 +260,7 @@ void GC_mark_thread_local_fls_for(GC_tlfs p) } #endif /* GC_ASSERTIONS */ -# else /* !THREAD_LOCAL_ALLOC && !DBG_HDRS_ALL */ +# else /* !THREAD_LOCAL_ALLOC */ # define GC_destroy_thread_local(t) diff --git a/threadlibs.c b/threadlibs.c index 178a7ec..1309694 100644 --- a/threadlibs.c +++ b/threadlibs.c @@ -11,7 +11,6 @@ int main() "-Wl,--wrap -Wl,pthread_sigmask -Wl,--wrap -Wl,sleep\n"); # endif # if defined(GC_LINUX_THREADS) || defined(GC_IRIX_THREADS) \ - || defined(GC_SOLARIS_PTHREADS) \ || defined(GC_DARWIN_THREADS) || defined(GC_AIX_THREADS) # ifdef GC_USE_DLOPEN_WRAP printf("-ldl "); @@ -31,8 +30,9 @@ int main() # if defined(GC_HPUX_THREADS) || defined(GC_OSF1_THREADS) printf("-lpthread -lrt\n"); # endif -# if defined(GC_SOLARIS_THREADS) && !defined(GC_SOLARIS_PTHREADS) - printf("-lthread -ldl\n"); +# if defined(GC_SOLARIS_THREADS) || defined(GC_SOLARIS_PTHREADS) + printf("-lthread -lposix4\n"); + /* Is this right for recent versions? */ # endif # if defined(GC_WIN32_THREADS) && defined(CYGWIN32) printf("-lpthread\n"); diff --git a/typd_mlc.c b/typd_mlc.c index cdedf46..1124ff9 100644 --- a/typd_mlc.c +++ b/typd_mlc.c @@ -587,9 +587,9 @@ void * GC_malloc_explicitly_typed(size_t lb, GC_descr d) if(SMALL_OBJ(lb)) { lg = GC_size_map[lb]; opp = &(GC_eobjfreelist[lg]); - FASTLOCK(); - if( !FASTLOCK_SUCCEEDED() || (op = *opp) == 0 ) { - FASTUNLOCK(); + LOCK(); + if( (op = *opp) == 0 ) { + UNLOCK(); op = (ptr_t)GENERAL_MALLOC((word)lb, GC_explicit_kind); if (0 == op) return 0; lg = GC_size_map[lb]; /* May have been uninitialized. 
*/ @@ -597,7 +597,7 @@ void * GC_malloc_explicitly_typed(size_t lb, GC_descr d) *opp = obj_link(op); obj_link(op) = 0; GC_bytes_allocd += GRANULES_TO_BYTES(lg); - FASTUNLOCK(); + UNLOCK(); } } else { op = (ptr_t)GENERAL_MALLOC((word)lb, GC_explicit_kind); @@ -620,16 +620,16 @@ DCL_LOCK_STATE; if( SMALL_OBJ(lb) ) { lg = GC_size_map[lb]; opp = &(GC_eobjfreelist[lg]); - FASTLOCK(); - if( !FASTLOCK_SUCCEEDED() || (op = *opp) == 0 ) { - FASTUNLOCK(); + LOCK(); + if( (op = *opp) == 0 ) { + UNLOCK(); op = (ptr_t)GENERAL_MALLOC_IOP(lb, GC_explicit_kind); lg = GC_size_map[lb]; /* May have been uninitialized. */ } else { *opp = obj_link(op); obj_link(op) = 0; GC_bytes_allocd += GRANULES_TO_BYTES(lg); - FASTUNLOCK(); + UNLOCK(); } } else { op = (ptr_t)GENERAL_MALLOC_IOP(lb, GC_explicit_kind); @@ -669,9 +669,9 @@ DCL_LOCK_STATE; if( SMALL_OBJ(lb) ) { lg = GC_size_map[lb]; opp = &(GC_arobjfreelist[lg]); - FASTLOCK(); - if( !FASTLOCK_SUCCEEDED() || (op = *opp) == 0 ) { - FASTUNLOCK(); + LOCK(); + if( (op = *opp) == 0 ) { + UNLOCK(); op = (ptr_t)GENERAL_MALLOC((word)lb, GC_array_kind); if (0 == op) return(0); lg = GC_size_map[lb]; /* May have been uninitialized. */ @@ -679,7 +679,7 @@ DCL_LOCK_STATE; *opp = obj_link(op); obj_link(op) = 0; GC_bytes_allocd += GRANULES_TO_BYTES(lg); - FASTUNLOCK(); + UNLOCK(); } } else { op = (ptr_t)GENERAL_MALLOC((word)lb, GC_array_kind); diff --git a/version.h b/version.h index 9c34ccc..b71dd6b 100644 --- a/version.h +++ b/version.h @@ -3,7 +3,7 @@ /* it to keep the old-style build process working. 
*/ #define GC_TMP_VERSION_MAJOR 7 #define GC_TMP_VERSION_MINOR 0 -#define GC_TMP_ALPHA_VERSION 4 +#define GC_TMP_ALPHA_VERSION 5 #ifndef GC_NOT_ALPHA # define GC_NOT_ALPHA 0xff diff --git a/win32_threads.c b/win32_threads.c index 8609900..214d5c1 100755 --- a/win32_threads.c +++ b/win32_threads.c @@ -4,6 +4,24 @@ #include +#ifdef THREAD_LOCAL_ALLOC +# include "private/thread_local_alloc.h" +#endif /* THREAD_LOCAL_ALLOC */ + +/* Allocation lock declarations. */ +#if !defined(USE_PTHREAD_LOCKS) +# if defined(GC_DLL) + __declspec(dllexport) CRITICAL_SECTION GC_allocate_ml; +# else + CRITICAL_SECTION GC_allocate_ml; +# endif + DWORD GC_lock_holder = NO_THREAD; + /* Thread id for current holder of allocation lock */ +#else + pthread_mutex_t GC_allocate_ml = PTHREAD_MUTEX_INITIALIZER; + pthread_t GC_lock_holder = NO_THREAD; +#endif + #ifdef CYGWIN32 # include @@ -16,41 +34,102 @@ # define DEBUG_CYGWIN_THREADS 0 - void * GC_start_routine(void * arg); + void * GC_pthread_start(void * arg); void GC_thread_exit_proc(void *arg); # include #endif +#if defined(GC_DLL) && !defined(MSWINCE) + static GC_bool GC_win32_dll_threads = FALSE; + /* This code operates in two distinct modes, depending on */ + /* the setting of GC_win32_dll_threads. If */ + /* GC_win32_dll_threads is set, all threads in the process */ + /* are implicitly registered with the GC by DllMain. */ + /* No explicit registration is required, and attempts at */ + /* explicit registration are ignored. This mode is */ + /* very different from the Posix operation of the collector. */ + /* In this mode access to the thread table is lock-free. */ + /* Hence there is a static limit on the number of threads. */ + + /* If GC_win32_dll_threads is FALSE, or the collector is */ + /* built without GC_DLL defined, things operate in a way */ + /* that is very similar to Posix platforms, and new threads */ + /* must be registered with the collector, e.g. by using */ + /* preprocessor-based interception of the thread primitives. 
*/ + /* In this case, we use a real data structure for the thread */ + /* table. Note that there is no equivalent of linker-based */ + /* call interception, since we don't have ELF-like */ + /* facilities. The Windows analog appears to be "API */ + /* hooking", which really seems to be a standard way to */ + /* do minor binary rewriting (?). I'd prefer not to have */ + /* the basic collector rely on such facilities, but an */ + /* optional package that intercepts thread calls this way */ + /* would probably be nice. */ + + /* GC_win32_dll_threads must be set at initialization time, */ + /* i.e. before any collector or thread calls. We make it a */ + /* "dynamic" option only to avoid multiple library versions. */ +#else +# define GC_win32_dll_threads FALSE +#endif + /* The type of the first argument to InterlockedExchange. */ /* Documented to be LONG volatile *, but at least gcc likes */ /* this better. */ typedef LONG * IE_t; -#ifndef MAX_THREADS -# define MAX_THREADS 256 - /* FIXME: */ - /* Things may get quite slow for large numbers of threads, */ - /* since we look them up with sequential search. */ -#endif - GC_bool GC_thr_initialized = FALSE; +GC_bool GC_need_to_lock = FALSE; + +static GC_bool parallel_initialized = FALSE; + +void GC_init_parallel(void); + #ifdef GC_DLL - GC_API GC_bool GC_need_to_lock = TRUE; + /* Turn on GC_win32_dll_threads */ + GC_API void GC_use_DllMain(void) + { +# ifdef THREAD_LOCAL_ALLOC + ABORT("Cannot use thread local allocation with DllMain-based " + "thread registration."); + /* Thread-local allocation really wants to lock at thread */ + /* entry and exit. */ +# endif + GC_need_to_lock = TRUE; /* Cannot intercept thread creation. */ + GC_ASSERT(GC_gc_no == 0); + GC_win32_dll_threads = TRUE; + } #else - GC_bool GC_need_to_lock = FALSE; + GC_API void GC_use_DllMain(void) + { + ABORT("GC not configured as DLL"); + } #endif DWORD GC_main_thread = 0; -struct GC_thread_Rep { - AO_t in_use; /* Updated without lock. 
*/ - /* We assert that unused */ - /* entries have invalid ids of */ - /* zero and zero stack fields. */ +struct GC_Thread_Rep { + union { + AO_t tm_in_use; /* Updated without lock. */ + /* We assert that unused */ + /* entries have invalid ids of */ + /* zero and zero stack fields. */ + /* Used only with GC_win32_dll_threads. */ + struct GC_Thread_Rep * tm_next; + /* Hash table link without */ + /* GC_win32_dll_threads. */ + /* More recently allocated threads */ + /* with a given pthread id come */ + /* first. (All but the first are */ + /* guaranteed to be dead, but we may */ + /* not yet have registered the join.) */ + } table_management; +# define in_use table_management.tm_in_use +# define next table_management.tm_next DWORD id; HANDLE handle; ptr_t stack_base; /* The cold end of the stack. */ @@ -65,9 +144,13 @@ struct GC_thread_Rep { # define FINISHED 1 /* Thread has exited. */ # define DETACHED 2 /* Thread is intended to be detached. */ # endif +# ifdef THREAD_LOCAL_ALLOC + struct thread_local_freelists tlfs; +# endif }; -typedef volatile struct GC_thread_Rep * GC_thread; +typedef struct GC_Thread_Rep * GC_thread; + /* * We assumed that volatile ==> memory ordering, at least among @@ -76,36 +159,78 @@ typedef volatile struct GC_thread_Rep * GC_thread; volatile GC_bool GC_please_stop = FALSE; -/* - * FIXME: At initialization time we should perhaps chose - * between two different thread table representations. This simple - * linear representation may be the best we can reliably do if we use - * DllMain. By default we should probably rely on thread registration - * as with the other platforms, and use a hash table or other real - * data structure. - */ -volatile struct GC_thread_Rep thread_table[MAX_THREADS]; +/* We have two versions of the thread table. Which one */ +/* we us depends on whether or not GC_win32_dll_threads */ +/* is set. 
The one complication is that at process */ +/* startup, we use both, since the client hasn't yet */ +/* had a chance to tell us which one (s)he wants. */ +static GC_bool client_has_run = FALSE; + +/* Thread table used if GC_win32_dll_threads is set. */ +/* This is a fixed size array. */ +/* Since we use runtime conditionals, both versions */ +/* are always defined. */ +# ifndef MAX_THREADS +# define MAX_THREADS 512 +# endif + /* Things may get quite slow for large numbers of threads, */ + /* since we look them up with sequential search. */ + + volatile struct GC_Thread_Rep dll_thread_table[MAX_THREADS]; + + volatile LONG GC_max_thread_index = 0; + /* Largest index in dll_thread_table */ + /* that was ever used. */ + +/* And now the version used if GC_win32_dll_threads is not set. */ +/* This is a chained hash table, with much of the code borrowed */ +/* From the Posix implementation. */ +# define THREAD_TABLE_SZ 256 /* Must be power of 2 */ + volatile GC_thread GC_threads[THREAD_TABLE_SZ]; + -volatile LONG GC_max_thread_index = 0; /* Largest index in thread_table */ - /* that was ever used. */ +/* Add a thread to GC_threads. We assume it wasn't already there. */ +/* Caller holds allocation lock. */ +/* Unlike the pthreads version, the id field is set by the caller. */ +GC_thread GC_new_thread(DWORD id) +{ + int hv = ((word)id) % THREAD_TABLE_SZ; + GC_thread result; + /* It may not be safe to allocate when we register the first thread. */ + static struct GC_Thread_Rep first_thread; + static GC_bool first_thread_used = FALSE; + + GC_ASSERT(I_HOLD_LOCK()); + if (!first_thread_used) { + result = &first_thread; + first_thread_used = TRUE; + } else { + GC_ASSERT(!GC_win32_dll_threads); + result = (struct GC_Thread_Rep *) + GC_INTERNAL_MALLOC(sizeof(struct GC_Thread_Rep), NORMAL); + GC_ASSERT(result -> flags == 0); + } + if (result == 0) return(0); + /* result -> id = id; Done by caller. 
*/ + result -> next = GC_threads[hv]; + GC_threads[hv] = result; + GC_ASSERT(result -> flags == 0 /* && result -> thread_blocked == 0 */); + return(result); +} extern LONG WINAPI GC_write_fault_handler(struct _EXCEPTION_POINTERS *exc_info); /* * This may be called from DllMain, and hence operates under unusual - * constraints. In particular, it must be lock-free. - * Always called from the thread being added. + * constraints. In particular, it must be lock-free if GC_win32_dll_threads + * is set. Always called from the thread being added. + * If GC_win32_dll_threads is not set, we already hold the allocation lock, + * except possibly during single-threaded start-up code. */ static GC_thread GC_register_my_thread_inner(struct GC_stack_base *sb, DWORD thread_id) { - int i; - /* It appears to be unsafe to acquire a lock here, since this */ - /* code is apparently not preeemptible on some systems. */ - /* (This is based on complaints, not on Microsoft's official */ - /* documentation, which says this should perform "only simple */ - /* initialization tasks".) */ - /* Hence we make do with nonblocking synchronization. */ + volatile struct GC_Thread_Rep * me; /* The following should be a noop according to the win32 */ /* documentation. There is empirical evidence that it */ @@ -113,39 +238,60 @@ static GC_thread GC_register_my_thread_inner(struct GC_stack_base *sb, # if defined(MPROTECT_VDB) if (GC_incremental) SetUnhandledExceptionFilter(GC_write_fault_handler); # endif + + if (GC_win32_dll_threads || !client_has_run) { + int i; + /* It appears to be unsafe to acquire a lock here, since this */ + /* code is apparently not preeemptible on some systems. */ + /* (This is based on complaints, not on Microsoft's official */ + /* documentation, which says this should perform "only simple */ + /* initialization tasks".) */ + /* Hence we make do with nonblocking synchronization. 
*/ + /* It has been claimed that DllMain is really only executed with */ + /* a particular system lock held, and thus careful use of locking */ + /* around code that doesn't call back into the system libraries */ + /* might be OK. But this hasn't been tested across all win32 */ + /* variants. */ /* cast away volatile qualifier */ - for (i = 0; InterlockedExchange((IE_t)&thread_table[i].in_use,1) != 0; i++) { - /* Compare-and-swap would make this cleaner, but that's not */ - /* supported before Windows 98 and NT 4.0. In Windows 2000, */ - /* InterlockedExchange is supposed to be replaced by */ - /* InterlockedExchangePointer, but that's not really what I */ - /* want here. */ - /* FIXME: We should eventually declare Win95 dead and use AO_ */ - /* primitives here. */ - if (i == MAX_THREADS - 1) - ABORT("too many threads"); - } - /* Update GC_max_thread_index if necessary. The following is safe, */ - /* and unlike CompareExchange-based solutions seems to work on all */ - /* Windows95 and later platforms. */ - /* Unfortunately, GC_max_thread_index may be temporarily out of */ - /* bounds, so readers have to compensate. */ - while (i > GC_max_thread_index) { - InterlockedIncrement((IE_t)&GC_max_thread_index); + for (i = 0; InterlockedExchange((IE_t)&dll_thread_table[i].in_use,1) != 0; + i++) { + /* Compare-and-swap would make this cleaner, but that's not */ + /* supported before Windows 98 and NT 4.0. In Windows 2000, */ + /* InterlockedExchange is supposed to be replaced by */ + /* InterlockedExchangePointer, but that's not really what I */ + /* want here. */ + /* FIXME: We should eventually declare Win95 dead and use AO_ */ + /* primitives here. */ + if (i == MAX_THREADS - 1) + ABORT("too many threads"); + } + /* Update GC_max_thread_index if necessary. The following is safe, */ + /* and unlike CompareExchange-based solutions seems to work on all */ + /* Windows95 and later platforms. 
*/ + /* Unfortunately, GC_max_thread_index may be temporarily out of */ + /* bounds, so readers have to compensate. */ + while (i > GC_max_thread_index) { + InterlockedIncrement((IE_t)&GC_max_thread_index); + } + if (GC_max_thread_index >= MAX_THREADS) { + /* We overshot due to simultaneous increments. */ + /* Setting it to MAX_THREADS-1 is always safe. */ + GC_max_thread_index = MAX_THREADS - 1; + } + me = dll_thread_table + i; } - if (GC_max_thread_index >= MAX_THREADS) { - /* We overshot due to simultaneous increments. */ - /* Setting it to MAX_THREADS-1 is always safe. */ - GC_max_thread_index = MAX_THREADS - 1; + if (!GC_win32_dll_threads || !client_has_run) { + GC_ASSERT(I_HOLD_LOCK() || !client_has_run); + me = GC_new_thread(thread_id); } # ifdef CYGWIN32 - thread_table[i].pthread_id = pthread_self(); + me -> pthread_id = pthread_self(); # endif if (!DuplicateHandle(GetCurrentProcess(), GetCurrentThread(), GetCurrentProcess(), - (HANDLE*)&thread_table[i].handle, + (HANDLE*)&(me -> handle), 0, 0, DUPLICATE_SAME_ACCESS)) { @@ -153,19 +299,25 @@ static GC_thread GC_register_my_thread_inner(struct GC_stack_base *sb, GC_err_printf("Last error code: %d\n", last_error); ABORT("DuplicateHandle failed"); } - thread_table[i].stack_base = sb -> mem_base; + me -> stack_base = sb -> mem_base; /* Up until this point, GC_push_all_stacks considers this thread */ /* invalid. */ - if (thread_table[i].stack_base == NULL) - ABORT("Bad stack base in GC_register_my_thread"); + if (me -> stack_base == NULL) + ABORT("Bad stack base in GC_register_my_thread_inner"); /* Up until this point, this entry is viewed as reserved but invalid */ /* by GC_delete_thread. */ - thread_table[i].id = thread_id; + me -> id = thread_id; +# if defined(THREAD_LOCAL_ALLOC) + GC_init_thread_local((GC_tlfs)(&(me->tlfs))); +# endif + GC_ASSERT(!GC_please_stop || GC_win32_dll_threads); + /* Otherwise both we and the thread stopping code would be */ + /* holding the allocation lock. 
*/ /* If this thread is being created while we are trying to stop */ /* the world, wait here. Hopefully this can't happen on any */ /* systems that don't allow us to block here. */ while (GC_please_stop) Sleep(20); - return thread_table + i; + return (GC_thread)(me); } /* @@ -187,149 +339,326 @@ LONG GC_get_max_thread_index() /* without a lock, but should be called in contexts in which the */ /* requested thread cannot be asynchronously deleted, e.g. from the */ /* thread itself. */ -static GC_thread GC_lookup_thread(DWORD thread_id) { - int i; - LONG my_max = GC_get_max_thread_index(); - - for (i = 0; +/* This version assumes that either GC_win32_dll_threads is set, or */ +/* we hold the allocator lock. */ +/* Also used (for assertion checking only) from thread_local_alloc.c. */ +GC_thread GC_lookup_thread_inner(DWORD thread_id) { + if (GC_win32_dll_threads) { + int i; + LONG my_max = GC_get_max_thread_index(); + for (i = 0; i <= my_max && - (!AO_load_acquire(&(thread_table[i].in_use)) - || thread_table[i].id != thread_id); + (!AO_load_acquire(&(dll_thread_table[i].in_use)) + || dll_thread_table[i].id != thread_id); /* Must still be in_use, since nobody else can store our thread_id. */ i++) {} - if (i > my_max) { - return 0; + if (i > my_max) { + return 0; + } else { + return (GC_thread)(dll_thread_table + i); + } } else { - return thread_table + i; + int hv = ((word)thread_id) % THREAD_TABLE_SZ; + register GC_thread p = GC_threads[hv]; + + GC_ASSERT(I_HOLD_LOCK()); + while (p != 0 && p -> id != thread_id) p = p -> next; + return(p); } } -int GC_register_my_thread(struct GC_stack_base *sb) { - DWORD t = GetCurrentThreadId(); - - if (0 == GC_lookup_thread(t)) { - /* We lock here, since we want to wait for an ongoing GC. */ +/* A version of the above that acquires the lock if necessary. Note */ +/* that the identically named function for pthreads is different, and */ +/* just assumes we hold the lock. 
*/ +/* Also used (for assertion checking only) from thread_local_alloc.c. */ +static GC_thread GC_lookup_thread(DWORD thread_id) +{ + if (GC_win32_dll_threads) { + return GC_lookup_thread_inner(thread_id); + } else { + GC_thread result; LOCK(); - GC_register_my_thread_inner(sb, t); + result = GC_lookup_thread_inner(thread_id); UNLOCK(); - return GC_SUCCESS; - } else { - return GC_DUPLICATE; + return result; } } -/* This is intended to be lock-free. */ -/* It is either called synchronously from the thread being deleted, */ -/* or by the joining thread. */ -static void GC_delete_gc_thread(GC_thread thr) +/* If a thread has been joined, but we have not yet */ +/* been notified, then there may be more than one thread */ +/* in the table with the same win32 id. */ +/* This is OK, but we need a way to delete a specific one. */ +/* Assumes we hold the allocation lock unless */ +/* GC_win32_dll_threads is set. */ +/* If GC_win32_dll_threads is set it should be called from the */ +/* thread being deleted. */ +void GC_delete_gc_thread(GC_thread gc_id) { - CloseHandle(thr->handle); + if (GC_win32_dll_threads) { + /* This is intended to be lock-free. */ + /* It is either called synchronously from the thread being deleted, */ + /* or by the joining thread. */ + CloseHandle(gc_id->handle); /* cast away volatile qualifier */ - thr->stack_base = 0; - thr->id = 0; + gc_id -> stack_base = 0; + gc_id -> id = 0; # ifdef CYGWIN32 - thr->pthread_id = 0; + gc_id -> pthread_id = 0; # endif /* CYGWIN32 */ - AO_store_release(&(thr->in_use), FALSE); + AO_store_release(&(gc_id->in_use), FALSE); + } else { + DWORD id = gc_id -> id; + int hv = ((word)id) % THREAD_TABLE_SZ; + register GC_thread p = GC_threads[hv]; + register GC_thread prev = 0; + + GC_ASSERT(I_HOLD_LOCK()); + while (p != gc_id) { + prev = p; + p = p -> next; + } + if (prev == 0) { + GC_threads[hv] = p -> next; + } else { + prev -> next = p -> next; + } + GC_INTERNAL_FREE(p); + } } +/* Delete a thread from GC_threads. 
We assume it is there. */ +/* (The code intentionally traps if it wasn't.) */ +/* Assumes we hold the allocation lock unless */ +/* GC_win32_dll_threads is set. */ +/* If GC_win32_dll_threads is set it should be called from the */ +/* thread being deleted. */ +void GC_delete_thread(DWORD id) +{ + if (GC_win32_dll_threads) { + GC_thread t = GC_lookup_thread_inner(id); -static void GC_delete_thread(DWORD thread_id) { - GC_thread t = GC_lookup_thread(thread_id); + if (0 == t) { + WARN("Removing nonexistent thread %ld\n", (GC_word)id); + } else { + GC_delete_gc_thread(t); + } + } else { + int hv = ((word)id) % THREAD_TABLE_SZ; + register GC_thread p = GC_threads[hv]; + register GC_thread prev = 0; + + GC_ASSERT(I_HOLD_LOCK()); + while (p -> id != id) { + prev = p; + p = p -> next; + } + if (prev == 0) { + GC_threads[hv] = p -> next; + } else { + prev -> next = p -> next; + } + GC_INTERNAL_FREE(p); + } +} + +int GC_register_my_thread(struct GC_stack_base *sb) { + DWORD t = GetCurrentThreadId(); - if (0 == t) { - WARN("Removing nonexistent thread %ld\n", (GC_word)thread_id); + if (0 == GC_lookup_thread(t)) { + /* We lock here, since we want to wait for an ongoing GC. */ + LOCK(); + GC_register_my_thread_inner(sb, t); + UNLOCK(); + return GC_SUCCESS; } else { - GC_delete_gc_thread(t); + return GC_DUPLICATE; } } int GC_unregister_my_thread(void) { - GC_delete_thread(GetCurrentThreadId()); + if (GC_win32_dll_threads) { + /* Should we just ignore this? */ + GC_delete_thread(GetCurrentThreadId()); + } else { + LOCK(); + GC_delete_thread(GetCurrentThreadId()); + UNLOCK(); + } +# if defined(THREAD_LOCAL_ALLOC) + LOCK(); + { + GC_thread me = GC_lookup_thread_inner(GetCurrentThreadId()); + GC_destroy_thread_local(&(me->tlfs)); + } + UNLOCK(); +# endif return GC_SUCCESS; } #ifdef CYGWIN32 +/* A quick-and-dirty cache of the mapping between pthread_t */ +/* and win32 thread id. 
*/ +#define PTHREAD_MAP_SIZE 512 +DWORD GC_pthread_map_cache[PTHREAD_MAP_SIZE]; +#define HASH(pthread_id) ((((word)(pthread_id) >> 5)) % PTHREAD_MAP_SIZE) + /* It appears pthread_t is really a pointer type ... */ +#define SET_PTHREAD_MAP_CACHE(pthread_id, win32_id) \ + GC_pthread_map_cache[HASH(pthread_id)] = (win32_id); +#define GET_PTHREAD_MAP_CACHE(pthread_id) \ + GC_pthread_map_cache[HASH(pthread_id)] + /* Return a GC_thread corresponding to a given pthread_t. */ /* Returns 0 if it's not there. */ /* We assume that this is only called for pthread ids that */ /* have not yet terminated or are still joinable, and */ /* cannot be concurrently terminated. */ +/* Assumes we do NOT hold the allocation lock. */ static GC_thread GC_lookup_pthread(pthread_t id) { - int i; - LONG my_max = GC_get_max_thread_index(); + if (GC_win32_dll_threads) { + int i; + LONG my_max = GC_get_max_thread_index(); - for (i = 0; - i <= my_max && - (!AO_load_acquire(&(thread_table[i].in_use)) - || thread_table[i].pthread_id != id); + for (i = 0; + i <= my_max && + (!AO_load_acquire(&(dll_thread_table[i].in_use)) + || dll_thread_table[i].pthread_id != id); /* Must still be in_use, since nobody else can store our thread_id. */ i++); - if (i > my_max) return 0; - return thread_table + i; + if (i > my_max) return 0; + return (GC_thread)(dll_thread_table + i); + } else { + /* We first try the cache. If that fails, we use a very slow */ + /* approach. 
*/ + int hv_guess = GET_PTHREAD_MAP_CACHE(id) % THREAD_TABLE_SZ; + int hv; + GC_thread p; + + LOCK(); + for (p = GC_threads[hv_guess]; 0 != p; p = p -> next) { + if (pthread_equal(p -> pthread_id, id)) + goto foundit; + } + for (hv = 0; hv < THREAD_TABLE_SZ; ++hv) { + for (p = GC_threads[hv]; 0 != p; p = p -> next) { + if (pthread_equal(p -> pthread_id, id)) + goto foundit; + } + } + p = 0; + foundit: + UNLOCK(); + return p; + } } #endif /* CYGWIN32 */ void GC_push_thread_structures(void) { + GC_ASSERT(I_HOLD_LOCK()); + if (GC_win32_dll_threads) { /* Unlike the other threads implementations, the thread table here */ /* contains no pointers to the collectable heap. Thus we have */ /* no private structures we need to preserve. */ -# ifdef CYGWIN32 - { int i; /* pthreads may keep a pointer in the thread exit value */ - LONG my_max = GC_get_max_thread_index(); +# ifdef CYGWIN32 + { int i; /* pthreads may keep a pointer in the thread exit value */ + LONG my_max = GC_get_max_thread_index(); - for (i = 0; i <= my_max; i++) - if (thread_table[i].in_use) - GC_push_all((ptr_t)&(thread_table[i].status), - (ptr_t)(&(thread_table[i].status)+1)); + for (i = 0; i <= my_max; i++) + if (dll_thread_table[i].in_use) + GC_push_all((ptr_t)&(dll_thread_table[i].status), + (ptr_t)(&(dll_thread_table[i].status)+1)); + } +# endif + } else { + GC_push_all((ptr_t)(GC_threads), (ptr_t)(GC_threads)+sizeof(GC_threads)); } +# if defined(THREAD_LOCAL_ALLOC) + GC_push_all((ptr_t)(&GC_thread_key), + (ptr_t)(&GC_thread_key)+sizeof(&GC_thread_key)); + /* Just in case we ever use our own TLS implementation. */ # endif } +/* Suspend the given thread, if it's still active. 
*/ +GC_suspend(GC_thread t) +{ +# ifdef MSWINCE + /* SuspendThread will fail if thread is running kernel code */ + while (SuspendThread(t -> handle) == (DWORD)-1) + Sleep(10); +# else + /* Apparently the Windows 95 GetOpenFileName call creates */ + /* a thread that does not properly get cleaned up, and */ + /* SuspendThread on its descriptor may provoke a crash. */ + /* This reduces the probability of that event, though it still */ + /* appears there's a race here. */ + DWORD exitCode; + if (GetExitCodeThread(t -> handle, &exitCode) && + exitCode != STILL_ACTIVE) { + t -> stack_base = 0; /* prevent stack from being pushed */ +# ifndef CYGWIN32 + /* this breaks pthread_join on Cygwin, which is guaranteed to */ + /* only see user pthreads */ + AO_store(&(t -> in_use), FALSE); + CloseHandle(t -> handle); +# endif + return; + } + if (SuspendThread(t -> handle) == (DWORD)-1) + ABORT("SuspendThread failed"); +# endif + t -> suspended = TRUE; +} + +/* Defined in misc.c */ +#ifndef CYGWIN32 + extern CRITICAL_SECTION GC_write_cs; +#endif + void GC_stop_world(void) { DWORD thread_id = GetCurrentThreadId(); int i; if (!GC_thr_initialized) ABORT("GC_stop_world() called before GC_thr_init()"); + GC_ASSERT(I_HOLD_LOCK()); GC_please_stop = TRUE; - for (i = 0; i <= GC_get_max_thread_index(); i++) - if (thread_table[i].stack_base != 0 - && thread_table[i].id != thread_id) { -# ifdef MSWINCE - /* SuspendThread will fail if thread is running kernel code */ - while (SuspendThread(thread_table[i].handle) == (DWORD)-1) - Sleep(10); -# else - /* Apparently the Windows 95 GetOpenFileName call creates */ - /* a thread that does not properly get cleaned up, and */ - /* SuspendThread on its descriptor may provoke a crash. */ - /* This reduces the probability of that event, though it still */ - /* appears there's a race here. 
*/ - DWORD exitCode; - if (GetExitCodeThread(thread_table[i].handle,&exitCode) && - exitCode != STILL_ACTIVE) { - thread_table[i].stack_base = 0; /* prevent stack from being pushed */ -# ifndef CYGWIN32 - /* this breaks pthread_join on Cygwin, which is guaranteed to */ - /* only see user pthreads */ - AO_store(&(thread_table[i].in_use), FALSE); - CloseHandle(thread_table[i].handle); -# endif - continue; - } - if (SuspendThread(thread_table[i].handle) == (DWORD)-1) - ABORT("SuspendThread failed"); -# endif - thread_table[i].suspended = TRUE; +# ifndef CYGWIN32 + EnterCriticalSection(&GC_write_cs); +# endif + if (GC_win32_dll_threads) { + /* Any threads being created during this loop will end up sleeping */ + /* in the thread registration code until GC_please_stop becomes */ + /* false. This is not ideal, but hopefully correct. */ + for (i = 0; i <= GC_get_max_thread_index(); i++) { + volatile struct GC_Thread_Rep * t = dll_thread_table + i; + if (t -> stack_base != 0 + && t -> id != thread_id) { + GC_suspend((GC_thread)t); + } } + } else { + GC_thread t; + int i; + + for (i = 0; i < THREAD_TABLE_SZ; i++) { + for (t = GC_threads[i]; t != 0; t = t -> next) { + if (t -> stack_base != 0 + && t -> id != thread_id) { + GC_suspend(t); + } + } + } + } +# ifndef CYGWIN32 + LeaveCriticalSection(&GC_write_cs); +# endif } void GC_start_world(void) @@ -338,33 +667,34 @@ void GC_start_world(void) int i; LONG my_max = GC_get_max_thread_index(); - for (i = 0; i <= my_max; i++) - if (thread_table[i].stack_base != 0 && thread_table[i].suspended - && thread_table[i].id != thread_id) { - if (ResumeThread(thread_table[i].handle) == (DWORD)-1) - ABORT("ResumeThread failed"); - thread_table[i].suspended = FALSE; + GC_ASSERT(I_HOLD_LOCK()); + if (GC_win32_dll_threads) { + for (i = 0; i <= my_max; i++) { + GC_thread t = (GC_thread)(dll_thread_table + i); + if (t -> stack_base != 0 && t -> suspended + && t -> id != thread_id) { + if (ResumeThread(t -> handle) == (DWORD)-1) + 
ABORT("ResumeThread failed"); + t -> suspended = FALSE; + } } - GC_please_stop = FALSE; -} - -# ifdef _MSC_VER -# pragma warning(disable:4715) -# endif -ptr_t GC_current_stackbottom(void) -{ - DWORD thread_id = GetCurrentThreadId(); - int i; - LONG my_max = GC_get_max_thread_index(); + } else { + GC_thread t; + int i; - for (i = 0; i <= my_max; i++) - if (thread_table[i].stack_base && thread_table[i].id == thread_id) - return thread_table[i].stack_base; - ABORT("no thread table entry for current thread"); + for (i = 0; i < THREAD_TABLE_SZ; i++) { + for (t = GC_threads[i]; t != 0; t = t -> next) { + if (t -> stack_base != 0 && t -> suspended + && t -> id != thread_id) { + if (ResumeThread(t -> handle) == (DWORD)-1) + ABORT("ResumeThread failed"); + t -> suspended = FALSE; + } + } + } + } + GC_please_stop = FALSE; } -# ifdef _MSC_VER -# pragma warning(default:4715) -# endif # ifdef MSWINCE /* The VirtualQuery calls below won't work properly on WinCE, but */ @@ -387,26 +717,19 @@ ptr_t GC_current_stackbottom(void) } # endif -void GC_push_all_stacks(void) +void GC_push_stack_for(GC_thread thread) { - DWORD thread_id = GetCurrentThreadId(); - GC_bool found_me = FALSE; - int i; - int dummy; - ptr_t sp, stack_min; - GC_thread thread; - LONG my_max = GC_get_max_thread_index(); - - for (i = 0; i <= my_max; i++) { - thread = thread_table + i; - if (thread -> in_use && thread -> stack_base) { - if (thread -> id == thread_id) { + int dummy; + ptr_t sp, stack_min; + DWORD me = GetCurrentThreadId(); + + if (thread -> stack_base) { + if (thread -> id == me) { sp = (ptr_t) &dummy; - found_me = TRUE; } else { CONTEXT context; context.ContextFlags = CONTEXT_INTEGER|CONTEXT_CONTROL; - if (!GetThreadContext(thread_table[i].handle, &context)) + if (!GetThreadContext(thread -> handle, &context)) ABORT("GetThreadContext failed"); /* Push all registers that might point into the heap. 
Frame */ @@ -446,17 +769,49 @@ void GC_push_all_stacks(void) # else # error "architecture is not supported" # endif - } + } /* ! current thread */ stack_min = GC_get_stack_min(thread->stack_base); - if (sp >= stack_min && sp < thread->stack_base) + if (sp >= stack_min && sp < thread->stack_base) { +# if DEBUG_CYGWIN_THREADS + GC_printf("Pushing thread from %p to %p for %d from %d\n", + sp, thread -> stack_base, thread -> id, me); +# endif GC_push_all_stack(sp, thread->stack_base); - else { + } else { WARN("Thread stack pointer 0x%lx out of range, pushing everything\n", (unsigned long)sp); GC_push_all_stack(stack_min, thread->stack_base); } + } /* thread looks live */ +} + +void GC_push_all_stacks(void) +{ + DWORD me = GetCurrentThreadId(); + GC_bool found_me = FALSE; + + if (GC_win32_dll_threads) { + int i; + LONG my_max = GC_get_max_thread_index(); + + for (i = 0; i <= my_max; i++) { + GC_thread t = (GC_thread)(dll_thread_table + i); + if (t -> in_use) { + GC_push_stack_for(t); + if (t -> id == me) found_me = TRUE; + } + } + } else { + GC_thread t; + int i; + + for (i = 0; i < THREAD_TABLE_SZ; i++) { + for (t = GC_threads[i]; t != 0; t = t -> next) { + GC_push_stack_for(t); + if (t -> id == me) found_me = TRUE; + } } } if (!found_me) ABORT("Collecting from unknown thread."); @@ -467,14 +822,29 @@ void GC_get_next_stack(char *start, char **lo, char **hi) int i; # define ADDR_LIMIT (char *)(-1L) char * current_min = ADDR_LIMIT; - LONG my_max = GC_get_max_thread_index(); + + if (GC_win32_dll_threads) { + LONG my_max = GC_get_max_thread_index(); - for (i = 0; i <= my_max; i++) { - char * s = (char *)thread_table[i].stack_base; + for (i = 0; i <= my_max; i++) { + ptr_t s = (ptr_t)(dll_thread_table[i].stack_base); if (0 != s && s > start && s < current_min) { current_min = s; } + } + } else { + for (i = 0; i < THREAD_TABLE_SZ; i++) { + GC_thread t; + + for (t = GC_threads[i]; t != 0; t = t -> next) { + ptr_t s = (ptr_t)(t -> stack_base); + + if (0 != s && s > start && s 
< current_min) { + current_min = s; + } + } + } } *hi = current_min; if (current_min == ADDR_LIMIT) { @@ -487,21 +857,6 @@ void GC_get_next_stack(char *start, char **lo, char **hi) #if !defined(CYGWIN32) -#if !defined(MSWINCE) && defined(GC_DLL) - -/* We register threads from DllMain */ - -GC_API HANDLE WINAPI GC_CreateThread( - LPSECURITY_ATTRIBUTES lpThreadAttributes, - DWORD dwStackSize, LPTHREAD_START_ROUTINE lpStartAddress, - LPVOID lpParameter, DWORD dwCreationFlags, LPDWORD lpThreadId ) -{ - return CreateThread(lpThreadAttributes, dwStackSize, lpStartAddress, - lpParameter, dwCreationFlags, lpThreadId); -} - -#else /* defined(MSWINCE) || !defined(GC_DLL)) */ - /* We have no DllMain to take care of new threads. Thus we */ /* must properly intercept thread creation. */ @@ -521,37 +876,42 @@ GC_API HANDLE WINAPI GC_CreateThread( thread_args *args; - if (!GC_is_initialized) GC_init(); - /* make sure GC is initialized (i.e. main thread is attached) */ - - args = GC_malloc_uncollectable(sizeof(thread_args)); + if (!parallel_initialized) GC_init_parallel(); + /* make sure GC is initialized (i.e. main thread is attached, + tls initialized) */ + + client_has_run = TRUE; + if (GC_win32_dll_threads) { + return CreateThread(lpThreadAttributes, dwStackSize, lpStartAddress, + lpParameter, dwCreationFlags, lpThreadId); + } else { + args = GC_malloc_uncollectable(sizeof(thread_args)); /* Handed off to and deallocated by child thread. 
*/ - if (0 == args) { + if (0 == args) { SetLastError(ERROR_NOT_ENOUGH_MEMORY); return NULL; - } + } - /* set up thread arguments */ + /* set up thread arguments */ args -> start = lpStartAddress; args -> param = lpParameter; - GC_need_to_lock = TRUE; - thread_h = CreateThread(lpThreadAttributes, - dwStackSize, thread_start, - args, dwCreationFlags, - lpThreadId); + GC_need_to_lock = TRUE; + thread_h = CreateThread(lpThreadAttributes, + dwStackSize, GC_win32_start, + args, dwCreationFlags, + lpThreadId); - return thread_h; + return thread_h; + } } -static DWORD WINAPI thread_start(LPVOID arg) +void * GC_win32_start_inner(struct GC_stack_base *sb, LPVOID arg) { - DWORD ret = 0; + void * ret; thread_args *args = (thread_args *)arg; - struct GC_stack_base *sb; - GC_get_stack_base(&sb); - GC_register_my_thread(&sb); /* This waits for an in-progress GC. */ + GC_register_my_thread(sb); /* This waits for an in-progress GC. */ /* Clear the thread entry even if we exit with an exception. */ /* This is probably pointless, since an uncaught exception is */ @@ -559,10 +919,15 @@ static DWORD WINAPI thread_start(LPVOID arg) #ifndef __GNUC__ __try { #endif /* __GNUC__ */ - ret = args->start (args->param); + ret = (void *)args->start (args->param); #ifndef __GNUC__ } __finally { #endif /* __GNUC__ */ +# if defined(THREAD_LOCAL_ALLOC) + LOCK(); + GC_destroy_thread_local(&(me->tlfs)); + UNLOCK(); +# endif GC_free(args); GC_delete_thread(GetCurrentThreadId()); #ifndef __GNUC__ @@ -571,8 +936,11 @@ static DWORD WINAPI thread_start(LPVOID arg) return ret; } -#endif /* !defined(MSWINCE) && !(defined(__MINGW32__) && !defined(_DLL)) */ +DWORD WINAPI GC_win32_start(struct GC_stack_base *sb, LPVOID arg) +{ + return (DWORD)GC_call_with_stack_base(GC_win32_start_inner, arg); +} #endif /* !CYGWIN32 */ #ifdef MSWINCE @@ -630,13 +998,16 @@ DWORD WINAPI main_thread_start(LPVOID arg) /* Called by GC_init() - we hold the allocation lock. 
*/ void GC_thr_init(void) { struct GC_stack_base sb; + int sb_result; + GC_ASSERT(I_HOLD_LOCK()); if (GC_thr_initialized) return; GC_main_thread = GetCurrentThreadId(); GC_thr_initialized = TRUE; /* Add the initial thread, so we can stop it. */ - GC_get_stack_base(&sb); + sb_result = GC_get_stack_base(&sb); + GC_ASSERT(sb_result == GC_SUCCESS); GC_register_my_thread(&sb); } @@ -658,6 +1029,7 @@ int GC_pthread_join(pthread_t pthread_id, void **retval) { (int)pthread_self(), GetCurrentThreadId(), (int)pthread_id); # endif + client_has_run = TRUE; /* Thread being joined might not have registered itself yet. */ /* After the join,thread id may have been recycled. */ /* FIXME: It would be better if this worked more like */ @@ -667,9 +1039,11 @@ int GC_pthread_join(pthread_t pthread_id, void **retval) { result = pthread_join(pthread_id, retval); - /* FIXME: This is an asynchronous deletion, which we said can't */ - /* happen? */ - GC_delete_gc_thread(joinee); + if (!GC_win32_dll_threads) { + LOCK(); + GC_delete_gc_thread(joinee); + UNLOCK(); + } /* otherwise dllmain handles it. */ # if DEBUG_CYGWIN_THREADS GC_printf("thread 0x%x(0x%x) completed join with thread 0x%x.\n", @@ -690,8 +1064,12 @@ GC_pthread_create(pthread_t *new_thread, int result; struct start_info * si; - if (!GC_is_initialized) GC_init(); + if (!parallel_initialized) GC_init_parallel(); /* make sure GC is initialized (i.e. main thread is attached) */ + client_has_run = TRUE; + if (GC_win32_dll_threads) { + return pthread_create(new_thread, attr, start_routine, arg); + } /* This is otherwise saved only in an area mmapped by the thread */ /* library, which isn't visible to the collector. 
*/ @@ -711,7 +1089,7 @@ GC_pthread_create(pthread_t *new_thread, (int)pthread_self(), GetCurrentThreadId); # endif GC_need_to_lock = TRUE; - result = pthread_create(new_thread, attr, GC_start_routine, si); + result = pthread_create(new_thread, attr, GC_pthread_start, si); if (result) { /* failure */ GC_free(si); @@ -720,24 +1098,24 @@ GC_pthread_create(pthread_t *new_thread, return(result); } -void * GC_start_routine(void * arg) +void * GC_pthread_start_inner(struct GC_stack_base *sb, void * arg) { struct start_info * si = arg; void * result; void *(*start)(void *); void *start_arg; - pthread_t pthread_id; DWORD thread_id = GetCurrentThreadId(); + pthread_t pthread_id = pthread_self(); GC_thread me; GC_bool detached; int i; - struct GC_stack_base sb; # if DEBUG_CYGWIN_THREADS - GC_printf("thread 0x%x(0x%x) starting...\n",(int)pthread_self(), + GC_printf("thread 0x%x(0x%x) starting...\n",(int)pthread_id, thread_id); # endif + GC_ASSERT(!GC_win32_dll_threads); /* If a GC occurs before the thread is registered, that GC will */ /* ignore this thread. That's fine, since it will block trying to */ /* acquire the allocation lock, and won't yet hold interesting */ @@ -745,14 +1123,14 @@ void * GC_start_routine(void * arg) LOCK(); /* We register the thread here instead of in the parent, so that */ /* we don't need to hold the allocation lock during pthread_create. 
*/ - GC_get_stack_base(&sb); - me = GC_register_my_thread_inner(&sb, thread_id); + me = GC_register_my_thread_inner(sb, thread_id); + SET_PTHREAD_MAP_CACHE(pthread_id, thread_id); UNLOCK(); start = si -> start_routine; start_arg = si -> arg; if (si-> detached) me -> flags |= DETACHED; - me -> pthread_id = pthread_id = pthread_self(); + me -> pthread_id = pthread_id; GC_free(si); /* was allocated uncollectable */ @@ -769,17 +1147,26 @@ void * GC_start_routine(void * arg) return(result); } +void * GC_pthread_start(void * arg) +{ + return GC_call_with_stack_base(GC_pthread_start_inner, arg); +} + void GC_thread_exit_proc(void *arg) { GC_thread me = (GC_thread)arg; int i; + GC_ASSERT(!GC_win32_dll_threads); # if DEBUG_CYGWIN_THREADS GC_printf("thread 0x%x(0x%x) called pthread_exit().\n", (int)pthread_self(),GetCurrentThreadId()); # endif LOCK(); +# if defined(THREAD_LOCAL_ALLOC) + GC_destroy_thread_local(&(me->tlfs)); +# endif if (me -> flags & DETACHED) { GC_delete_thread(GetCurrentThreadId()); } else { @@ -791,6 +1178,7 @@ void GC_thread_exit_proc(void *arg) /* nothing required here... */ int GC_pthread_sigmask(int how, const sigset_t *set, sigset_t *oset) { + client_has_run = TRUE; return pthread_sigmask(how, set, oset); } @@ -799,6 +1187,7 @@ int GC_pthread_detach(pthread_t thread) int result; GC_thread thread_gc_id; + client_has_run = TRUE; LOCK(); thread_gc_id = GC_lookup_pthread(thread); UNLOCK(); @@ -827,6 +1216,9 @@ BOOL WINAPI DllMain(HINSTANCE inst, ULONG reason, LPVOID reserved) { struct GC_stack_base sb; DWORD thread_id; + int sb_result; + + if (client_has_run && !GC_win32_dll_threads) return TRUE; switch (reason) { case DLL_PROCESS_ATTACH: @@ -837,28 +1229,32 @@ BOOL WINAPI DllMain(HINSTANCE inst, ULONG reason, LPVOID reserved) thread_id = GetCurrentThreadId(); if (GC_main_thread != thread_id) { /* Don't lock here. 
*/ - GC_get_stack_base(&sb); + sb_result = GC_get_stack_base(&sb); + GC_ASSERT(sb_result == GC_SUCCESS); +# ifdef THREAD_LOCAL_ALLOC + ABORT("Cannot initialize thread local cache from DllMain"); +# endif GC_register_my_thread_inner(&sb, thread_id); } /* o.w. we already did it during GC_thr_init(), called by GC_init() */ break; case DLL_THREAD_DETACH: - LOCK(); /* Safe? DllMain description is ambiguous. */ + /* We are hopefully running in the context of the exiting thread. */ + client_has_run = TRUE; + if (!GC_win32_dll_threads) return TRUE; GC_delete_thread(GetCurrentThreadId()); - UNLOCK(); break; case DLL_PROCESS_DETACH: { int i; - LOCK(); + if (!GC_win32_dll_threads) return TRUE; for (i = 0; i <= GC_get_max_thread_index(); ++i) { - if (AO_load(&(thread_table[i].in_use))) - GC_delete_gc_thread(thread_table + i); + if (AO_load(&(dll_thread_table[i].in_use))) + GC_delete_gc_thread(dll_thread_table + i); } - UNLOCK(); GC_deinit(); DeleteCriticalSection(&GC_allocate_ml); @@ -873,12 +1269,80 @@ BOOL WINAPI DllMain(HINSTANCE inst, ULONG reason, LPVOID reserved) # endif /* !MSWINCE */ -# if defined(THREAD_LOCAL_ALLOC) && !defined(DBG_HDRS_ALL) +/* Perform all initializations, including those that */ +/* may require allocation. */ +/* Called without allocation lock. */ +/* Must be called before a second thread is created. */ +void GC_init_parallel(void) +{ + if (parallel_initialized) return; + parallel_initialized = TRUE; + + /* GC_init() calls us back, so set flag first. */ + if (!GC_is_initialized) GC_init(); + /* Initialize thread local free lists if used. */ +# if defined(THREAD_LOCAL_ALLOC) + LOCK(); + GC_init_thread_local(&(GC_lookup_thread(GetCurrentThreadId())->tlfs)); + UNLOCK(); +# endif +} + +#if defined(USE_PTHREAD_LOCKS) + /* Support for pthread locking code. */ + /* Pthread_mutex_try_lock may not win here, */ + /* due to builtinsupport for spinning first? 
*/ + +volatile GC_bool GC_collecting = 0; + /* A hint that we're in the collector and */ + /* holding the allocation lock for an */ + /* extended period. */ + +void GC_lock(void) +{ + pthread_mutex_lock(&GC_allocate_ml); +} +#endif /* USE_PTHREAD ... */ -/* We don't really support thread-local allocation with DBG_HDRS_ALL */ +# if defined(THREAD_LOCAL_ALLOC) /* Add thread-local allocation support. Microsoft uses __declspec(thread) */ +/* We must explicitly mark ptrfree and gcj free lists, since the free */ +/* list links wouldn't otherwise be found. We also set them in the */ +/* normal free lists, since that involves touching less memory than if */ +/* we scanned them normally. */ +void GC_mark_thread_local_free_lists(void) +{ + int i; + GC_thread p; + + for (i = 0; i < THREAD_TABLE_SZ; ++i) { + for (p = GC_threads[i]; 0 != p; p = p -> next) { + GC_mark_thread_local_fls_for(&(p->tlfs)); + } + } +} + +#if defined(GC_ASSERTIONS) + /* Check that all thread-local free-lists are completely marked. */ + /* also check that thread-specific-data structures are marked. */ + void GC_check_tls(void) { + int i; + GC_thread p; + + for (i = 0; i < THREAD_TABLE_SZ; ++i) { + for (p = GC_threads[i]; 0 != p; p = p -> next) { + GC_check_tls_for(&(p->tlfs)); + } + } +# if defined(USE_CUSTOM_SPECIFIC) + if (GC_thread_key != 0) + GC_check_tsd_marks(GC_thread_key); +# endif + } +#endif /* GC_ASSERTIONS */ + #endif /* THREAD_LOCAL_ALLOC ... */ #endif /* GC_WIN32_THREADS */ -- 2.7.4