From: Ulrich Drepper Date: Fri, 13 Mar 2009 23:53:18 +0000 (+0000) Subject: * config.h.in (USE_MULTIARCH): Define. X-Git-Tag: upstream/2.30~13763 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=425ce2edb9d11cc1ff650fac16dfbc450241896a;p=external%2Fglibc.git * config.h.in (USE_MULTIARCH): Define. * configure.in: Handle --enable-multi-arch. * elf/dl-runtime.c (_dl_fixup): Handle STT_GNU_IFUNC. (_dl_fixup_profile): Likewise. * elf/do-lookup.c (dl_lookup_x): Likewise. * sysdeps/x86_64/dl-machine.h: Handle STT_GNU_IFUNC. * elf/elf.h (STT_GNU_IFUNC): Define. * include/libc-symbols.h (libc_ifunc): Define. * sysdeps/x86_64/cacheinfo.c: If USE_MULTIARCH is defined, use the framework in init-arch.h to get CPUID values. * sysdeps/x86_64/multiarch/Makefile: New file. * sysdeps/x86_64/multiarch/init-arch.c: New file. * sysdeps/x86_64/multiarch/init-arch.h: New file. * sysdeps/x86_64/multiarch/sched_cpucount.c: New file. * config.make.in (experimental-malloc): Define. * configure.in: Handle --enable-experimental-malloc. * malloc/Makefile: Handle experimental-malloc flag. * malloc/malloc.c: Implement PER_THREAD and ATOMIC_FASTBINS features. * malloc/arena.c: Likewise. * malloc/hooks.c: Likewise. * malloc/malloc.h: Define M_ARENA_TEST and M_ARENA_MAX. --- diff --git a/ChangeLog b/ChangeLog index 933e81c..954285f 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,28 @@ +2009-03-13 Ulrich Drepper + + * config.h.in (USE_MULTIARCH): Define. + * configure.in: Handle --enable-multi-arch. + * elf/dl-runtime.c (_dl_fixup): Handle STT_GNU_IFUNC. + (_dl_fixup_profile): Likewise. + * elf/do-lookup.c (dl_lookup_x): Likewise. + * sysdeps/x86_64/dl-machine.h: Handle STT_GNU_IFUNC. + * elf/elf.h (STT_GNU_IFUNC): Define. + * include/libc-symbols.h (libc_ifunc): Define. + * sysdeps/x86_64/cacheinfo.c: If USE_MULTIARCH is defined, use the + framework in init-arch.h to get CPUID values. + * sysdeps/x86_64/multiarch/Makefile: New file. + * sysdeps/x86_64/multiarch/init-arch.c: New file. 
+ * sysdeps/x86_64/multiarch/init-arch.h: New file. + * sysdeps/x86_64/multiarch/sched_cpucount.c: New file. + + * config.make.in (experimental-malloc): Define. + * configure.in: Handle --enable-experimental-malloc. + * malloc/Makefile: Handle experimental-malloc flag. + * malloc/malloc.c: Implement PER_THREAD and ATOMIC_FASTBINS features. + * malloc/arena.c: Likewise. + * malloc/hooks.c: Likewise. + * malloc/malloc.h: Define M_ARENA_TEST and M_ARENA_MAX. + 2009-03-11 Ulrich Drepper * sysdeps/x86_64/dl-machine.h (elf_machine_rela): Add branch diff --git a/NEWS b/NEWS index 2689db3..55ba544 100644 --- a/NEWS +++ b/NEWS @@ -1,4 +1,4 @@ -GNU C Library NEWS -- history of user-visible changes. 2009-3-8 +GNU C Library NEWS -- history of user-visible changes. 2009-3-12 Copyright (C) 1992-2008, 2009 Free Software Foundation, Inc. See the end for copying conditions. @@ -7,7 +7,7 @@ using `glibc' in the "product" field. Version 2.10 -* New Linux interface: accept4 +* New Linux interfaces: accept4, fallocate, fallocate64. * Correct declarations of string function when used in C++ code. This could lead to compile error for invalid C++ code. diff --git a/config.h.in b/config.h.in index b5abb10..8dbc224 100644 --- a/config.h.in +++ b/config.h.in @@ -189,6 +189,9 @@ /* Define if __stack_chk_guard canary should be randomized at program startup. */ #undef ENABLE_STACKGUARD_RANDOMIZE +/* Define if multi-arch DSOs should be generated. */ +#undef USE_MULTIARCH + /* */ diff --git a/config.make.in b/config.make.in index aa73466..6da6362 100644 --- a/config.make.in +++ b/config.make.in @@ -70,6 +70,7 @@ versioning = @VERSIONING@ oldest-abi = @oldest_abi@ no-whole-archive = @no_whole_archive@ exceptions = @exceptions@ +multi-arch = @multi_arch@ mach-interface-list = @mach_interface_list@ @@ -78,6 +79,8 @@ have-ksh = @libc_cv_have_ksh@ sizeof-long-double = @sizeof_long_double@ +experimental-malloc = @experimental_malloc@ + # Configuration options. 
build-static = @static@ build-shared = @shared@ diff --git a/configure b/configure index c5c6dc4..69af852 100755 --- a/configure +++ b/configure @@ -660,6 +660,8 @@ oldest_abi bindnow force_install all_warnings +multi_arch +experimental_malloc build build_cpu build_vendor @@ -1380,6 +1382,10 @@ Optional Features: --enable-kernel=VERSION compile for compatibility with kernel not older than VERSION --enable-all-warnings enable all useful warnings gcc can issue + --enable-multi-arch enable single DSO with optimizations for multiple + architectures + --enable-experimental-malloc + enable experimental malloc features Optional Packages: --with-PACKAGE[=ARG] use PACKAGE [ARG=yes] @@ -2173,6 +2179,29 @@ fi +# Check whether --enable-multi-arch was given. +if test "${enable_multi_arch+set}" = set; then + enableval=$enable_multi_arch; multi_arch=$enableval +else + multi_arch=no +fi + +if test x"$multi_arch" = xyes; then + cat >>confdefs.h <<\_ACEOF +#define USE_MULTIARCH 1 +_ACEOF + + multi_arch_d=/multiarch +fi + + +# Check whether --enable-experimental-malloc was given. +if test "${enable_experimental_malloc+set}" = set; then + enableval=$enable_experimental_malloc; experimental_malloc=$enableval +fi + + + # Make sure we can run config.sub. $SHELL "$ac_aux_dir/config.sub" sun4 >/dev/null 2>&1 || { { echo "$as_me:$LINENO: error: cannot run $SHELL $ac_aux_dir/config.sub" >&5 @@ -2627,7 +2656,7 @@ for b in $base ''; do test "$v" = / && continue for o in /$ostry ''; do test "$o" = / && continue - for m in $mach ''; do + for m in $multi_arch_d $mach ''; do for d in $add_ons_pfx ''; do for a in $add_ons_sfx ''; do if test -n "$m0$m0sub$b$v$o$m$msub"; then @@ -5684,6 +5713,37 @@ _ACEOF fi +# For the multi-arch option we need support in the assembler. 
+if test "$multi_arch" = yes; then + if test "x$libc_cv_asm_type_prefix" != xno; then +{ echo "$as_me:$LINENO: checking for assembler gnu_indirect_function symbol type support" >&5 +echo $ECHO_N "checking for assembler gnu_indirect_function symbol type support... $ECHO_C" >&6; } +if test "${libc_cv_asm_gnu_indirect_function+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + cat > conftest.s <&5 2>&5; +then + libc_cv_asm_gnu_indirect_function=yes +else + libc_cv_asm_gnu_indirect_function=no +fi +rm -f conftest* +fi +{ echo "$as_me:$LINENO: result: $libc_cv_asm_gnu_indirect_function" >&5 +echo "${ECHO_T}$libc_cv_asm_gnu_indirect_function" >&6; } + else + libc_cv_asm_gnu_indirect_function=no + fi + if test x"$libc_cv_asm_gnu_indirect_function" != xyes; then + { { echo "$as_me:$LINENO: error: --enable-multi-arch support requires assembler and linker support" >&5 +echo "$as_me: error: --enable-multi-arch support requires assembler and linker support" >&2;} + { (exit 1); exit 1; }; } + fi +fi + { echo "$as_me:$LINENO: checking for .symver assembler directive" >&5 echo $ECHO_N "checking for .symver assembler directive... 
$ECHO_C" >&6; } if test "${libc_cv_asm_symver_directive+set}" = set; then @@ -9184,6 +9244,8 @@ oldest_abi!$oldest_abi$ac_delim bindnow!$bindnow$ac_delim force_install!$force_install$ac_delim all_warnings!$all_warnings$ac_delim +multi_arch!$multi_arch$ac_delim +experimental_malloc!$experimental_malloc$ac_delim build!$build$ac_delim build_cpu!$build_cpu$ac_delim build_vendor!$build_vendor$ac_delim @@ -9235,8 +9297,6 @@ libc_cv_have_bash2!$libc_cv_have_bash2$ac_delim KSH!$KSH$ac_delim libc_cv_have_ksh!$libc_cv_have_ksh$ac_delim AWK!$AWK$ac_delim -PERL!$PERL$ac_delim -INSTALL_INFO!$INSTALL_INFO$ac_delim _ACEOF if test `sed -n "s/.*$ac_delim\$/X/p" conf$$subs.sed | grep -c X` = 97; then @@ -9278,6 +9338,8 @@ _ACEOF ac_delim='%!_!# ' for ac_last_try in false false false false false :; do cat >conf$$subs.sed <<_ACEOF +PERL!$PERL$ac_delim +INSTALL_INFO!$INSTALL_INFO$ac_delim BISON!$BISON$ac_delim VERSIONING!$VERSIONING$ac_delim libc_cv_cc_with_libunwind!$libc_cv_cc_with_libunwind$ac_delim @@ -9334,7 +9396,7 @@ LIBOBJS!$LIBOBJS$ac_delim LTLIBOBJS!$LTLIBOBJS$ac_delim _ACEOF - if test `sed -n "s/.*$ac_delim\$/X/p" conf$$subs.sed | grep -c X` = 54; then + if test `sed -n "s/.*$ac_delim\$/X/p" conf$$subs.sed | grep -c X` = 56; then break elif $ac_last_try; then { { echo "$as_me:$LINENO: error: could not make $CONFIG_STATUS" >&5 diff --git a/configure.in b/configure.in index d7b22f3..4015722 100644 --- a/configure.in +++ b/configure.in @@ -254,6 +254,24 @@ AC_ARG_ENABLE([all-warnings], []) AC_SUBST(all_warnings) +AC_ARG_ENABLE([multi-arch], + AC_HELP_STRING([--enable-multi-arch], + [enable single DSO with optimizations for multiple architectures]), + [multi_arch=$enableval], + [multi_arch=no]) +if test x"$multi_arch" = xyes; then + AC_DEFINE(USE_MULTIARCH) + multi_arch_d=/multiarch +fi +AC_SUBST(multi_arch) + +AC_ARG_ENABLE([experimental-malloc], + AC_HELP_STRING([--enable-experimental-malloc], + [enable experimental malloc features]), + [experimental_malloc=$enableval], + []) 
+AC_SUBST(experimental_malloc) + AC_CANONICAL_HOST # The way shlib-versions is used to generate soversions.mk uses a @@ -608,7 +626,7 @@ for b in $base ''; do test "$v" = / && continue for o in /$ostry ''; do test "$o" = / && continue - for m in $mach ''; do + for m in $multi_arch_d $mach ''; do for d in $add_ons_pfx ''; do for a in $add_ons_sfx ''; do if test -n "$m0$m0sub$b$v$o$m$msub"; then @@ -1157,6 +1175,29 @@ if test "x$libc_cv_asm_type_prefix" != xno; then AC_DEFINE_UNQUOTED(ASM_TYPE_DIRECTIVE_PREFIX, ${libc_cv_asm_type_prefix}) fi +# For the multi-arch option we need support in the assembler. +if test "$multi_arch" = yes; then + if test "x$libc_cv_asm_type_prefix" != xno; then +AC_CACHE_CHECK([for assembler gnu_indirect_function symbol type support], + libc_cv_asm_gnu_indirect_function, [dnl +cat > conftest.s <&AS_MESSAGE_LOG_FD 2>&AS_MESSAGE_LOG_FD; +then + libc_cv_asm_gnu_indirect_function=yes +else + libc_cv_asm_gnu_indirect_function=no +fi +rm -f conftest*]) + else + libc_cv_asm_gnu_indirect_function=no + fi + if test x"$libc_cv_asm_gnu_indirect_function" != xyes; then + AC_MSG_ERROR([--enable-multi-arch support requires assembler and linker support]) + fi +fi + AC_CACHE_CHECK(for .symver assembler directive, libc_cv_asm_symver_directive, [cat > conftest.s <st_info) == STT_GNU_IFUNC, 0)) + value = ((DL_FIXUP_VALUE_TYPE (*) (void)) value) (); + /* Finally, fix up the plt itself. */ if (__builtin_expect (GLRO(dl_bind_not), 0)) return value; @@ -215,12 +218,21 @@ _dl_profile_fixup ( defsym != NULL ? LOOKUP_VALUE_ADDRESS (result) + defsym->st_value : 0); + + if (__builtin_expect (ELFW(ST_TYPE) (defsym->st_info) + == STT_GNU_IFUNC, 0)) + value = ((DL_FIXUP_VALUE_TYPE (*) (void)) value) (); } else { /* We already found the symbol. The module (and therefore its load address) is also known. 
*/ value = DL_FIXUP_MAKE_VALUE (l, l->l_addr + refsym->st_value); + + if (__builtin_expect (ELFW(ST_TYPE) (refsym->st_info) + == STT_GNU_IFUNC, 0)) + value = ((DL_FIXUP_VALUE_TYPE (*) (void)) value) (); + result = l; } /* And now perhaps the relocation addend. */ diff --git a/elf/do-lookup.h b/elf/do-lookup.h index ebb9ed5..41e5fc1 100644 --- a/elf/do-lookup.h +++ b/elf/do-lookup.h @@ -1,5 +1,5 @@ /* Look up a symbol in the loaded objects. - Copyright (C) 1995-2004, 2005, 2006, 2007 Free Software Foundation, Inc. + Copyright (C) 1995-2007, 2008 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -88,10 +88,12 @@ do_lookup_x (const char *undef_name, uint_fast32_t new_hash, if (__builtin_expect (ELFW(ST_TYPE) (sym->st_info) > STT_FUNC && ELFW(ST_TYPE) (sym->st_info) != STT_COMMON - && ELFW(ST_TYPE) (sym->st_info) != STT_TLS, 0)) - /* Ignore all but STT_NOTYPE, STT_OBJECT, STT_FUNC, and STT_COMMON - entries (and STT_TLS if TLS is supported) since these - are no code/data definitions. */ + && ELFW(ST_TYPE) (sym->st_info) != STT_TLS + && ELFW(ST_TYPE) (sym->st_info) != STT_GNU_IFUNC, + 0)) + /* Ignore all but STT_NOTYPE, STT_OBJECT, STT_FUNC, STT_COMMON, + STT_TLS, and STT_GNU_IFUNC since these are no code/data + definitions. */ return NULL; if (sym != ref && strcmp (strtab + sym->st_name, undef_name)) diff --git a/elf/elf.h b/elf/elf.h index 2792820..cd74d51 100644 --- a/elf/elf.h +++ b/elf/elf.h @@ -459,6 +459,7 @@ typedef struct #define STT_TLS 6 /* Symbol is thread-local data object*/ #define STT_NUM 7 /* Number of defined types. 
*/ #define STT_LOOS 10 /* Start of OS-specific */ +#define STT_GNU_IFUNC 10 /* Symbol is indirect code object */ #define STT_HIOS 12 /* End of OS-specific */ #define STT_LOPROC 13 /* Start of processor-specific */ #define STT_HIPROC 15 /* End of processor-specific */ diff --git a/include/libc-symbols.h b/include/libc-symbols.h index a2faeaf..d53bcb9 100644 --- a/include/libc-symbols.h +++ b/include/libc-symbols.h @@ -1,7 +1,6 @@ /* Support macros for making weak and strong aliases for symbols, and for using symbol sets and linker warnings with GNU ld. - Copyright (C) 1995-1998,2000-2003,2004,2005,2006 - Free Software Foundation, Inc. + Copyright (C) 1995-1998, 2000-2006, 2008 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -835,4 +834,15 @@ for linking") # define compat_data_section .section ".data.compat", "aw"; #endif +/* Marker used for indirection function symbols. */ +#define libc_ifunc(name, expr) \ + extern void *name##_ifunc (void) __asm__ (#name); \ + void *name##_ifunc (void) \ + { \ + INIT_ARCH (); \ + __typeof (name) *res = expr; \ + return res; \ + } \ + __asm__ (".type " #name ", %gnu_indirect_function"); + #endif /* libc-symbols.h */ diff --git a/malloc/Makefile b/malloc/Makefile index 22b14ea..1099335 100644 --- a/malloc/Makefile +++ b/malloc/Makefile @@ -1,4 +1,4 @@ -# Copyright (C) 1991-1999, 2000, 2001, 2002, 2003, 2005, 2006, 2007 +# Copyright (C) 1991-1999, 2000, 2001, 2002, 2003, 2005, 2006, 2007, 2009 # Free Software Foundation, Inc. # This file is part of the GNU C Library. @@ -124,6 +124,9 @@ endif tst-mcheck-ENV = MALLOC_CHECK_=3 +ifeq ($(experimental-malloc),yes) +CPPFLAGS-malloc.c += -DPER_THREAD -DATOMIC_FASTBINS +endif # Uncomment this for test releases. For public releases it is too expensive. 
#CPPFLAGS-malloc.o += -DMALLOC_DEBUG=1 diff --git a/malloc/arena.c b/malloc/arena.c index cc03dc4..f280d38 100644 --- a/malloc/arena.c +++ b/malloc/arena.c @@ -1,5 +1,5 @@ /* Malloc implementation for multiple threads without lock contention. - Copyright (C) 2001,2002,2003,2004,2005,2006,2007 + Copyright (C) 2001,2002,2003,2004,2005,2006,2007,2009 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Wolfram Gloger , 2001. @@ -78,6 +78,10 @@ extern int sanity_check_heap_info_alignment[(sizeof (heap_info) static tsd_key_t arena_key; static mutex_t list_lock; +#ifdef PER_THREAD +static size_t narenas; +static mstate free_list; +#endif #if THREAD_STATS static int stat_n_heaps; @@ -105,13 +109,30 @@ int __malloc_initialized = -1; in the new arena. */ #define arena_get(ptr, size) do { \ + arena_lookup(ptr); \ + arena_lock(ptr, size); \ +} while(0) + +#define arena_lookup(ptr) do { \ Void_t *vptr = NULL; \ ptr = (mstate)tsd_getspecific(arena_key, vptr); \ +} while(0) + +#ifdef PER_THREAD +#define arena_lock(ptr, size) do { \ + if(ptr) \ + (void)mutex_lock(&ptr->mutex); \ + else \ + ptr = arena_get2(ptr, (size)); \ +} while(0) +#else +#define arena_lock(ptr, size) do { \ if(ptr && !mutex_trylock(&ptr->mutex)) { \ THREAD_STAT(++(ptr->stat_lock_direct)); \ } else \ ptr = arena_get2(ptr, (size)); \ } while(0) +#endif /* find the heap and corresponding arena for a given ptr */ @@ -219,6 +240,11 @@ free_atfork(Void_t* mem, const Void_t *caller) } #endif +#ifdef ATOMIC_FASTBINS + ar_ptr = arena_for_chunk(p); + tsd_getspecific(arena_key, vptr); + _int_free(ar_ptr, p, vptr == ATFORK_ARENA_PTR); +#else ar_ptr = arena_for_chunk(p); tsd_getspecific(arena_key, vptr); if(vptr != ATFORK_ARENA_PTR) @@ -226,6 +252,7 @@ free_atfork(Void_t* mem, const Void_t *caller) _int_free(ar_ptr, p); if(vptr != ATFORK_ARENA_PTR) (void)mutex_unlock(&ar_ptr->mutex); +#endif } @@ -312,8 +339,17 @@ ptmalloc_unlock_all2 (void) __malloc_hook = save_malloc_hook; __free_hook = 
save_free_hook; #endif +#ifdef PER_THREAD + free_list = NULL; +#endif for(ar_ptr = &main_arena;;) { mutex_init(&ar_ptr->mutex); +#ifdef PER_THREAD + if (ar_ptr != save_arena) { + ar_ptr->next_free = free_list; + free_list = ar_ptr; + } +#endif ar_ptr = ar_ptr->next; if(ar_ptr == &main_arena) break; } @@ -377,6 +413,11 @@ ptmalloc_init_minimal (void) mp_.mmap_threshold = DEFAULT_MMAP_THRESHOLD; mp_.trim_threshold = DEFAULT_TRIM_THRESHOLD; mp_.pagesize = malloc_getpagesize; +#ifdef PER_THREAD +# define NARENAS_FROM_NCORES(n) ((n) * (sizeof(long) == 4 ? 2 : 8)) + mp_.arena_test = NARENAS_FROM_NCORES (1); + narenas = 1; +#endif } @@ -529,9 +570,25 @@ ptmalloc_init (void) } break; case 9: - if (! secure && memcmp (envline, "MMAP_MAX_", 9) == 0) - mALLOPt(M_MMAP_MAX, atoi(&envline[10])); + if (! secure) + { + if (memcmp (envline, "MMAP_MAX_", 9) == 0) + mALLOPt(M_MMAP_MAX, atoi(&envline[10])); +#ifdef PER_THREAD + else if (memcmp (envline, "ARENA_MAX", 9) == 0) + mALLOPt(M_ARENA_MAX, atoi(&envline[10])); +#endif + } break; +#ifdef PER_THREAD + case 10: + if (! secure) + { + if (memcmp (envline, "ARENA_TEST", 10) == 0) + mALLOPt(M_ARENA_TEST, atoi(&envline[11])); + } + break; +#endif case 15: if (! secure) { @@ -875,9 +932,110 @@ _int_new_arena(size_t size) top(a) = (mchunkptr)ptr; set_head(top(a), (((char*)h + h->size) - ptr) | PREV_INUSE); + tsd_setspecific(arena_key, (Void_t *)a); + mutex_init(&a->mutex); + (void)mutex_lock(&a->mutex); + +#ifdef PER_THREAD + (void)mutex_lock(&list_lock); +#endif + + /* Add the new arena to the global list. 
*/ + a->next = main_arena.next; + atomic_write_barrier (); + main_arena.next = a; + +#ifdef PER_THREAD + ++narenas; + + (void)mutex_unlock(&list_lock); +#endif + + THREAD_STAT(++(a->stat_lock_loop)); + return a; } + +#ifdef PER_THREAD +static mstate +get_free_list (void) +{ + mstate result = free_list; + if (result != NULL) + { + (void)mutex_lock(&list_lock); + result = free_list; + if (result != NULL) + free_list = result->next_free; + (void)mutex_unlock(&list_lock); + + if (result != NULL) + { + (void)mutex_lock(&result->mutex); + tsd_setspecific(arena_key, (Void_t *)result); + THREAD_STAT(++(result->stat_lock_loop)); + } + } + + return result; +} + + +static mstate +reused_arena (void) +{ + if (narenas <= mp_.arena_test) + return NULL; + + static int narenas_limit; + if (narenas_limit == 0) + { + if (mp_.arena_max != 0) + narenas_limit = mp_.arena_max; + else + { + int n = __get_nprocs (); + + if (n >= 1) + narenas_limit = NARENAS_FROM_NCORES (n); + else + /* We have no information about the system. Assume two + cores. */ + narenas_limit = NARENAS_FROM_NCORES (2); + } + } + + if (narenas < narenas_limit) + return NULL; + + mstate result; + static mstate next_to_use; + if (next_to_use == NULL) + next_to_use = &main_arena; + + result = next_to_use; + do + { + if (!mutex_trylock(&result->mutex)) + goto out; + + result = result->next; + } + while (result != next_to_use); + + /* No arena available. Wait for the next in line. */ + (void)mutex_lock(&result->mutex); + + out: + tsd_setspecific(arena_key, (Void_t *)result); + THREAD_STAT(++(result->stat_lock_loop)); + next_to_use = result->next; + + return result; +} +#endif + static mstate internal_function #if __STD_C @@ -888,6 +1046,12 @@ arena_get2(a_tsd, size) mstate a_tsd; size_t size; { mstate a; +#ifdef PER_THREAD + if ((a = get_free_list ()) == NULL + && (a = reused_arena ()) == NULL) + /* Nothing immediately available, so generate a new arena. 
*/ + a = _int_new_arena(size); +#else if(!a_tsd) a = a_tsd = &main_arena; else { @@ -930,24 +1094,31 @@ arena_get2(a_tsd, size) mstate a_tsd; size_t size; /* Nothing immediately available, so generate a new arena. */ a = _int_new_arena(size); - if(a) - { - tsd_setspecific(arena_key, (Void_t *)a); - mutex_init(&a->mutex); - mutex_lock(&a->mutex); /* remember result */ - - /* Add the new arena to the global list. */ - a->next = main_arena.next; - atomic_write_barrier (); - main_arena.next = a; - - THREAD_STAT(++(a->stat_lock_loop)); - } (void)mutex_unlock(&list_lock); +#endif return a; } +#ifdef PER_THREAD +static void __attribute__ ((section ("__libc_thread_freeres_fn"))) +arena_thread_freeres (void) +{ + Void_t *vptr = NULL; + mstate a = tsd_getspecific(arena_key, vptr); + tsd_setspecific(arena_key, NULL); + + if (a != NULL) + { + (void)mutex_lock(&list_lock); + a->next_free = free_list; + free_list = a; + (void)mutex_unlock(&list_lock); + } +} +text_set_element (__libc_thread_subfreeres, arena_thread_freeres); +#endif + #endif /* USE_ARENAS */ /* diff --git a/malloc/hooks.c b/malloc/hooks.c index 9659ec5..fe89db8 100644 --- a/malloc/hooks.c +++ b/malloc/hooks.c @@ -275,17 +275,13 @@ free_check(mem, caller) Void_t* mem; const Void_t *caller; mchunkptr p; if(!mem) return; - (void)mutex_lock(&main_arena.mutex); p = mem2chunk_check(mem, NULL); if(!p) { - (void)mutex_unlock(&main_arena.mutex); - malloc_printerr(check_action, "free(): invalid pointer", mem); return; } #if HAVE_MMAP if (chunk_is_mmapped(p)) { - (void)mutex_unlock(&main_arena.mutex); munmap_chunk(p); return; } @@ -293,8 +289,13 @@ free_check(mem, caller) Void_t* mem; const Void_t *caller; #if 0 /* Erase freed memory. 
*/ memset(mem, 0, chunksize(p) - (SIZE_SZ+1)); #endif +#ifdef ATOMIC_FASTBINS + _int_free(&main_arena, p, 0); +#else + (void)mutex_lock(&main_arena.mutex); _int_free(&main_arena, p); (void)mutex_unlock(&main_arena.mutex); +#endif } static Void_t* @@ -472,7 +473,11 @@ free_starter(mem, caller) Void_t* mem; const Void_t *caller; return; } #endif +#ifdef ATOMIC_FASTBINS + _int_free(&main_arena, p, 1); +#else _int_free(&main_arena, p); +#endif } # endif /* !defiend NO_STARTER */ @@ -584,7 +589,7 @@ public_sET_STATe(Void_t* msptr) clear_fastchunks(&main_arena); set_max_fast(DEFAULT_MXFAST); for (i=0; i<NFASTBINS; ++i) - main_arena.fastbins[i] = 0; + fastbin (&main_arena, i) = 0; for (i=0; i<BINMAPSIZE; ++i) main_arena.binmap[i] = 0; top(&main_arena) = ms->av[2]; diff --git a/malloc/malloc.c b/malloc/malloc.c index 12e23b0..bb7ea36 100644 --- a/malloc/malloc.c +++ b/malloc/malloc.c @@ -208,7 +208,7 @@ Tuning options that are also dynamically changeable via mallopt: - DEFAULT_MXFAST 64 + DEFAULT_MXFAST 64 (for 32bit), 128 (for 64bit) DEFAULT_TRIM_THRESHOLD 128 * 1024 DEFAULT_TOP_PAD 0 DEFAULT_MMAP_THRESHOLD 128 * 1024 @@ -254,8 +254,12 @@ #include #ifdef _LIBC +#ifdef ATOMIC_FASTBINS +#include +#endif #include #include +#include #endif #ifdef __cplusplus @@ -321,12 +325,7 @@ extern "C" { or other mallocs available that do this. 
*/ -#if MALLOC_DEBUG #include <assert.h> -#else -#undef assert -#define assert(x) ((void)0) -#endif /* @@ -1308,7 +1307,7 @@ int __posix_memalign(void **, size_t, size_t); #endif #ifndef DEFAULT_MXFAST -#define DEFAULT_MXFAST 64 +#define DEFAULT_MXFAST (64 * SIZE_SZ / 4) #endif @@ -1582,7 +1581,11 @@ typedef struct malloc_chunk* mchunkptr; #if __STD_C static Void_t* _int_malloc(mstate, size_t); +#ifdef ATOMIC_FASTBINS +static void _int_free(mstate, mchunkptr, int); +#else static void _int_free(mstate, mchunkptr); +#endif static Void_t* _int_realloc(mstate, mchunkptr, INTERNAL_SIZE_T); static Void_t* _int_memalign(mstate, size_t, size_t); static Void_t* _int_valloc(mstate, size_t); @@ -2239,12 +2242,15 @@ typedef struct malloc_chunk* mbinptr; */ typedef struct malloc_chunk* mfastbinptr; +#define fastbin(ar_ptr, idx) ((ar_ptr)->fastbinsY[idx]) /* offset 2 to use otherwise unindexable first 2 bins */ -#define fastbin_index(sz) ((((unsigned int)(sz)) >> 3) - 2) +#define fastbin_index(sz) \ + ((((unsigned int)(sz)) >> (SIZE_SZ == 8 ? 
4 : 3)) - 2) + /* The maximum fastbin request size we support */ -#define MAX_FAST_SIZE 80 +#define MAX_FAST_SIZE (80 * SIZE_SZ / 4) #define NFASTBINS (fastbin_index(request2size(MAX_FAST_SIZE))+1) @@ -2279,8 +2285,13 @@ typedef struct malloc_chunk* mfastbinptr; #define FASTCHUNKS_BIT (1U) #define have_fastchunks(M) (((M)->flags & FASTCHUNKS_BIT) == 0) +#ifdef ATOMIC_FASTBINS +#define clear_fastchunks(M) catomic_or (&(M)->flags, FASTCHUNKS_BIT) +#define set_fastchunks(M) catomic_and (&(M)->flags, ~FASTCHUNKS_BIT) +#else #define clear_fastchunks(M) ((M)->flags |= FASTCHUNKS_BIT) #define set_fastchunks(M) ((M)->flags &= ~FASTCHUNKS_BIT) +#endif /* NONCONTIGUOUS_BIT indicates that MORECORE does not return contiguous @@ -2327,7 +2338,7 @@ struct malloc_state { #endif /* Fastbins */ - mfastbinptr fastbins[NFASTBINS]; + mfastbinptr fastbinsY[NFASTBINS]; /* Base of the topmost chunk -- not otherwise kept in a bin */ mchunkptr top; @@ -2344,6 +2355,11 @@ struct malloc_state { /* Linked list */ struct malloc_state *next; +#ifdef PER_THREAD + /* Linked list for free arenas. */ + struct malloc_state *next_free; +#endif + /* Memory allocated from the system in this arena. */ INTERNAL_SIZE_T system_mem; INTERNAL_SIZE_T max_system_mem; @@ -2354,6 +2370,10 @@ struct malloc_par { unsigned long trim_threshold; INTERNAL_SIZE_T top_pad; INTERNAL_SIZE_T mmap_threshold; +#ifdef PER_THREAD + INTERNAL_SIZE_T arena_test; + INTERNAL_SIZE_T arena_max; +#endif /* Memory map support */ int n_mmaps; @@ -2391,6 +2411,13 @@ static struct malloc_state main_arena; static struct malloc_par mp_; +#ifdef PER_THREAD +/* Non public mallopt parameters. */ +#define M_ARENA_TEST -7 +#define M_ARENA_MAX -8 +#endif + + /* Maximum size of memory handled in fastbins. 
*/ static INTERNAL_SIZE_T global_max_fast; @@ -3037,8 +3064,10 @@ static Void_t* sYSMALLOc(nb, av) INTERNAL_SIZE_T nb; mstate av; /* Precondition: not enough current space to satisfy nb request */ assert((unsigned long)(old_size) < (unsigned long)(nb + MINSIZE)); +#ifndef ATOMIC_FASTBINS /* Precondition: all fastbins are consolidated */ assert(!have_fastchunks(av)); +#endif if (av != &main_arena) { @@ -3084,7 +3113,11 @@ static Void_t* sYSMALLOc(nb, av) INTERNAL_SIZE_T nb; mstate av; set_head(chunk_at_offset(old_top, old_size), (2*SIZE_SZ)|PREV_INUSE); set_foot(chunk_at_offset(old_top, old_size), (2*SIZE_SZ)); set_head(old_top, old_size|PREV_INUSE|NON_MAIN_ARENA); +#ifdef ATOMIC_FASTBINS + _int_free(av, old_top, 1); +#else _int_free(av, old_top); +#endif } else { set_head(old_top, (old_size + 2*SIZE_SZ)|PREV_INUSE); set_foot(old_top, (old_size + 2*SIZE_SZ)); @@ -3323,7 +3356,11 @@ static Void_t* sYSMALLOc(nb, av) INTERNAL_SIZE_T nb; mstate av; /* If possible, release the rest. */ if (old_size >= MINSIZE) { +#ifdef ATOMIC_FASTBINS + _int_free(av, old_top, 1); +#else _int_free(av, old_top); +#endif } } @@ -3545,7 +3582,40 @@ public_mALLOc(size_t bytes) if (__builtin_expect (hook != NULL, 0)) return (*hook)(bytes, RETURN_ADDRESS (0)); - arena_get(ar_ptr, bytes); + arena_lookup(ar_ptr); +#if 0 + // XXX We need double-word CAS and fastbins must be extended to also + // XXX hold a generation counter for each entry. 
+ if (ar_ptr) { + INTERNAL_SIZE_T nb; /* normalized request size */ + checked_request2size(bytes, nb); + if (nb <= get_max_fast ()) { + long int idx = fastbin_index(nb); + mfastbinptr* fb = &fastbin (ar_ptr, idx); + mchunkptr pp = *fb; + mchunkptr v; + do + { + v = pp; + if (v == NULL) + break; + } + while ((pp = catomic_compare_and_exchange_val_acq (fb, v->fd, v)) != v); + if (v != 0) { + if (__builtin_expect (fastbin_index (chunksize (v)) != idx, 0)) + malloc_printerr (check_action, "malloc(): memory corruption (fast)", + chunk2mem (v)); + check_remalloced_chunk(ar_ptr, v, nb); + void *p = chunk2mem(v); + if (__builtin_expect (perturb_byte, 0)) + alloc_perturb (p, bytes); + return p; + } + } + } +#endif + + arena_lock(ar_ptr, bytes); if(!ar_ptr) return 0; victim = _int_malloc(ar_ptr, bytes); @@ -3612,18 +3682,22 @@ public_fREe(Void_t* mem) #endif ar_ptr = arena_for_chunk(p); -#if THREAD_STATS +#ifdef ATOMIC_FASTBINS + _int_free(ar_ptr, p, 0); +#else +# if THREAD_STATS if(!mutex_trylock(&ar_ptr->mutex)) ++(ar_ptr->stat_lock_direct); else { (void)mutex_lock(&ar_ptr->mutex); ++(ar_ptr->stat_lock_wait); } -#else +# else (void)mutex_lock(&ar_ptr->mutex); -#endif +# endif _int_free(ar_ptr, p); (void)mutex_unlock(&ar_ptr->mutex); +#endif } #ifdef libc_hidden_def libc_hidden_def (public_fREe) @@ -3699,7 +3773,7 @@ public_rEALLOc(Void_t* oldmem, size_t bytes) (void)mutex_lock(&ar_ptr->mutex); #endif -#ifndef NO_THREADS +#if !defined NO_THREADS && !defined PER_THREAD /* As in malloc(), remember this arena for the next allocation. 
*/ tsd_setspecific(arena_key, (Void_t *)ar_ptr); #endif @@ -3717,18 +3791,22 @@ public_rEALLOc(Void_t* oldmem, size_t bytes) if (newp != NULL) { MALLOC_COPY (newp, oldmem, oldsize - SIZE_SZ); -#if THREAD_STATS +#ifdef ATOMIC_FASTBINS + _int_free(ar_ptr, oldp, 0); +#else +# if THREAD_STATS if(!mutex_trylock(&ar_ptr->mutex)) ++(ar_ptr->stat_lock_direct); else { (void)mutex_lock(&ar_ptr->mutex); ++(ar_ptr->stat_lock_wait); } -#else +# else (void)mutex_lock(&ar_ptr->mutex); -#endif +# endif _int_free(ar_ptr, oldp); (void)mutex_unlock(&ar_ptr->mutex); +#endif } } @@ -4130,7 +4208,6 @@ _int_malloc(mstate av, size_t bytes) INTERNAL_SIZE_T nb; /* normalized request size */ unsigned int idx; /* associated bin index */ mbinptr bin; /* associated bin */ - mfastbinptr* fb; /* associated fastbin */ mchunkptr victim; /* inspected/selected chunk */ INTERNAL_SIZE_T size; /* its size */ @@ -4164,13 +4241,28 @@ _int_malloc(mstate av, size_t bytes) */ if ((unsigned long)(nb) <= (unsigned long)(get_max_fast ())) { - long int idx = fastbin_index(nb); - fb = &(av->fastbins[idx]); - if ( (victim = *fb) != 0) { + idx = fastbin_index(nb); + mfastbinptr* fb = &fastbin (av, idx); +#ifdef ATOMIC_FASTBINS + mchunkptr pp = *fb; + do + { + victim = pp; + if (victim == NULL) + break; + } + while ((pp = catomic_compare_and_exchange_val_acq (fb, victim->fd, victim)) + != victim); +#else + victim = *fb; +#endif + if (victim != 0) { if (__builtin_expect (fastbin_index (chunksize (victim)) != idx, 0)) malloc_printerr (check_action, "malloc(): memory corruption (fast)", chunk2mem (victim)); +#ifndef ATOMIC_FASTBINS *fb = victim->fd; +#endif check_remalloced_chunk(av, victim, nb); void *p = chunk2mem(victim); if (__builtin_expect (perturb_byte, 0)) @@ -4560,6 +4652,18 @@ _int_malloc(mstate av, size_t bytes) return p; } +#ifdef ATOMIC_FASTBINS + /* When we are using atomic ops to free fast chunks we can get + here for all block sizes. 
*/ + else if (have_fastchunks(av)) { + malloc_consolidate(av); + /* restore original bin index */ + if (in_smallbin_range(nb)) + idx = smallbin_index(nb); + else + idx = largebin_index(nb); + } +#else /* If there is space available in fastbins, consolidate and retry, to possibly avoid expanding memory. This can occur only if nb is @@ -4571,6 +4675,7 @@ _int_malloc(mstate av, size_t bytes) malloc_consolidate(av); idx = smallbin_index(nb); /* restore original bin index */ } +#endif /* Otherwise, relay to handle system-dependent cases @@ -4589,7 +4694,11 @@ _int_malloc(mstate av, size_t bytes) */ static void +#ifdef ATOMIC_FASTBINS +_int_free(mstate av, mchunkptr p, int have_lock) +#else _int_free(mstate av, mchunkptr p) +#endif { INTERNAL_SIZE_T size; /* its size */ mfastbinptr* fb; /* associated fastbin */ @@ -4601,6 +4710,9 @@ _int_free(mstate av, mchunkptr p) mchunkptr fwd; /* misc temp for linking */ const char *errstr = NULL; +#ifdef ATOMIC_FASTBINS + int locked = 0; +#endif size = chunksize(p); @@ -4613,6 +4725,10 @@ _int_free(mstate av, mchunkptr p) { errstr = "free(): invalid pointer"; errout: +#ifdef ATOMIC_FASTBINS + if (! have_lock && locked) + (void)mutex_unlock(&av->mutex); +#endif malloc_printerr (check_action, errstr, chunk2mem(p)); return; } @@ -4649,8 +4765,28 @@ _int_free(mstate av, mchunkptr p) goto errout; } + if (__builtin_expect (perturb_byte, 0)) + free_perturb (chunk2mem(p), size - SIZE_SZ); + set_fastchunks(av); - fb = &(av->fastbins[fastbin_index(size)]); + fb = &fastbin (av, fastbin_index(size)); + +#ifdef ATOMIC_FASTBINS + mchunkptr fd; + mchunkptr old = *fb; + do + { + /* Another simple check: make sure the top of the bin is not the + record we are going to add (i.e., double free). 
*/ + if (__builtin_expect (old == p, 0)) + { + errstr = "double free or corruption (fasttop)"; + goto errout; + } + p->fd = fd = old; + } + while ((old = catomic_compare_and_exchange_val_acq (fb, p, fd)) != fd); +#else /* Another simple check: make sure the top of the bin is not the record we are going to add (i.e., double free). */ if (__builtin_expect (*fb == p, 0)) @@ -4659,11 +4795,9 @@ _int_free(mstate av, mchunkptr p) goto errout; } - if (__builtin_expect (perturb_byte, 0)) - free_perturb (chunk2mem(p), size - SIZE_SZ); - p->fd = *fb; *fb = p; +#endif } /* @@ -4671,6 +4805,22 @@ _int_free(mstate av, mchunkptr p) */ else if (!chunk_is_mmapped(p)) { +#ifdef ATOMIC_FASTBINS + if (! have_lock) { +# if THREAD_STATS + if(!mutex_trylock(&av->mutex)) + ++(av->stat_lock_direct); + else { + (void)mutex_lock(&av->mutex); + ++(av->stat_lock_wait); + } +# else + (void)mutex_lock(&av->mutex); +# endif + locked = 1; + } +#endif + nextchunk = chunk_at_offset(p, size); /* Lightweight tests: check whether the block is already the @@ -4794,6 +4944,12 @@ _int_free(mstate av, mchunkptr p) } } +#ifdef ATOMIC_FASTBINS + if (! have_lock) { + assert (locked); + (void)mutex_unlock(&av->mutex); + } +#endif } /* If the chunk was allocated via mmap, release via munmap(). Note @@ -4869,15 +5025,21 @@ static void malloc_consolidate(av) mstate av; because, except for the main arena, all the others might have blocks in the high fast bins. It's not worth it anyway, just search all bins all the time. 
*/ - maxfb = &(av->fastbins[fastbin_index(get_max_fast ())]); + maxfb = &fastbin (av, fastbin_index(get_max_fast ())); #else - maxfb = &(av->fastbins[NFASTBINS - 1]); + maxfb = &fastbin (av, NFASTBINS - 1); #endif - fb = &(av->fastbins[0]); + fb = &fastbin (av, 0); do { - if ( (p = *fb) != 0) { - *fb = 0; - +#ifdef ATOMIC_FASTBINS + p = atomic_exchange_acq (fb, 0); +#else + p = *fb; +#endif + if (p != 0) { +#ifndef ATOMIC_FASTBINS + *fb = 0; +#endif do { check_inuse_chunk(av, p); nextp = p->fd; @@ -5070,7 +5232,11 @@ _int_realloc(mstate av, mchunkptr oldp, INTERNAL_SIZE_T nb) } } +#ifdef ATOMIC_FASTBINS + _int_free(av, oldp, 1); +#else _int_free(av, oldp); +#endif check_inuse_chunk(av, newp); return chunk2mem(newp); } @@ -5094,7 +5260,11 @@ _int_realloc(mstate av, mchunkptr oldp, INTERNAL_SIZE_T nb) (av != &main_arena ? NON_MAIN_ARENA : 0)); /* Mark remainder as inuse so free() won't complain */ set_inuse_bit_at_offset(remainder, remainder_size); +#ifdef ATOMIC_FASTBINS + _int_free(av, remainder, 1); +#else _int_free(av, remainder); +#endif } check_inuse_chunk(av, newp); @@ -5153,7 +5323,11 @@ _int_realloc(mstate av, mchunkptr oldp, INTERNAL_SIZE_T nb) newmem = _int_malloc(av, nb - MALLOC_ALIGN_MASK); if (newmem != 0) { MALLOC_COPY(newmem, chunk2mem(oldp), oldsize - 2*SIZE_SZ); +#ifdef ATOMIC_FASTBINS + _int_free(av, oldp, 1); +#else _int_free(av, oldp); +#endif } } return newmem; @@ -5247,7 +5421,11 @@ _int_memalign(mstate av, size_t alignment, size_t bytes) (av != &main_arena ? NON_MAIN_ARENA : 0)); set_inuse_bit_at_offset(newp, newsize); set_head_size(p, leadsize | (av != &main_arena ? NON_MAIN_ARENA : 0)); +#ifdef ATOMIC_FASTBINS + _int_free(av, p, 1); +#else _int_free(av, p); +#endif p = newp; assert (newsize >= nb && @@ -5263,7 +5441,11 @@ _int_memalign(mstate av, size_t alignment, size_t bytes) set_head(remainder, remainder_size | PREV_INUSE | (av != &main_arena ? 
NON_MAIN_ARENA : 0)); set_head_size(p, nb); +#ifdef ATOMIC_FASTBINS + _int_free(av, remainder, 1); +#else _int_free(av, remainder); +#endif } } @@ -5650,7 +5832,7 @@ struct mallinfo mALLINFo(mstate av) fastavail = 0; for (i = 0; i < NFASTBINS; ++i) { - for (p = av->fastbins[i]; p != 0; p = p->fd) { + for (p = fastbin (av, i); p != 0; p = p->fd) { ++nfastblocks; fastavail += chunksize(p); } @@ -5818,6 +6000,18 @@ int mALLOPt(param_number, value) int param_number; int value; case M_PERTURB: perturb_byte = value; break; + +#ifdef PER_THREAD + case M_ARENA_TEST: + if (value > 0) + mp_.arena_test = value; + break; + + case M_ARENA_MAX: + if (value > 0) + mp_.arena_max = value; + break; +#endif } (void)mutex_unlock(&av->mutex); return res; diff --git a/malloc/malloc.h b/malloc/malloc.h index b6d7a8a..2c0ee35 100644 --- a/malloc/malloc.h +++ b/malloc/malloc.h @@ -1,5 +1,5 @@ /* Prototypes and definition for malloc implementation. - Copyright (C) 1996, 1997, 1999, 2000, 2002-2004, 2005, 2007 + Copyright (C) 1996, 1997, 1999, 2000, 2002-2004, 2005, 2007, 2009 Free Software Foundation, Inc. This file is part of the GNU C Library. @@ -127,6 +127,8 @@ extern struct mallinfo mallinfo __MALLOC_P ((void)); #define M_MMAP_MAX -4 #define M_CHECK_ACTION -5 #define M_PERTURB -6 +#define M_ARENA_TEST -7 +#define M_ARENA_MAX -8 /* General SVID/XPG interface to tunable parameters. 
*/ extern int mallopt __MALLOC_P ((int __param, int __val)); diff --git a/sysdeps/x86_64/cacheinfo.c b/sysdeps/x86_64/cacheinfo.c index a7e3fc7..8769e9c 100644 --- a/sysdeps/x86_64/cacheinfo.c +++ b/sysdeps/x86_64/cacheinfo.c @@ -23,6 +23,10 @@ #include #include +#ifdef USE_MULTIARCH +# include "multiarch/init-arch.h" +#endif + static const struct intel_02_cache_info { unsigned int idx; @@ -443,19 +447,32 @@ init_cacheinfo (void) unsigned int ebx; unsigned int ecx; unsigned int edx; - int max_cpuid; int max_cpuid_ex; long int data = -1; long int shared = -1; unsigned int level; unsigned int threads = 0; +#ifdef USE_MULTIARCH + if (__cpu_features.kind == arch_kind_unknown) + __init_cpu_features (); +# define is_intel __cpu_features.kind == arch_kind_intel +# define is_amd __cpu_features.kind == arch_kind_amd +# define max_cpuid __cpu_features.max_cpuid +#else + int max_cpuid; asm volatile ("cpuid" : "=a" (max_cpuid), "=b" (ebx), "=c" (ecx), "=d" (edx) : "0" (0)); - /* This spells out "GenuineIntel". */ - if (ebx == 0x756e6547 && ecx == 0x6c65746e && edx == 0x49656e69) +# define is_intel \ + ebx == 0x756e6547 && ecx == 0x6c65746e && edx == 0x49656e69 + /* This spells out "AuthenticAMD". */ +# define is_amd \ + ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65 +#endif + + if (is_intel) { data = handle_intel (_SC_LEVEL1_DCACHE_SIZE, max_cpuid); @@ -470,9 +487,16 @@ init_cacheinfo (void) shared = handle_intel (_SC_LEVEL2_CACHE_SIZE, max_cpuid); } +#ifdef USE_MULTIARCH + eax = __cpu_features.cpuid[INTEL_CPUID_INDEX_1].eax; + ebx = __cpu_features.cpuid[INTEL_CPUID_INDEX_1].ebx; + ecx = __cpu_features.cpuid[INTEL_CPUID_INDEX_1].ecx; + edx = __cpu_features.cpuid[INTEL_CPUID_INDEX_1].edx; +#else asm volatile ("cpuid" : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) : "0" (1)); +#endif /* Intel prefers SSSE3 instructions for memory/string routines if they are avaiable. 
*/ @@ -519,7 +543,7 @@ init_cacheinfo (void) shared /= threads; } /* This spells out "AuthenticAMD". */ - else if (ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65) + else if (is_amd) { data = handle_amd (_SC_LEVEL1_DCACHE_SIZE); long int core = handle_amd (_SC_LEVEL2_CACHE_SIZE); diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile new file mode 100644 index 0000000..2a1e910 --- /dev/null +++ b/sysdeps/x86_64/multiarch/Makefile @@ -0,0 +1,3 @@ +ifeq ($(subdir),csu) +aux += init-arch +endif diff --git a/sysdeps/x86_64/multiarch/init-arch.c b/sysdeps/x86_64/multiarch/init-arch.c new file mode 100644 index 0000000..eb4365f --- /dev/null +++ b/sysdeps/x86_64/multiarch/init-arch.c @@ -0,0 +1,65 @@ +/* Initialize CPU feature data. + This file is part of the GNU C Library. + Copyright (C) 2008 Free Software Foundation, Inc. + Contributed by Ulrich Drepper . + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#include "init-arch.h" + + +struct cpu_features __cpu_features attribute_hidden; + + +void +__init_cpu_features (void) +{ + unsigned int ebx; + unsigned int ecx; + unsigned int edx; + + asm volatile ("cpuid" + : "=a" (__cpu_features.max_cpuid), "=b" (ebx), "=c" (ecx), + "=d" (edx) + : "0" (0)); + + /* This spells out "GenuineIntel". 
*/ + if (ebx == 0x756e6547 && ecx == 0x6c65746e && edx == 0x49656e69) + { + __cpu_features.kind = arch_kind_intel; + + asm volatile ("cpuid" + : "=a" (__cpu_features.cpuid[INTEL_CPUID_INDEX_1].eax), + "=b" (__cpu_features.cpuid[INTEL_CPUID_INDEX_1].ebx), + "=c" (__cpu_features.cpuid[INTEL_CPUID_INDEX_1].ecx), + "=d" (__cpu_features.cpuid[INTEL_CPUID_INDEX_1].edx) + : "0" (1)); + } + /* This spells out "AuthenticAMD". */ + else if (ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65) + { + __cpu_features.kind = arch_kind_amd; + + asm volatile ("cpuid" + : "=a" (__cpu_features.cpuid[AMD_CPUID_INDEX_1].eax), + "=b" (__cpu_features.cpuid[AMD_CPUID_INDEX_1].ebx), + "=c" (__cpu_features.cpuid[AMD_CPUID_INDEX_1].ecx), + "=d" (__cpu_features.cpuid[AMD_CPUID_INDEX_1].edx) + : "0" (1)); + } + else + __cpu_features.kind = arch_kind_other; +} diff --git a/sysdeps/x86_64/multiarch/init-arch.h b/sysdeps/x86_64/multiarch/init-arch.h new file mode 100644 index 0000000..86cd83d --- /dev/null +++ b/sysdeps/x86_64/multiarch/init-arch.h @@ -0,0 +1,70 @@ +/* This file is part of the GNU C Library. + Copyright (C) 2008 Free Software Foundation, Inc. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#include + +enum + { + INTEL_CPUID_INDEX_1 = 0, + /* Keep the following line at the end. 
*/ + INTEL_CPUID_INDEX_MAX + }; + +enum + { + AMD_CPUID_INDEX_1 = 0, + /* Keep the following line at the end. */ + AMD_CPUID_INDEX_MAX + }; + +extern struct cpu_features +{ + enum + { + arch_kind_unknown = 0, + arch_kind_intel, + arch_kind_amd, + arch_kind_other + } kind; + int max_cpuid; + struct + { + unsigned int eax; + unsigned int ebx; + unsigned int ecx; + unsigned int edx; + } cpuid[MAX (INTEL_CPUID_INDEX_MAX, AMD_CPUID_INDEX_MAX)]; +} __cpu_features attribute_hidden; + + +extern void __init_cpu_features (void) attribute_hidden; +#define INIT_ARCH()\ + do \ + if (__cpu_features.kind == arch_kind_unknown) \ + __init_cpu_features (); \ + while (0) + +/* Following are the feature tests used throughout libc. */ + +#define INTEL_HAS_POPCOUNT \ + (__cpu_features.kind == arch_kind_intel \ + && (__cpu_features.cpuid[INTEL_CPUID_INDEX_1].ecx & (1 << 23)) != 0) + +#define AMD_HAS_POPCOUNT \ + (__cpu_features.kind == arch_kind_amd \ + && (__cpu_features.cpuid[AMD_CPUID_INDEX_1].ecx & (1 << 23)) != 0) diff --git a/sysdeps/x86_64/multiarch/sched_cpucount.c b/sysdeps/x86_64/multiarch/sched_cpucount.c new file mode 100644 index 0000000..dc20182 --- /dev/null +++ b/sysdeps/x86_64/multiarch/sched_cpucount.c @@ -0,0 +1,42 @@ +/* Count bits in CPU set. x86-64 multi-arch version. + This file is part of the GNU C Library. + Copyright (C) 2008 Free Software Foundation, Inc. + Contributed by Ulrich Drepper . + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#ifdef SHARED +# include +# include "init-arch.h" + +# define __sched_cpucount static generic_cpucount +# include +# undef __sched_cpucount + +# define POPCNT(l) \ + ({ __cpu_mask r; \ + asm ("popcntq %1, %0" : "=r" (r) : "0" (l));\ + r; }) +# define __sched_cpucount static popcount_cpucount +# include +# undef __sched_cpucount + +libc_ifunc (__sched_cpucount, + INTEL_HAS_POPCOUNT || AMD_HAS_POPCOUNT + ? popcount_cpucount : generic_cpucount); +#else +# include_next +#endif