+2009-03-13  Ulrich Drepper  <drepper@redhat.com>
+
+ * config.h.in (USE_MULTIARCH): Define.
+ * configure.in: Handle --enable-multi-arch.
+ * elf/dl-runtime.c (_dl_fixup): Handle STT_GNU_IFUNC.
+ (_dl_fixup_profile): Likewise.
+ * elf/do-lookup.c (dl_lookup_x): Likewise.
+ * sysdeps/x86_64/dl-machine.h: Handle STT_GNU_IFUNC.
+ * elf/elf.h (STT_GNU_IFUNC): Define.
+ * include/libc-symbols.h (libc_ifunc): Define.
+ * sysdeps/x86_64/cacheinfo.c: If USE_MULTIARCH is defined, use the
+ framework in init-arch.h to get CPUID values.
+ * sysdeps/x86_64/multiarch/Makefile: New file.
+ * sysdeps/x86_64/multiarch/init-arch.c: New file.
+ * sysdeps/x86_64/multiarch/init-arch.h: New file.
+ * sysdeps/x86_64/multiarch/sched_cpucount.c: New file.
+
+ * config.make.in (experimental-malloc): Define.
+ * configure.in: Handle --enable-experimental-malloc.
+ * malloc/Makefile: Handle experimental-malloc flag.
+ * malloc/malloc.c: Implement PER_THREAD and ATOMIC_FASTBINS features.
+ * malloc/arena.c: Likewise.
+ * malloc/hooks.c: Likewise.
+ * malloc/malloc.h: Define M_ARENA_TEST and M_ARENA_MAX.
+
2009-03-11  Ulrich Drepper  <drepper@redhat.com>

	* sysdeps/x86_64/dl-machine.h (elf_machine_rela): Add branch
-GNU C Library NEWS -- history of user-visible changes. 2009-3-8
+GNU C Library NEWS -- history of user-visible changes. 2009-3-12
Copyright (C) 1992-2008, 2009 Free Software Foundation, Inc.
See the end for copying conditions.
\f
Version 2.10
-* New Linux interface: accept4
+* New Linux interfaces: accept4, fallocate, fallocate64.
* Correct declarations of string functions when used in C++ code.  This
  could lead to compile errors for invalid C++ code.
/* Define if __stack_chk_guard canary should be randomized at program startup. */
#undef ENABLE_STACKGUARD_RANDOMIZE
+/* Define if multi-arch DSOs should be generated. */
+#undef USE_MULTIARCH
+
/*
\f */
oldest-abi = @oldest_abi@
no-whole-archive = @no_whole_archive@
exceptions = @exceptions@
+multi-arch = @multi_arch@
mach-interface-list = @mach_interface_list@
sizeof-long-double = @sizeof_long_double@
+experimental-malloc = @experimental_malloc@
+
# Configuration options.
build-static = @static@
build-shared = @shared@
bindnow
force_install
all_warnings
+multi_arch
+experimental_malloc
build
build_cpu
build_vendor
--enable-kernel=VERSION compile for compatibility with kernel not older than
VERSION
--enable-all-warnings enable all useful warnings gcc can issue
+ --enable-multi-arch enable single DSO with optimizations for multiple
+ architectures
+ --enable-experimental-malloc
+ enable experimental malloc features
Optional Packages:
--with-PACKAGE[=ARG] use PACKAGE [ARG=yes]
+# Check whether --enable-multi-arch was given.
+if test "${enable_multi_arch+set}" = set; then
+ enableval=$enable_multi_arch; multi_arch=$enableval
+else
+ multi_arch=no
+fi
+
+if test x"$multi_arch" = xyes; then
+ cat >>confdefs.h <<\_ACEOF
+#define USE_MULTIARCH 1
+_ACEOF
+
+ multi_arch_d=/multiarch
+fi
+
+
+# Check whether --enable-experimental-malloc was given.
+if test "${enable_experimental_malloc+set}" = set; then
+ enableval=$enable_experimental_malloc; experimental_malloc=$enableval
+fi
+
+
+
# Make sure we can run config.sub.
$SHELL "$ac_aux_dir/config.sub" sun4 >/dev/null 2>&1 ||
{ { echo "$as_me:$LINENO: error: cannot run $SHELL $ac_aux_dir/config.sub" >&5
test "$v" = / && continue
for o in /$ostry ''; do
test "$o" = / && continue
- for m in $mach ''; do
+ for m in $multi_arch_d $mach ''; do
for d in $add_ons_pfx ''; do
for a in $add_ons_sfx ''; do
if test -n "$m0$m0sub$b$v$o$m$msub"; then
fi
+# For the multi-arch option we need support in the assembler.
+if test "$multi_arch" = yes; then
+ if test "x$libc_cv_asm_type_prefix" != xno; then
+{ echo "$as_me:$LINENO: checking for assembler gnu_indirect_function symbol type support" >&5
+echo $ECHO_N "checking for assembler gnu_indirect_function symbol type support... $ECHO_C" >&6; }
+if test "${libc_cv_asm_gnu_indirect_function+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ cat > conftest.s <<EOF
+.type foo,%gnu_indirect_function
+EOF
+if ${CC-cc} -c $ASFLAGS conftest.s 1>&5 2>&5;
+then
+ libc_cv_asm_gnu_indirect_function=yes
+else
+ libc_cv_asm_gnu_indirect_function=no
+fi
+rm -f conftest*
+fi
+{ echo "$as_me:$LINENO: result: $libc_cv_asm_gnu_indirect_function" >&5
+echo "${ECHO_T}$libc_cv_asm_gnu_indirect_function" >&6; }
+ else
+ libc_cv_asm_gnu_indirect_function=no
+ fi
+ if test x"$libc_cv_asm_gnu_indirect_function" != xyes; then
+ { { echo "$as_me:$LINENO: error: --enable-multi-arch support requires assembler and linker support" >&5
+echo "$as_me: error: --enable-multi-arch support requires assembler and linker support" >&2;}
+ { (exit 1); exit 1; }; }
+ fi
+fi
+
{ echo "$as_me:$LINENO: checking for .symver assembler directive" >&5
echo $ECHO_N "checking for .symver assembler directive... $ECHO_C" >&6; }
if test "${libc_cv_asm_symver_directive+set}" = set; then
bindnow!$bindnow$ac_delim
force_install!$force_install$ac_delim
all_warnings!$all_warnings$ac_delim
+multi_arch!$multi_arch$ac_delim
+experimental_malloc!$experimental_malloc$ac_delim
build!$build$ac_delim
build_cpu!$build_cpu$ac_delim
build_vendor!$build_vendor$ac_delim
KSH!$KSH$ac_delim
libc_cv_have_ksh!$libc_cv_have_ksh$ac_delim
AWK!$AWK$ac_delim
-PERL!$PERL$ac_delim
-INSTALL_INFO!$INSTALL_INFO$ac_delim
_ACEOF
if test `sed -n "s/.*$ac_delim\$/X/p" conf$$subs.sed | grep -c X` = 97; then
ac_delim='%!_!# '
for ac_last_try in false false false false false :; do
cat >conf$$subs.sed <<_ACEOF
+PERL!$PERL$ac_delim
+INSTALL_INFO!$INSTALL_INFO$ac_delim
BISON!$BISON$ac_delim
VERSIONING!$VERSIONING$ac_delim
libc_cv_cc_with_libunwind!$libc_cv_cc_with_libunwind$ac_delim
LTLIBOBJS!$LTLIBOBJS$ac_delim
_ACEOF
- if test `sed -n "s/.*$ac_delim\$/X/p" conf$$subs.sed | grep -c X` = 54; then
+ if test `sed -n "s/.*$ac_delim\$/X/p" conf$$subs.sed | grep -c X` = 56; then
break
elif $ac_last_try; then
{ { echo "$as_me:$LINENO: error: could not make $CONFIG_STATUS" >&5
[])
AC_SUBST(all_warnings)
+AC_ARG_ENABLE([multi-arch],
+ AC_HELP_STRING([--enable-multi-arch],
+ [enable single DSO with optimizations for multiple architectures]),
+ [multi_arch=$enableval],
+ [multi_arch=no])
+if test x"$multi_arch" = xyes; then
+ AC_DEFINE(USE_MULTIARCH)
+ multi_arch_d=/multiarch
+fi
+AC_SUBST(multi_arch)
+
+AC_ARG_ENABLE([experimental-malloc],
+ AC_HELP_STRING([--enable-experimental-malloc],
+ [enable experimental malloc features]),
+ [experimental_malloc=$enableval],
+ [])
+AC_SUBST(experimental_malloc)
+
AC_CANONICAL_HOST
# The way shlib-versions is used to generate soversions.mk uses a
test "$v" = / && continue
for o in /$ostry ''; do
test "$o" = / && continue
- for m in $mach ''; do
+ for m in $multi_arch_d $mach ''; do
for d in $add_ons_pfx ''; do
for a in $add_ons_sfx ''; do
if test -n "$m0$m0sub$b$v$o$m$msub"; then
AC_DEFINE_UNQUOTED(ASM_TYPE_DIRECTIVE_PREFIX, ${libc_cv_asm_type_prefix})
fi
+# For the multi-arch option we need support in the assembler.
+if test "$multi_arch" = yes; then
+ if test "x$libc_cv_asm_type_prefix" != xno; then
+AC_CACHE_CHECK([for assembler gnu_indirect_function symbol type support],
+ libc_cv_asm_gnu_indirect_function, [dnl
+cat > conftest.s <<EOF
+.type foo,%gnu_indirect_function
+EOF
+if ${CC-cc} -c $ASFLAGS conftest.s 1>&AS_MESSAGE_LOG_FD 2>&AS_MESSAGE_LOG_FD;
+then
+ libc_cv_asm_gnu_indirect_function=yes
+else
+ libc_cv_asm_gnu_indirect_function=no
+fi
+rm -f conftest*])
+ else
+ libc_cv_asm_gnu_indirect_function=no
+ fi
+ if test x"$libc_cv_asm_gnu_indirect_function" != xyes; then
+ AC_MSG_ERROR([--enable-multi-arch support requires assembler and linker support])
+ fi
+fi
+
AC_CACHE_CHECK(for .symver assembler directive, libc_cv_asm_symver_directive,
[cat > conftest.s <<EOF
${libc_cv_dot_text}
/* On-demand PLT fixup for shared objects.
- Copyright (C) 1995-2006, 2007 Free Software Foundation, Inc.
+ Copyright (C) 1995-2006, 2007, 2008 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
/* And now perhaps the relocation addend. */
value = elf_machine_plt_value (l, reloc, value);
+ if (__builtin_expect (ELFW(ST_TYPE) (sym->st_info) == STT_GNU_IFUNC, 0))
+ value = ((DL_FIXUP_VALUE_TYPE (*) (void)) value) ();
+
/* Finally, fix up the plt itself. */
if (__builtin_expect (GLRO(dl_bind_not), 0))
return value;
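
The cast-and-call added above is the entire STT_GNU_IFUNC dispatch: the
symbol's value is not the final target but the address of a resolver
function, and calling it once yields the address that is then patched into
the PLT.  A minimal standalone sketch of the pattern (plain C, not glibc
code; the implementations and the selection condition are invented for
illustration):

#include <stdio.h>

static int impl_generic (int x) { return 2 * x; }
static int impl_fast (int x)    { return x << 1; }

/* Plays the role of the IFUNC resolver: the "symbol value" that
   _dl_fixup sees.  It returns the real implementation's address.  */
static void *
resolve (void)
{
  return (void *) (1 ? impl_fast : impl_generic);
}

int
main (void)
{
  /* Mirrors: value = ((DL_FIXUP_VALUE_TYPE (*) (void)) value) ();
     the resolver runs once and its result is what later calls use.  */
  void *value = (void *) resolve;
  value = ((void *(*) (void)) value) ();
  int (*fn) (int) = (int (*) (int)) value;
  printf ("%d\n", fn (21));	/* 42, via impl_fast */
  return 0;
}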
defsym != NULL
? LOOKUP_VALUE_ADDRESS (result)
+ defsym->st_value : 0);
+
+ if (__builtin_expect (ELFW(ST_TYPE) (defsym->st_info)
+ == STT_GNU_IFUNC, 0))
+ value = ((DL_FIXUP_VALUE_TYPE (*) (void)) value) ();
}
else
{
/* We already found the symbol. The module (and therefore its load
address) is also known. */
value = DL_FIXUP_MAKE_VALUE (l, l->l_addr + refsym->st_value);
+
+ if (__builtin_expect (ELFW(ST_TYPE) (refsym->st_info)
+ == STT_GNU_IFUNC, 0))
+ value = ((DL_FIXUP_VALUE_TYPE (*) (void)) value) ();
+
result = l;
}
/* And now perhaps the relocation addend. */
/* Look up a symbol in the loaded objects.
- Copyright (C) 1995-2004, 2005, 2006, 2007 Free Software Foundation, Inc.
+ Copyright (C) 1995-2007, 2008 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
if (__builtin_expect (ELFW(ST_TYPE) (sym->st_info) > STT_FUNC
&& ELFW(ST_TYPE) (sym->st_info) != STT_COMMON
- && ELFW(ST_TYPE) (sym->st_info) != STT_TLS, 0))
- /* Ignore all but STT_NOTYPE, STT_OBJECT, STT_FUNC, and STT_COMMON
- entries (and STT_TLS if TLS is supported) since these
- are no code/data definitions. */
+ && ELFW(ST_TYPE) (sym->st_info) != STT_TLS
+ && ELFW(ST_TYPE) (sym->st_info) != STT_GNU_IFUNC,
+ 0))
+ /* Ignore all but STT_NOTYPE, STT_OBJECT, STT_FUNC, STT_COMMON,
+    STT_TLS, and STT_GNU_IFUNC, since these are not code/data
+    definitions.  */
return NULL;
if (sym != ref && strcmp (strtab + sym->st_name, undef_name))
#define STT_TLS 6 /* Symbol is thread-local data object */
#define STT_NUM 7 /* Number of defined types. */
#define STT_LOOS 10 /* Start of OS-specific */
+#define STT_GNU_IFUNC 10 /* Symbol is indirect code object */
#define STT_HIOS 12 /* End of OS-specific */
#define STT_LOPROC 13 /* Start of processor-specific */
#define STT_HIPROC 15 /* End of processor-specific */
/* Support macros for making weak and strong aliases for symbols,
and for using symbol sets and linker warnings with GNU ld.
- Copyright (C) 1995-1998,2000-2003,2004,2005,2006
- Free Software Foundation, Inc.
+ Copyright (C) 1995-1998, 2000-2006, 2008 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
# define compat_data_section .section ".data.compat", "aw";
#endif
+/* Marker used for indirect function symbols.  */
+#define libc_ifunc(name, expr) \
+ extern void *name##_ifunc (void) __asm__ (#name); \
+ void *name##_ifunc (void) \
+ { \
+ INIT_ARCH (); \
+ __typeof (name) *res = expr; \
+ return res; \
+ } \
+ __asm__ (".type " #name ", %gnu_indirect_function");
+
#endif /* libc-symbols.h */
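
For reference, this is roughly what a libc_ifunc (name, expr) use expands
to.  The names my_func, my_func_generic, my_func_optimized, and the
USE_FAST condition are hypothetical; only the shape comes from the macro
above (INIT_ARCH is supplied by init-arch.h in the multiarch directory):

extern int my_func (int);
extern int my_func_generic (int);
extern int my_func_optimized (int);

/* Hand expansion of
     libc_ifunc (my_func, USE_FAST ? my_func_optimized : my_func_generic):
   the resolver is emitted under the public symbol name, and the symbol is
   retyped as gnu_indirect_function so the dynamic linker calls it to
   obtain the real target.  */
extern void *my_func_ifunc (void) __asm__ ("my_func");
void *
my_func_ifunc (void)
{
  INIT_ARCH ();
  __typeof (my_func) *res = USE_FAST ? my_func_optimized : my_func_generic;
  return res;	/* returned as void *, exactly as the macro does */
}
__asm__ (".type my_func, %gnu_indirect_function");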
-# Copyright (C) 1991-1999, 2000, 2001, 2002, 2003, 2005, 2006, 2007
+# Copyright (C) 1991-1999, 2000, 2001, 2002, 2003, 2005, 2006, 2007, 2009
# Free Software Foundation, Inc.
# This file is part of the GNU C Library.
tst-mcheck-ENV = MALLOC_CHECK_=3
+ifeq ($(experimental-malloc),yes)
+CPPFLAGS-malloc.c += -DPER_THREAD -DATOMIC_FASTBINS
+endif
# Uncomment this for test releases. For public releases it is too expensive.
#CPPFLAGS-malloc.o += -DMALLOC_DEBUG=1
/* Malloc implementation for multiple threads without lock contention.
- Copyright (C) 2001,2002,2003,2004,2005,2006,2007
+ Copyright (C) 2001,2002,2003,2004,2005,2006,2007,2009
Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Wolfram Gloger <wg@malloc.de>, 2001.
static tsd_key_t arena_key;
static mutex_t list_lock;
+#ifdef PER_THREAD
+static size_t narenas;
+static mstate free_list;
+#endif
#if THREAD_STATS
static int stat_n_heaps;
in the new arena. */
#define arena_get(ptr, size) do { \
+ arena_lookup(ptr); \
+ arena_lock(ptr, size); \
+} while(0)
+
+#define arena_lookup(ptr) do { \
Void_t *vptr = NULL; \
ptr = (mstate)tsd_getspecific(arena_key, vptr); \
+} while(0)
+
+#ifdef PER_THREAD
+#define arena_lock(ptr, size) do { \
+ if(ptr) \
+ (void)mutex_lock(&ptr->mutex); \
+ else \
+ ptr = arena_get2(ptr, (size)); \
+} while(0)
+#else
+#define arena_lock(ptr, size) do { \
if(ptr && !mutex_trylock(&ptr->mutex)) { \
THREAD_STAT(++(ptr->stat_lock_direct)); \
} else \
ptr = arena_get2(ptr, (size)); \
} while(0)
+#endif
/* find the heap and corresponding arena for a given ptr */
}
#endif
+#ifdef ATOMIC_FASTBINS
+ ar_ptr = arena_for_chunk(p);
+ tsd_getspecific(arena_key, vptr);
+ _int_free(ar_ptr, p, vptr == ATFORK_ARENA_PTR);
+#else
ar_ptr = arena_for_chunk(p);
tsd_getspecific(arena_key, vptr);
if(vptr != ATFORK_ARENA_PTR)
_int_free(ar_ptr, p);
if(vptr != ATFORK_ARENA_PTR)
(void)mutex_unlock(&ar_ptr->mutex);
+#endif
}
__malloc_hook = save_malloc_hook;
__free_hook = save_free_hook;
#endif
+#ifdef PER_THREAD
+ free_list = NULL;
+#endif
for(ar_ptr = &main_arena;;) {
mutex_init(&ar_ptr->mutex);
+#ifdef PER_THREAD
+ if (ar_ptr != save_arena) {
+ ar_ptr->next_free = free_list;
+ free_list = ar_ptr;
+ }
+#endif
ar_ptr = ar_ptr->next;
if(ar_ptr == &main_arena) break;
}
mp_.mmap_threshold = DEFAULT_MMAP_THRESHOLD;
mp_.trim_threshold = DEFAULT_TRIM_THRESHOLD;
mp_.pagesize = malloc_getpagesize;
+#ifdef PER_THREAD
+# define NARENAS_FROM_NCORES(n) ((n) * (sizeof(long) == 4 ? 2 : 8))
+ mp_.arena_test = NARENAS_FROM_NCORES (1);
+ narenas = 1;
+#endif
}
}
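
A quick worked example of the NARENAS_FROM_NCORES scaling introduced above
(the factors are the macro's own; the core counts are illustrative):

/* sizeof(long) == 4 (32-bit): NARENAS_FROM_NCORES(n) == n * 2,
   so mp_.arena_test starts at 2 and a 4-core box computes a limit of 8.
   sizeof(long) == 8 (64-bit): NARENAS_FROM_NCORES(n) == n * 8,
   so mp_.arena_test starts at 8 and a 4-core box computes a limit of 32
   in reused_arena below, unless M_ARENA_MAX overrides it.  */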
break;
case 9:
- if (! secure && memcmp (envline, "MMAP_MAX_", 9) == 0)
- mALLOPt(M_MMAP_MAX, atoi(&envline[10]));
+ if (! secure)
+ {
+ if (memcmp (envline, "MMAP_MAX_", 9) == 0)
+ mALLOPt(M_MMAP_MAX, atoi(&envline[10]));
+#ifdef PER_THREAD
+ else if (memcmp (envline, "ARENA_MAX", 9) == 0)
+ mALLOPt(M_ARENA_MAX, atoi(&envline[10]));
+#endif
+ }
break;
+#ifdef PER_THREAD
+ case 10:
+ if (! secure)
+ {
+ if (memcmp (envline, "ARENA_TEST", 10) == 0)
+ mALLOPt(M_ARENA_TEST, atoi(&envline[11]));
+ }
+ break;
+#endif
case 15:
if (! secure)
{
top(a) = (mchunkptr)ptr;
set_head(top(a), (((char*)h + h->size) - ptr) | PREV_INUSE);
+ tsd_setspecific(arena_key, (Void_t *)a);
+ mutex_init(&a->mutex);
+ (void)mutex_lock(&a->mutex);
+
+#ifdef PER_THREAD
+ (void)mutex_lock(&list_lock);
+#endif
+
+ /* Add the new arena to the global list. */
+ a->next = main_arena.next;
+ atomic_write_barrier ();
+ main_arena.next = a;
+
+#ifdef PER_THREAD
+ ++narenas;
+
+ (void)mutex_unlock(&list_lock);
+#endif
+
+ THREAD_STAT(++(a->stat_lock_loop));
+
return a;
}
+
+#ifdef PER_THREAD
+static mstate
+get_free_list (void)
+{
+ mstate result = free_list;
+ if (result != NULL)
+ {
+ (void)mutex_lock(&list_lock);
+ result = free_list;
+ if (result != NULL)
+ free_list = result->next_free;
+ (void)mutex_unlock(&list_lock);
+
+ if (result != NULL)
+ {
+ (void)mutex_lock(&result->mutex);
+ tsd_setspecific(arena_key, (Void_t *)result);
+ THREAD_STAT(++(result->stat_lock_loop));
+ }
+ }
+
+ return result;
+}
+
+
+static mstate
+reused_arena (void)
+{
+ if (narenas <= mp_.arena_test)
+ return NULL;
+
+ static int narenas_limit;
+ if (narenas_limit == 0)
+ {
+ if (mp_.arena_max != 0)
+ narenas_limit = mp_.arena_max;
+ else
+ {
+ int n = __get_nprocs ();
+
+ if (n >= 1)
+ narenas_limit = NARENAS_FROM_NCORES (n);
+ else
+ /* We have no information about the system. Assume two
+ cores. */
+ narenas_limit = NARENAS_FROM_NCORES (2);
+ }
+ }
+
+ if (narenas < narenas_limit)
+ return NULL;
+
+ mstate result;
+ static mstate next_to_use;
+ if (next_to_use == NULL)
+ next_to_use = &main_arena;
+
+ result = next_to_use;
+ do
+ {
+ if (!mutex_trylock(&result->mutex))
+ goto out;
+
+ result = result->next;
+ }
+ while (result != next_to_use);
+
+ /* No arena available. Wait for the next in line. */
+ (void)mutex_lock(&result->mutex);
+
+ out:
+ tsd_setspecific(arena_key, (Void_t *)result);
+ THREAD_STAT(++(result->stat_lock_loop));
+ next_to_use = result->next;
+
+ return result;
+}
+#endif
+
static mstate
internal_function
#if __STD_C
{
mstate a;
+#ifdef PER_THREAD
+ if ((a = get_free_list ()) == NULL
+ && (a = reused_arena ()) == NULL)
+ /* Nothing immediately available, so generate a new arena. */
+ a = _int_new_arena(size);
+#else
if(!a_tsd)
a = a_tsd = &main_arena;
else {
/* Nothing immediately available, so generate a new arena. */
a = _int_new_arena(size);
- if(a)
- {
- tsd_setspecific(arena_key, (Void_t *)a);
- mutex_init(&a->mutex);
- mutex_lock(&a->mutex); /* remember result */
-
- /* Add the new arena to the global list. */
- a->next = main_arena.next;
- atomic_write_barrier ();
- main_arena.next = a;
-
- THREAD_STAT(++(a->stat_lock_loop));
- }
(void)mutex_unlock(&list_lock);
+#endif
return a;
}
+#ifdef PER_THREAD
+static void __attribute__ ((section ("__libc_thread_freeres_fn")))
+arena_thread_freeres (void)
+{
+ Void_t *vptr = NULL;
+ mstate a = tsd_getspecific(arena_key, vptr);
+ tsd_setspecific(arena_key, NULL);
+
+ if (a != NULL)
+ {
+ (void)mutex_lock(&list_lock);
+ a->next_free = free_list;
+ free_list = a;
+ (void)mutex_unlock(&list_lock);
+ }
+}
+text_set_element (__libc_thread_subfreeres, arena_thread_freeres);
+#endif
+
#endif /* USE_ARENAS */
/*
mchunkptr p;
if(!mem) return;
- (void)mutex_lock(&main_arena.mutex);
p = mem2chunk_check(mem, NULL);
if(!p) {
- (void)mutex_unlock(&main_arena.mutex);
-
malloc_printerr(check_action, "free(): invalid pointer", mem);
return;
}
#if HAVE_MMAP
if (chunk_is_mmapped(p)) {
- (void)mutex_unlock(&main_arena.mutex);
munmap_chunk(p);
return;
}
#if 0 /* Erase freed memory. */
memset(mem, 0, chunksize(p) - (SIZE_SZ+1));
#endif
+#ifdef ATOMIC_FASTBINS
+ _int_free(&main_arena, p, 0);
+#else
+ (void)mutex_lock(&main_arena.mutex);
_int_free(&main_arena, p);
(void)mutex_unlock(&main_arena.mutex);
+#endif
}
static Void_t*
return;
}
#endif
+#ifdef ATOMIC_FASTBINS
+ _int_free(&main_arena, p, 1);
+#else
_int_free(&main_arena, p);
+#endif
}
# endif /* !defined NO_STARTER */
clear_fastchunks(&main_arena);
set_max_fast(DEFAULT_MXFAST);
for (i=0; i<NFASTBINS; ++i)
- main_arena.fastbins[i] = 0;
+ fastbin (&main_arena, i) = 0;
for (i=0; i<BINMAPSIZE; ++i)
main_arena.binmap[i] = 0;
top(&main_arena) = ms->av[2];
Tuning options that are also dynamically changeable via mallopt:
- DEFAULT_MXFAST 64
+ DEFAULT_MXFAST 64 (for 32bit), 128 (for 64bit)
DEFAULT_TRIM_THRESHOLD 128 * 1024
DEFAULT_TOP_PAD 0
DEFAULT_MMAP_THRESHOLD 128 * 1024
#include <malloc-machine.h>
#ifdef _LIBC
+#ifdef ATOMIC_FASTBINS
+#include <atomic.h>
+#endif
#include <stdio-common/_itoa.h>
#include <bits/wordsize.h>
+#include <sys/sysinfo.h>
#endif
#ifdef __cplusplus
or other mallocs available that do this.
*/
-#if MALLOC_DEBUG
#include <assert.h>
-#else
-#undef assert
-#define assert(x) ((void)0)
-#endif
/*
#endif
#ifndef DEFAULT_MXFAST
-#define DEFAULT_MXFAST 64
+#define DEFAULT_MXFAST (64 * SIZE_SZ / 4)
#endif
#if __STD_C
static Void_t* _int_malloc(mstate, size_t);
+#ifdef ATOMIC_FASTBINS
+static void _int_free(mstate, mchunkptr, int);
+#else
static void _int_free(mstate, mchunkptr);
+#endif
static Void_t* _int_realloc(mstate, mchunkptr, INTERNAL_SIZE_T);
static Void_t* _int_memalign(mstate, size_t, size_t);
static Void_t* _int_valloc(mstate, size_t);
*/
typedef struct malloc_chunk* mfastbinptr;
+#define fastbin(ar_ptr, idx) ((ar_ptr)->fastbinsY[idx])
/* offset 2 to use otherwise unindexable first 2 bins */
-#define fastbin_index(sz) ((((unsigned int)(sz)) >> 3) - 2)
+#define fastbin_index(sz) \
+ ((((unsigned int)(sz)) >> (SIZE_SZ == 8 ? 4 : 3)) - 2)
+
/* The maximum fastbin request size we support */
-#define MAX_FAST_SIZE 80
+#define MAX_FAST_SIZE (80 * SIZE_SZ / 4)
#define NFASTBINS (fastbin_index(request2size(MAX_FAST_SIZE))+1)
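
Worked numbers for the rescaled fastbin geometry above (both columns
follow directly from the new definitions):

/* SIZE_SZ == 4 (32-bit): fastbin_index(16) == (16 >> 3) - 2 == 0,
   MAX_FAST_SIZE == 80, DEFAULT_MXFAST == 64.
   SIZE_SZ == 8 (64-bit): fastbin_index(32) == (32 >> 4) - 2 == 0,
   MAX_FAST_SIZE == 160, DEFAULT_MXFAST == 64 * 8 / 4 == 128,
   matching the "64 (for 32bit), 128 (for 64bit)" note in the
   documentation hunk above.  */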
#define FASTCHUNKS_BIT (1U)
#define have_fastchunks(M) (((M)->flags & FASTCHUNKS_BIT) == 0)
+#ifdef ATOMIC_FASTBINS
+#define clear_fastchunks(M) catomic_or (&(M)->flags, FASTCHUNKS_BIT)
+#define set_fastchunks(M) catomic_and (&(M)->flags, ~FASTCHUNKS_BIT)
+#else
#define clear_fastchunks(M) ((M)->flags |= FASTCHUNKS_BIT)
#define set_fastchunks(M) ((M)->flags &= ~FASTCHUNKS_BIT)
+#endif
/*
NONCONTIGUOUS_BIT indicates that MORECORE does not return contiguous
#endif
/* Fastbins */
- mfastbinptr fastbins[NFASTBINS];
+ mfastbinptr fastbinsY[NFASTBINS];
/* Base of the topmost chunk -- not otherwise kept in a bin */
mchunkptr top;
/* Linked list */
struct malloc_state *next;
+#ifdef PER_THREAD
+ /* Linked list for free arenas. */
+ struct malloc_state *next_free;
+#endif
+
/* Memory allocated from the system in this arena. */
INTERNAL_SIZE_T system_mem;
INTERNAL_SIZE_T max_system_mem;
unsigned long trim_threshold;
INTERNAL_SIZE_T top_pad;
INTERNAL_SIZE_T mmap_threshold;
+#ifdef PER_THREAD
+ INTERNAL_SIZE_T arena_test;
+ INTERNAL_SIZE_T arena_max;
+#endif
/* Memory map support */
int n_mmaps;
static struct malloc_par mp_;
+#ifdef PER_THREAD
+/* Non public mallopt parameters. */
+#define M_ARENA_TEST -7
+#define M_ARENA_MAX -8
+#endif
+
+
/* Maximum size of memory handled in fastbins. */
static INTERNAL_SIZE_T global_max_fast;
/* Precondition: not enough current space to satisfy nb request */
assert((unsigned long)(old_size) < (unsigned long)(nb + MINSIZE));
+#ifndef ATOMIC_FASTBINS
/* Precondition: all fastbins are consolidated */
assert(!have_fastchunks(av));
+#endif
if (av != &main_arena) {
set_head(chunk_at_offset(old_top, old_size), (2*SIZE_SZ)|PREV_INUSE);
set_foot(chunk_at_offset(old_top, old_size), (2*SIZE_SZ));
set_head(old_top, old_size|PREV_INUSE|NON_MAIN_ARENA);
+#ifdef ATOMIC_FASTBINS
+ _int_free(av, old_top, 1);
+#else
_int_free(av, old_top);
+#endif
} else {
set_head(old_top, (old_size + 2*SIZE_SZ)|PREV_INUSE);
set_foot(old_top, (old_size + 2*SIZE_SZ));
/* If possible, release the rest. */
if (old_size >= MINSIZE) {
+#ifdef ATOMIC_FASTBINS
+ _int_free(av, old_top, 1);
+#else
_int_free(av, old_top);
+#endif
}
}
if (__builtin_expect (hook != NULL, 0))
return (*hook)(bytes, RETURN_ADDRESS (0));
- arena_get(ar_ptr, bytes);
+ arena_lookup(ar_ptr);
+#if 0
+ // XXX We need double-word CAS and fastbins must be extended to also
+ // XXX hold a generation counter for each entry.
+ if (ar_ptr) {
+ INTERNAL_SIZE_T nb; /* normalized request size */
+ checked_request2size(bytes, nb);
+ if (nb <= get_max_fast ()) {
+ long int idx = fastbin_index(nb);
+ mfastbinptr* fb = &fastbin (ar_ptr, idx);
+ mchunkptr pp = *fb;
+ mchunkptr v;
+ do
+ {
+ v = pp;
+ if (v == NULL)
+ break;
+ }
+ while ((pp = catomic_compare_and_exchange_val_acq (fb, v->fd, v)) != v);
+ if (v != 0) {
+ if (__builtin_expect (fastbin_index (chunksize (v)) != idx, 0))
+ malloc_printerr (check_action, "malloc(): memory corruption (fast)",
+ chunk2mem (v));
+ check_remalloced_chunk(ar_ptr, v, nb);
+ void *p = chunk2mem(v);
+ if (__builtin_expect (perturb_byte, 0))
+ alloc_perturb (p, bytes);
+ return p;
+ }
+ }
+ }
+#endif
+
+ arena_lock(ar_ptr, bytes);
if(!ar_ptr)
return 0;
victim = _int_malloc(ar_ptr, bytes);
#endif
ar_ptr = arena_for_chunk(p);
-#if THREAD_STATS
+#ifdef ATOMIC_FASTBINS
+ _int_free(ar_ptr, p, 0);
+#else
+# if THREAD_STATS
if(!mutex_trylock(&ar_ptr->mutex))
++(ar_ptr->stat_lock_direct);
else {
(void)mutex_lock(&ar_ptr->mutex);
++(ar_ptr->stat_lock_wait);
}
-#else
+# else
(void)mutex_lock(&ar_ptr->mutex);
-#endif
+# endif
_int_free(ar_ptr, p);
(void)mutex_unlock(&ar_ptr->mutex);
+#endif
}
#ifdef libc_hidden_def
libc_hidden_def (public_fREe)
(void)mutex_lock(&ar_ptr->mutex);
#endif
-#ifndef NO_THREADS
+#if !defined NO_THREADS && !defined PER_THREAD
/* As in malloc(), remember this arena for the next allocation. */
tsd_setspecific(arena_key, (Void_t *)ar_ptr);
#endif
if (newp != NULL)
{
MALLOC_COPY (newp, oldmem, oldsize - SIZE_SZ);
-#if THREAD_STATS
+#ifdef ATOMIC_FASTBINS
+ _int_free(ar_ptr, oldp, 0);
+#else
+# if THREAD_STATS
if(!mutex_trylock(&ar_ptr->mutex))
++(ar_ptr->stat_lock_direct);
else {
(void)mutex_lock(&ar_ptr->mutex);
++(ar_ptr->stat_lock_wait);
}
-#else
+# else
(void)mutex_lock(&ar_ptr->mutex);
-#endif
+# endif
_int_free(ar_ptr, oldp);
(void)mutex_unlock(&ar_ptr->mutex);
+#endif
}
}
INTERNAL_SIZE_T nb; /* normalized request size */
unsigned int idx; /* associated bin index */
mbinptr bin; /* associated bin */
- mfastbinptr* fb; /* associated fastbin */
mchunkptr victim; /* inspected/selected chunk */
INTERNAL_SIZE_T size; /* its size */
*/
if ((unsigned long)(nb) <= (unsigned long)(get_max_fast ())) {
- long int idx = fastbin_index(nb);
- fb = &(av->fastbins[idx]);
- if ( (victim = *fb) != 0) {
+ idx = fastbin_index(nb);
+ mfastbinptr* fb = &fastbin (av, idx);
+#ifdef ATOMIC_FASTBINS
+ mchunkptr pp = *fb;
+ do
+ {
+ victim = pp;
+ if (victim == NULL)
+ break;
+ }
+ while ((pp = catomic_compare_and_exchange_val_acq (fb, victim->fd, victim))
+ != victim);
+#else
+ victim = *fb;
+#endif
+ if (victim != 0) {
if (__builtin_expect (fastbin_index (chunksize (victim)) != idx, 0))
malloc_printerr (check_action, "malloc(): memory corruption (fast)",
chunk2mem (victim));
+#ifndef ATOMIC_FASTBINS
*fb = victim->fd;
+#endif
check_remalloced_chunk(av, victim, nb);
void *p = chunk2mem(victim);
if (__builtin_expect (perturb_byte, 0))
return p;
}
+#ifdef ATOMIC_FASTBINS
+ /* When we are using atomic ops to free fast chunks we can get
+ here for all block sizes. */
+ else if (have_fastchunks(av)) {
+ malloc_consolidate(av);
+ /* restore original bin index */
+ if (in_smallbin_range(nb))
+ idx = smallbin_index(nb);
+ else
+ idx = largebin_index(nb);
+ }
+#else
/*
If there is space available in fastbins, consolidate and retry,
to possibly avoid expanding memory. This can occur only if nb is
malloc_consolidate(av);
idx = smallbin_index(nb); /* restore original bin index */
}
+#endif
/*
Otherwise, relay to handle system-dependent cases
*/
static void
+#ifdef ATOMIC_FASTBINS
+_int_free(mstate av, mchunkptr p, int have_lock)
+#else
_int_free(mstate av, mchunkptr p)
+#endif
{
INTERNAL_SIZE_T size; /* its size */
mfastbinptr* fb; /* associated fastbin */
mchunkptr fwd; /* misc temp for linking */
const char *errstr = NULL;
+#ifdef ATOMIC_FASTBINS
+ int locked = 0;
+#endif
size = chunksize(p);
{
errstr = "free(): invalid pointer";
errout:
+#ifdef ATOMIC_FASTBINS
+ if (! have_lock && locked)
+ (void)mutex_unlock(&av->mutex);
+#endif
malloc_printerr (check_action, errstr, chunk2mem(p));
return;
}
goto errout;
}
+ if (__builtin_expect (perturb_byte, 0))
+ free_perturb (chunk2mem(p), size - SIZE_SZ);
+
set_fastchunks(av);
- fb = &(av->fastbins[fastbin_index(size)]);
+ fb = &fastbin (av, fastbin_index(size));
+
+#ifdef ATOMIC_FASTBINS
+ mchunkptr fd;
+ mchunkptr old = *fb;
+ do
+ {
+ /* Another simple check: make sure the top of the bin is not the
+ record we are going to add (i.e., double free). */
+ if (__builtin_expect (old == p, 0))
+ {
+ errstr = "double free or corruption (fasttop)";
+ goto errout;
+ }
+ p->fd = fd = old;
+ }
+ while ((old = catomic_compare_and_exchange_val_acq (fb, p, fd)) != fd);
+#else
/* Another simple check: make sure the top of the bin is not the
record we are going to add (i.e., double free). */
if (__builtin_expect (*fb == p, 0))
goto errout;
}
- if (__builtin_expect (perturb_byte, 0))
- free_perturb (chunk2mem(p), size - SIZE_SZ);
-
p->fd = *fb;
*fb = p;
+#endif
}
/*
*/
else if (!chunk_is_mmapped(p)) {
+#ifdef ATOMIC_FASTBINS
+ if (! have_lock) {
+# if THREAD_STATS
+ if(!mutex_trylock(&av->mutex))
+ ++(av->stat_lock_direct);
+ else {
+ (void)mutex_lock(&av->mutex);
+ ++(av->stat_lock_wait);
+ }
+# else
+ (void)mutex_lock(&av->mutex);
+# endif
+ locked = 1;
+ }
+#endif
+
nextchunk = chunk_at_offset(p, size);
/* Lightweight tests: check whether the block is already the
}
}
+#ifdef ATOMIC_FASTBINS
+ if (! have_lock) {
+ assert (locked);
+ (void)mutex_unlock(&av->mutex);
+ }
+#endif
}
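
The ATOMIC_FASTBINS free path above is a lock-free LIFO push;
malloc_consolidate (further below) detaches the whole list with a single
atomic exchange.  A self-contained sketch of the same two operations in
C11 atomics — an illustration of the pattern, not glibc's catomic_* code:

#include <stdatomic.h>
#include <stddef.h>

struct chunk { struct chunk *fd; };
static _Atomic (struct chunk *) fastbin_top;

/* Free path: push one chunk, retrying if the bin top moved under us
   (mirrors the catomic_compare_and_exchange_val_acq loop above).  */
static void
push (struct chunk *p)
{
  struct chunk *old = atomic_load (&fastbin_top);
  do
    p->fd = old;			/* double-free check elided */
  while (!atomic_compare_exchange_weak (&fastbin_top, &old, p));
}

/* Consolidate path: detach the entire list in one shot
   (mirrors p = atomic_exchange_acq (fb, 0)).  */
static struct chunk *
take_all (void)
{
  return atomic_exchange (&fastbin_top, NULL);
}

int
main (void)
{
  struct chunk a, b;
  push (&a);
  push (&b);
  struct chunk *list = take_all ();	/* b -> a */
  return (list == &b && list->fd == &a) ? 0 : 1;
}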
/*
If the chunk was allocated via mmap, release via munmap(). Note
because, except for the main arena, all the others might have
blocks in the high fast bins. It's not worth it anyway, just
search all bins all the time. */
- maxfb = &(av->fastbins[fastbin_index(get_max_fast ())]);
+ maxfb = &fastbin (av, fastbin_index(get_max_fast ()));
#else
- maxfb = &(av->fastbins[NFASTBINS - 1]);
+ maxfb = &fastbin (av, NFASTBINS - 1);
#endif
- fb = &(av->fastbins[0]);
+ fb = &fastbin (av, 0);
do {
- if ( (p = *fb) != 0) {
- *fb = 0;
-
+#ifdef ATOMIC_FASTBINS
+ p = atomic_exchange_acq (fb, 0);
+#else
+ p = *fb;
+#endif
+ if (p != 0) {
+#ifndef ATOMIC_FASTBINS
+ *fb = 0;
+#endif
do {
check_inuse_chunk(av, p);
nextp = p->fd;
}
}
+#ifdef ATOMIC_FASTBINS
+ _int_free(av, oldp, 1);
+#else
_int_free(av, oldp);
+#endif
check_inuse_chunk(av, newp);
return chunk2mem(newp);
}
(av != &main_arena ? NON_MAIN_ARENA : 0));
/* Mark remainder as inuse so free() won't complain */
set_inuse_bit_at_offset(remainder, remainder_size);
+#ifdef ATOMIC_FASTBINS
+ _int_free(av, remainder, 1);
+#else
_int_free(av, remainder);
+#endif
}
check_inuse_chunk(av, newp);
newmem = _int_malloc(av, nb - MALLOC_ALIGN_MASK);
if (newmem != 0) {
MALLOC_COPY(newmem, chunk2mem(oldp), oldsize - 2*SIZE_SZ);
+#ifdef ATOMIC_FASTBINS
+ _int_free(av, oldp, 1);
+#else
_int_free(av, oldp);
+#endif
}
}
return newmem;
(av != &main_arena ? NON_MAIN_ARENA : 0));
set_inuse_bit_at_offset(newp, newsize);
set_head_size(p, leadsize | (av != &main_arena ? NON_MAIN_ARENA : 0));
+#ifdef ATOMIC_FASTBINS
+ _int_free(av, p, 1);
+#else
_int_free(av, p);
+#endif
p = newp;
assert (newsize >= nb &&
set_head(remainder, remainder_size | PREV_INUSE |
(av != &main_arena ? NON_MAIN_ARENA : 0));
set_head_size(p, nb);
+#ifdef ATOMIC_FASTBINS
+ _int_free(av, remainder, 1);
+#else
_int_free(av, remainder);
+#endif
}
}
fastavail = 0;
for (i = 0; i < NFASTBINS; ++i) {
- for (p = av->fastbins[i]; p != 0; p = p->fd) {
+ for (p = fastbin (av, i); p != 0; p = p->fd) {
++nfastblocks;
fastavail += chunksize(p);
}
case M_PERTURB:
perturb_byte = value;
break;
+
+#ifdef PER_THREAD
+ case M_ARENA_TEST:
+ if (value > 0)
+ mp_.arena_test = value;
+ break;
+
+ case M_ARENA_MAX:
+ if (value > 0)
+ mp_.arena_max = value;
+ break;
+#endif
}
(void)mutex_unlock(&av->mutex);
return res;
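
Both new knobs are reachable exactly two ways, matching the code above:
mallopt with the non-public M_ARENA_TEST / M_ARENA_MAX constants, or the
MALLOC_ARENA_TEST / MALLOC_ARENA_MAX environment variables parsed in
ptmalloc_init (non-secure processes only).  A usage sketch with
illustrative values, effective only when libc was built with
--enable-experimental-malloc:

#include <malloc.h>

int
main (void)
{
  /* Never create more than 4 arenas, regardless of thread or
     core count.  */
  mallopt (M_ARENA_MAX, 4);

  /* Apply the core-count based limit only once more than 2 arenas
     exist.  */
  mallopt (M_ARENA_TEST, 2);

  return 0;
}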
/* Prototypes and definition for malloc implementation.
- Copyright (C) 1996, 1997, 1999, 2000, 2002-2004, 2005, 2007
+ Copyright (C) 1996, 1997, 1999, 2000, 2002-2004, 2005, 2007, 2009
Free Software Foundation, Inc.
This file is part of the GNU C Library.
#define M_MMAP_MAX -4
#define M_CHECK_ACTION -5
#define M_PERTURB -6
+#define M_ARENA_TEST -7
+#define M_ARENA_MAX -8
/* General SVID/XPG interface to tunable parameters. */
extern int mallopt __MALLOC_P ((int __param, int __val));
#include <stdlib.h>
#include <unistd.h>
+#ifdef USE_MULTIARCH
+# include "multiarch/init-arch.h"
+#endif
+
static const struct intel_02_cache_info
{
unsigned int idx;
unsigned int ebx;
unsigned int ecx;
unsigned int edx;
- int max_cpuid;
int max_cpuid_ex;
long int data = -1;
long int shared = -1;
unsigned int level;
unsigned int threads = 0;
+#ifdef USE_MULTIARCH
+ if (__cpu_features.kind == arch_kind_unknown)
+ __init_cpu_features ();
+# define is_intel __cpu_features.kind == arch_kind_intel
+# define is_amd __cpu_features.kind == arch_kind_amd
+# define max_cpuid __cpu_features.max_cpuid
+#else
+ int max_cpuid;
asm volatile ("cpuid"
: "=a" (max_cpuid), "=b" (ebx), "=c" (ecx), "=d" (edx)
: "0" (0));
-
/* This spells out "GenuineIntel". */
- if (ebx == 0x756e6547 && ecx == 0x6c65746e && edx == 0x49656e69)
+# define is_intel \
+ ebx == 0x756e6547 && ecx == 0x6c65746e && edx == 0x49656e69
+ /* This spells out "AuthenticAMD". */
+# define is_amd \
+ ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65
+#endif
+
+ if (is_intel)
{
data = handle_intel (_SC_LEVEL1_DCACHE_SIZE, max_cpuid);
shared = handle_intel (_SC_LEVEL2_CACHE_SIZE, max_cpuid);
}
+#ifdef USE_MULTIARCH
+ eax = __cpu_features.cpuid[INTEL_CPUID_INDEX_1].eax;
+ ebx = __cpu_features.cpuid[INTEL_CPUID_INDEX_1].ebx;
+ ecx = __cpu_features.cpuid[INTEL_CPUID_INDEX_1].ecx;
+ edx = __cpu_features.cpuid[INTEL_CPUID_INDEX_1].edx;
+#else
asm volatile ("cpuid"
: "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
: "0" (1));
+#endif
/* Intel prefers SSSE3 instructions for memory/string routines
   if they are available.  */
shared /= threads;
}
/* This spells out "AuthenticAMD". */
- else if (ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65)
+ else if (is_amd)
{
data = handle_amd (_SC_LEVEL1_DCACHE_SIZE);
long int core = handle_amd (_SC_LEVEL2_CACHE_SIZE);
--- /dev/null
+ifeq ($(subdir),csu)
+aux += init-arch
+endif
--- /dev/null
+/* Initialize CPU feature data.
+ This file is part of the GNU C Library.
+ Copyright (C) 2008 Free Software Foundation, Inc.
+ Contributed by Ulrich Drepper <drepper@redhat.com>.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include "init-arch.h"
+
+
+struct cpu_features __cpu_features attribute_hidden;
+
+
+void
+__init_cpu_features (void)
+{
+ unsigned int ebx;
+ unsigned int ecx;
+ unsigned int edx;
+
+ asm volatile ("cpuid"
+ : "=a" (__cpu_features.max_cpuid), "=b" (ebx), "=c" (ecx),
+ "=d" (edx)
+ : "0" (0));
+
+ /* This spells out "GenuineIntel". */
+ if (ebx == 0x756e6547 && ecx == 0x6c65746e && edx == 0x49656e69)
+ {
+ __cpu_features.kind = arch_kind_intel;
+
+ asm volatile ("cpuid"
+ : "=a" (__cpu_features.cpuid[INTEL_CPUID_INDEX_1].eax),
+ "=b" (__cpu_features.cpuid[INTEL_CPUID_INDEX_1].ebx),
+ "=c" (__cpu_features.cpuid[INTEL_CPUID_INDEX_1].ecx),
+ "=d" (__cpu_features.cpuid[INTEL_CPUID_INDEX_1].edx)
+ : "0" (1));
+ }
+ /* This spells out "AuthenticAMD". */
+ else if (ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65)
+ {
+ __cpu_features.kind = arch_kind_amd;
+
+ asm volatile ("cpuid"
+ : "=a" (__cpu_features.cpuid[AMD_CPUID_INDEX_1].eax),
+ "=b" (__cpu_features.cpuid[AMD_CPUID_INDEX_1].ebx),
+ "=c" (__cpu_features.cpuid[AMD_CPUID_INDEX_1].ecx),
+ "=d" (__cpu_features.cpuid[AMD_CPUID_INDEX_1].edx)
+ : "0" (1));
+ }
+ else
+ __cpu_features.kind = arch_kind_other;
+}
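
The magic numbers compared against ebx/ecx/edx are nothing more than the
12-byte CPUID vendor string packed little-endian into the three registers.
A standalone check:

#include <stdio.h>
#include <string.h>

int
main (void)
{
  /* CPUID leaf 0 returns the vendor string in ebx, edx, ecx (in that
     order).  On a little-endian host this reassembles "GenuineIntel";
     the "AuthenticAMD" constants decode the same way.  */
  unsigned int ebx = 0x756e6547;	/* "Genu" */
  unsigned int edx = 0x49656e69;	/* "ineI" */
  unsigned int ecx = 0x6c65746e;	/* "ntel" */

  char vendor[13];
  memcpy (vendor, &ebx, 4);
  memcpy (vendor + 4, &edx, 4);
  memcpy (vendor + 8, &ecx, 4);
  vendor[12] = '\0';
  puts (vendor);
  return 0;
}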
--- /dev/null
+/* This file is part of the GNU C Library.
+ Copyright (C) 2008 Free Software Foundation, Inc.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <sys/param.h>
+
+enum
+ {
+ INTEL_CPUID_INDEX_1 = 0,
+ /* Keep the following line at the end. */
+ INTEL_CPUID_INDEX_MAX
+ };
+
+enum
+ {
+ AMD_CPUID_INDEX_1 = 0,
+ /* Keep the following line at the end. */
+ AMD_CPUID_INDEX_MAX
+ };
+
+extern struct cpu_features
+{
+ enum
+ {
+ arch_kind_unknown = 0,
+ arch_kind_intel,
+ arch_kind_amd,
+ arch_kind_other
+ } kind;
+ int max_cpuid;
+ struct
+ {
+ unsigned int eax;
+ unsigned int ebx;
+ unsigned int ecx;
+ unsigned int edx;
+ } cpuid[MAX (INTEL_CPUID_INDEX_MAX, AMD_CPUID_INDEX_MAX)];
+} __cpu_features attribute_hidden;
+
+
+extern void __init_cpu_features (void) attribute_hidden;
+#define INIT_ARCH() \
+ do \
+ if (__cpu_features.kind == arch_kind_unknown) \
+ __init_cpu_features (); \
+ while (0)
+
+/* Following are the feature tests used throughout libc. */
+
+#define INTEL_HAS_POPCOUNT \
+ (__cpu_features.kind == arch_kind_intel \
+ && (__cpu_features.cpuid[INTEL_CPUID_INDEX_1].ecx & (1 << 23)) != 0)
+
+#define AMD_HAS_POPCOUNT \
+ (__cpu_features.kind == arch_kind_amd \
+ && (__cpu_features.cpuid[AMD_CPUID_INDEX_1].ecx & (1 << 23)) != 0)
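
Both tests mask the same bit because CPUID leaf 1 reports the POPCNT
feature in ECX bit 23 on Intel and AMD alike; only the arch kind and the
cpuid[] index differ:

/* CPUID.01H:ECX bit 23 == POPCNT on both vendors, hence the shared
   (1 << 23) mask in INTEL_HAS_POPCOUNT and AMD_HAS_POPCOUNT.  */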
--- /dev/null
+/* Count bits in CPU set. x86-64 multi-arch version.
+ This file is part of the GNU C Library.
+ Copyright (C) 2008 Free Software Foundation, Inc.
+ Contributed by Ulrich Drepper <drepper@redhat.com>.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#ifdef SHARED
+# include <sched.h>
+# include "init-arch.h"
+
+# define __sched_cpucount static generic_cpucount
+# include <posix/sched_cpucount.c>
+# undef __sched_cpucount
+
+# define POPCNT(l) \
+ ({ __cpu_mask r; \
+ asm ("popcntq %1, %0" : "=r" (r) : "0" (l));\
+ r; })
+# define __sched_cpucount static popcount_cpucount
+# include <posix/sched_cpucount.c>
+# undef __sched_cpucount
+
+libc_ifunc (__sched_cpucount,
+ INTEL_HAS_POPCOUNT || AMD_HAS_POPCOUNT
+ ? popcount_cpucount : generic_cpucount);
+#else
+# include_next <sched_cpucount.c>
+#endif
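
From the caller's side the IFUNC machinery is invisible: sched.h's
CPU_COUNT still resolves to __sched_cpucount, and by the time the first
call runs, the resolver above has already picked popcount_cpucount or
generic_cpucount.  A usage sketch:

#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>

int
main (void)
{
  cpu_set_t set;
  CPU_ZERO (&set);
  CPU_SET (0, &set);
  CPU_SET (3, &set);

  /* Expands to a __sched_cpucount call; which implementation runs was
     decided once by the IFUNC resolver at symbol binding time.  */
  printf ("%d\n", CPU_COUNT (&set));	/* prints 2 */
  return 0;
}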