Add _dl_x86_cpu_features to rtld_global
author H.J. Lu <hjl.tools@gmail.com>
Thu, 13 Aug 2015 10:37:47 +0000 (03:37 -0700)
committer H.J. Lu <hjl.tools@gmail.com>
Thu, 13 Aug 2015 10:41:22 +0000 (03:41 -0700)
This patch adds _dl_x86_cpu_features to rtld_global in x86 ld.so
and initializes it early before __libc_start_main is called so that
cpu_features is always available when it is used and we can avoid
calling __init_cpu_features in IFUNC selectors.

* sysdeps/i386/dl-machine.h: Include <cpu-features.c>.
(dl_platform_init): Call init_cpu_features.
* sysdeps/i386/dl-procinfo.c (_dl_x86_cpu_features): New.
* sysdeps/i386/i686/cacheinfo.c
(DISABLE_PREFERRED_MEMORY_INSTRUCTION): Removed.
* sysdeps/i386/i686/multiarch/Makefile (aux): Remove init-arch.
* sysdeps/i386/i686/multiarch/Versions: Removed.
* sysdeps/i386/i686/multiarch/ifunc-defines.sym (KIND_OFFSET):
Removed.
* sysdeps/i386/ldsodefs.h: Include <cpu-features.h>.
* sysdeps/unix/sysv/linux/x86/Makefile
(libpthread-sysdep_routines): Remove init-arch.
* sysdeps/unix/sysv/linux/x86_64/dl-procinfo.c: Include
<sysdeps/x86_64/dl-procinfo.c> instead of
<sysdeps/generic/dl-procinfo.c>.
* sysdeps/x86/Makefile [$(subdir) == csu] (gen-as-const-headers):
Add cpu-features-offsets.sym and rtld-global-offsets.sym.
[$(subdir) == elf] (sysdep-dl-routines): Add dl-get-cpu-features.
[$(subdir) == elf] (tests): Add tst-get-cpu-features.
[$(subdir) == elf] (tests-static): Add
tst-get-cpu-features-static.
* sysdeps/x86/Versions: New file.
* sysdeps/x86/cpu-features-offsets.sym: Likewise.
* sysdeps/x86/cpu-features.c: Likewise.
* sysdeps/x86/cpu-features.h: Likewise.
* sysdeps/x86/dl-get-cpu-features.c: Likewise.
* sysdeps/x86/libc-start.c: Likewise.
* sysdeps/x86/rtld-global-offsets.sym: Likewise.
* sysdeps/x86/tst-get-cpu-features-static.c: Likewise.
* sysdeps/x86/tst-get-cpu-features.c: Likewise.
* sysdeps/x86_64/dl-procinfo.c: Likewise.
* sysdeps/x86_64/cacheinfo.c (__cpuid_count): Removed.
Assume USE_MULTIARCH is defined and don't check it.
(is_intel): Replace __cpu_features with GLRO(dl_x86_cpu_features).
(is_amd): Likewise.
(max_cpuid): Likewise.
(intel_check_word): Likewise.
(__cache_sysconf): Don't call __init_cpu_features.
(__x86_preferred_memory_instruction): Removed.
(init_cacheinfo): Don't call __init_cpu_features. Replace
__cpu_features with GLRO(dl_x86_cpu_features).
* sysdeps/x86_64/dl-machine.h: Include <cpu-features.c>.
(dl_platform_init): Call init_cpu_features.
* sysdeps/x86_64/ldsodefs.h: Include <cpu-features.h>.
* sysdeps/x86_64/multiarch/Makefile (aux): Remove init-arch.
* sysdeps/x86_64/multiarch/Versions: Removed.
* sysdeps/x86_64/multiarch/cacheinfo.c: Likewise.
* sysdeps/x86_64/multiarch/init-arch.c: Likewise.
* sysdeps/x86_64/multiarch/ifunc-defines.sym (KIND_OFFSET):
Removed.
* sysdeps/x86_64/multiarch/init-arch.h: Rewrite.

28 files changed:
ChangeLog
sysdeps/i386/dl-machine.h
sysdeps/i386/dl-procinfo.c
sysdeps/i386/i686/cacheinfo.c
sysdeps/i386/i686/multiarch/Makefile
sysdeps/i386/i686/multiarch/ifunc-defines.sym
sysdeps/i386/ldsodefs.h
sysdeps/unix/sysv/linux/x86/Makefile
sysdeps/unix/sysv/linux/x86_64/dl-procinfo.c
sysdeps/x86/Makefile
sysdeps/x86/Versions [moved from sysdeps/i386/i686/multiarch/Versions with 87% similarity]
sysdeps/x86/cpu-features-offsets.sym [new file with mode: 0644]
sysdeps/x86/cpu-features.c [moved from sysdeps/x86_64/multiarch/init-arch.c with 65% similarity]
sysdeps/x86/cpu-features.h [new file with mode: 0644]
sysdeps/x86/dl-get-cpu-features.c [new file with mode: 0644]
sysdeps/x86/libc-start.c [new file with mode: 0644]
sysdeps/x86/rtld-global-offsets.sym [new file with mode: 0644]
sysdeps/x86/tst-get-cpu-features-static.c [new file with mode: 0644]
sysdeps/x86/tst-get-cpu-features.c [new file with mode: 0644]
sysdeps/x86_64/cacheinfo.c
sysdeps/x86_64/dl-machine.h
sysdeps/x86_64/dl-procinfo.c [new file with mode: 0644]
sysdeps/x86_64/ldsodefs.h
sysdeps/x86_64/multiarch/Makefile
sysdeps/x86_64/multiarch/Versions [deleted file]
sysdeps/x86_64/multiarch/cacheinfo.c [deleted file]
sysdeps/x86_64/multiarch/ifunc-defines.sym
sysdeps/x86_64/multiarch/init-arch.h

index d056197..2775dba 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,57 @@
+2015-08-13  H.J. Lu  <hongjiu.lu@intel.com>
+
+       * sysdeps/i386/dl-machine.h: Include <cpu-features.c>.
+       (dl_platform_init): Call init_cpu_features.
+       * sysdeps/i386/dl-procinfo.c (_dl_x86_cpu_features): New.
+       * sysdeps/i386/i686/cacheinfo.c
+       (DISABLE_PREFERRED_MEMORY_INSTRUCTION): Removed.
+       * sysdeps/i386/i686/multiarch/Makefile (aux): Remove init-arch.
+       * sysdeps/i386/i686/multiarch/Versions: Removed.
+       * sysdeps/i386/i686/multiarch/ifunc-defines.sym (KIND_OFFSET):
+       Removed.
+       * sysdeps/i386/ldsodefs.h: Include <cpu-features.h>.
+       * sysdeps/unix/sysv/linux/x86/Makefile
+       (libpthread-sysdep_routines): Remove init-arch.
+       * sysdeps/unix/sysv/linux/x86_64/dl-procinfo.c: Include
+       <sysdeps/x86_64/dl-procinfo.c> instead of
+       <sysdeps/generic/dl-procinfo.c>.
+       * sysdeps/x86/Makefile [$(subdir) == csu] (gen-as-const-headers):
+       Add cpu-features-offsets.sym and rtld-global-offsets.sym.
+       [$(subdir) == elf] (sysdep-dl-routines): Add dl-get-cpu-features.
+       [$(subdir) == elf] (tests): Add tst-get-cpu-features.
+       [$(subdir) == elf] (tests-static): Add
+       tst-get-cpu-features-static.
+       * sysdeps/x86/Versions: New file.
+       * sysdeps/x86/cpu-features-offsets.sym: Likewise.
+       * sysdeps/x86/cpu-features.c: Likewise.
+       * sysdeps/x86/cpu-features.h: Likewise.
+       * sysdeps/x86/dl-get-cpu-features.c: Likewise.
+       * sysdeps/x86/libc-start.c: Likewise.
+       * sysdeps/x86/rtld-global-offsets.sym: Likewise.
+       * sysdeps/x86/tst-get-cpu-features-static.c: Likewise.
+       * sysdeps/x86/tst-get-cpu-features.c: Likewise.
+       * sysdeps/x86_64/dl-procinfo.c: Likewise.
+       * sysdeps/x86_64/cacheinfo.c (__cpuid_count): Removed.
+       Assume USE_MULTIARCH is defined and don't check it.
+       (is_intel): Replace __cpu_features with GLRO(dl_x86_cpu_features).
+       (is_amd): Likewise.
+       (max_cpuid): Likewise.
+       (intel_check_word): Likewise.
+       (__cache_sysconf): Don't call __init_cpu_features.
+       (__x86_preferred_memory_instruction): Removed.
+       (init_cacheinfo): Don't call __init_cpu_features. Replace
+       __cpu_features with GLRO(dl_x86_cpu_features).
+       * sysdeps/x86_64/dl-machine.h: Include <cpu-features.c>.
+       (dl_platform_init): Call init_cpu_features.
+       * sysdeps/x86_64/ldsodefs.h: Include <cpu-features.h>.
+       * sysdeps/x86_64/multiarch/Makefile (aux): Remove init-arch.
+       * sysdeps/x86_64/multiarch/Versions: Removed.
+       * sysdeps/x86_64/multiarch/cacheinfo.c: Likewise.
+       * sysdeps/x86_64/multiarch/init-arch.c: Likewise.
+       * sysdeps/x86_64/multiarch/ifunc-defines.sym (KIND_OFFSET):
+       Removed.
+       * sysdeps/x86_64/multiarch/init-arch.h: Rewrite.
+
 2015-08-12  Paul Pluzhnikov  <ppluzhnikov@google.com>
 
        [BZ #18820]
index 04f9247..4a28eb3 100644 (file)
@@ -25,6 +25,7 @@
 #include <sysdep.h>
 #include <tls.h>
 #include <dl-tlsdesc.h>
+#include <cpu-features.c>
 
 /* Return nonzero iff ELF header is compatible with the running host.  */
 static inline int __attribute__ ((unused))
@@ -235,6 +236,8 @@ dl_platform_init (void)
   if (GLRO(dl_platform) != NULL && *GLRO(dl_platform) == '\0')
     /* Avoid an empty string which would disturb us.  */
     GLRO(dl_platform) = NULL;
+
+  init_cpu_features (&GLRO(dl_x86_cpu_features));
 }
 
 static inline Elf32_Addr
index b673b3c..e95f335 100644 (file)
 # define PROCINFO_CLASS
 #endif
 
+#if !IS_IN (ldconfig)
+# if !defined PROCINFO_DECL && defined SHARED
+  ._dl_x86_cpu_features
+# else
+PROCINFO_CLASS struct cpu_features _dl_x86_cpu_features
+# endif
+# ifndef PROCINFO_DECL
+= { }
+# endif
+# if !defined SHARED || defined PROCINFO_DECL
+;
+# else
+,
+# endif
+#endif
+
 #if !defined PROCINFO_DECL && defined SHARED
   ._dl_x86_cap_flags
 #else
index 0f869df..0b50c6d 100644 (file)
@@ -1,4 +1,3 @@
 #define DISABLE_PREFETCHW
-#define DISABLE_PREFERRED_MEMORY_INSTRUCTION
 
 #include <sysdeps/x86_64/cacheinfo.c>
index 11ce4ba..31bfd39 100644 (file)
@@ -1,5 +1,4 @@
 ifeq ($(subdir),csu)
-aux += init-arch
 tests += test-multiarch
 gen-as-const-headers += ifunc-defines.sym
 endif
index eb1538a..96e9cfa 100644 (file)
@@ -4,7 +4,6 @@
 --
 
 CPU_FEATURES_SIZE      sizeof (struct cpu_features)
-KIND_OFFSET            offsetof (struct cpu_features, kind)
 CPUID_OFFSET           offsetof (struct cpu_features, cpuid)
 CPUID_SIZE             sizeof (struct cpuid_registers)
 CPUID_EAX_OFFSET       offsetof (struct cpuid_registers, eax)
index d80cf01..dae2d04 100644 (file)
@@ -20,6 +20,7 @@
 #define        _I386_LDSODEFS_H        1
 
 #include <elf.h>
+#include <cpu-features.h>
 
 struct La_i86_regs;
 struct La_i86_retval;
index d6be472..9e6ec44 100644 (file)
@@ -15,7 +15,6 @@ sysdep_headers += sys/elf.h sys/perm.h sys/reg.h sys/vm86.h sys/debugreg.h sys/i
 endif
 
 ifeq ($(subdir),nptl)
-libpthread-sysdep_routines += init-arch
 libpthread-sysdep_routines += elision-lock elision-unlock elision-timed \
                              elision-trylock
 endif
index 8ac351e..a3c0c19 100644 (file)
@@ -1,5 +1,5 @@
 #if IS_IN (ldconfig)
 # include <sysdeps/i386/dl-procinfo.c>
 #else
-# include <sysdeps/generic/dl-procinfo.c>
+# include <sysdeps/x86_64/dl-procinfo.c>
 #endif
index 19f5eca..c262fdf 100644 (file)
@@ -8,3 +8,14 @@ $(objpfx)tst-ld-sse-use.out: ../sysdeps/x86/tst-ld-sse-use.sh $(objpfx)ld.so
        $(BASH) $< $(objpfx) '$(NM)' '$(OBJDUMP)' '$(READELF)' > $@; \
        $(evaluate-test)
 endif
+
+ifeq ($(subdir),csu)
+gen-as-const-headers += cpu-features-offsets.sym rtld-global-offsets.sym
+endif
+
+ifeq ($(subdir),elf)
+sysdep-dl-routines += dl-get-cpu-features
+
+tests += tst-get-cpu-features
+tests-static += tst-get-cpu-features-static
+endif
similarity index 87%
rename from sysdeps/i386/i686/multiarch/Versions
rename to sysdeps/x86/Versions
index 59b185a..e029237 100644 (file)
@@ -1,4 +1,4 @@
-libc {
+ld {
   GLIBC_PRIVATE {
     __get_cpu_features;
   }
diff --git a/sysdeps/x86/cpu-features-offsets.sym b/sysdeps/x86/cpu-features-offsets.sym
new file mode 100644 (file)
index 0000000..a9d53d1
--- /dev/null
@@ -0,0 +1,7 @@
+#define SHARED 1
+
+#include <ldsodefs.h>
+
+#define rtld_global_ro_offsetof(mem) offsetof (struct rtld_global_ro, mem)
+
+RTLD_GLOBAL_RO_DL_X86_CPU_FEATURES_OFFSET rtld_global_ro_offsetof (_dl_x86_cpu_features)
similarity index 65%
rename from sysdeps/x86_64/multiarch/init-arch.c
rename to sysdeps/x86/cpu-features.c
index aaad5fa..587080c 100644 (file)
@@ -1,7 +1,6 @@
 /* Initialize CPU feature data.
    This file is part of the GNU C Library.
    Copyright (C) 2008-2015 Free Software Foundation, Inc.
-   Contributed by Ulrich Drepper <drepper@redhat.com>.
 
    The GNU C Library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Lesser General Public
    License along with the GNU C Library; if not, see
    <http://www.gnu.org/licenses/>.  */
 
-#include <atomic.h>
 #include <cpuid.h>
-#include "init-arch.h"
+#include <cpu-features.h>
 
-
-struct cpu_features __cpu_features attribute_hidden;
-
-
-static void
-get_common_indeces (unsigned int *family, unsigned int *model)
+/* Execute CPUID leaf 1 and record its EAX/EBX/ECX/EDX output in
+   CPU_FEATURES->cpuid[COMMON_CPUID_INDEX_1].  Store the base family
+   (EAX bits 11:8) in *FAMILY and the base model (EAX bits 7:4) in
+   *MODEL.  The extended family and model fields are not folded in
+   here; init_cpu_features derives them from the saved EAX.  */
+static inline void
+get_common_indeces (struct cpu_features *cpu_features,
+                   unsigned int *family, unsigned int *model)
 {
-  __cpuid (1, __cpu_features.cpuid[COMMON_CPUID_INDEX_1].eax,
-          __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ebx,
-          __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx,
-          __cpu_features.cpuid[COMMON_CPUID_INDEX_1].edx);
-
-  unsigned int eax = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].eax;
+  unsigned int eax;
+  __cpuid (1, eax, cpu_features->cpuid[COMMON_CPUID_INDEX_1].ebx,
+          cpu_features->cpuid[COMMON_CPUID_INDEX_1].ecx,
+          cpu_features->cpuid[COMMON_CPUID_INDEX_1].edx);
+  /* Save EAX in the structure the caller passed in, alongside the
+     other three registers, so the caller can read it back from
+     CPU_FEATURES regardless of which object that pointer names.  */
+  cpu_features->cpuid[COMMON_CPUID_INDEX_1].eax = eax;
   *family = (eax >> 8) & 0x0f;
   *model = (eax >> 4) & 0x0f;
 }
 
-
-void
-__init_cpu_features (void)
+static inline void
+init_cpu_features (struct cpu_features *cpu_features)
 {
-  unsigned int ebx;
-  unsigned int ecx;
-  unsigned int edx;
+  unsigned int ebx, ecx, edx;
   unsigned int family = 0;
   unsigned int model = 0;
   enum cpu_features_kind kind;
 
-  __cpuid (0, __cpu_features.max_cpuid, ebx, ecx, edx);
+  __cpuid (0, cpu_features->max_cpuid, ebx, ecx, edx);
 
   /* This spells out "GenuineIntel".  */
   if (ebx == 0x756e6547 && ecx == 0x6c65746e && edx == 0x49656e69)
     {
       kind = arch_kind_intel;
 
-      get_common_indeces (&family, &model);
+      get_common_indeces (cpu_features, &family, &model);
 
-      unsigned int eax = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].eax;
+      unsigned int eax = cpu_features->cpuid[COMMON_CPUID_INDEX_1].eax;
       unsigned int extended_family = (eax >> 20) & 0xff;
       unsigned int extended_model = (eax >> 12) & 0xf0;
       if (family == 0x0f)
@@ -68,14 +59,14 @@ __init_cpu_features (void)
        }
       else if (family == 0x06)
        {
-         ecx = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx;
+         ecx = cpu_features->cpuid[COMMON_CPUID_INDEX_1].ecx;
          model += extended_model;
          switch (model)
            {
            case 0x1c:
            case 0x26:
              /* BSF is slow on Atom.  */
-             __cpu_features.feature[index_Slow_BSF] |= bit_Slow_BSF;
+             cpu_features->feature[index_Slow_BSF] |= bit_Slow_BSF;
              break;
 
            case 0x37:
@@ -91,7 +82,7 @@ __init_cpu_features (void)
 #if index_Fast_Unaligned_Load != index_Slow_SSE4_2
 # error index_Fast_Unaligned_Load != index_Slow_SSE4_2
 #endif
-             __cpu_features.feature[index_Fast_Unaligned_Load]
+             cpu_features->feature[index_Fast_Unaligned_Load]
                |= (bit_Fast_Unaligned_Load
                    | bit_Prefer_PMINUB_for_stringop
                    | bit_Slow_SSE4_2);
@@ -121,7 +112,7 @@ __init_cpu_features (void)
 #if index_Fast_Rep_String != index_Prefer_PMINUB_for_stringop
 # error index_Fast_Rep_String != index_Prefer_PMINUB_for_stringop
 #endif
-             __cpu_features.feature[index_Fast_Rep_String]
+             cpu_features->feature[index_Fast_Rep_String]
                |= (bit_Fast_Rep_String
                    | bit_Fast_Copy_Backward
                    | bit_Fast_Unaligned_Load
@@ -135,31 +126,31 @@ __init_cpu_features (void)
     {
       kind = arch_kind_amd;
 
-      get_common_indeces (&family, &model);
+      get_common_indeces (cpu_features, &family, &model);
 
-      ecx = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx;
+      ecx = cpu_features->cpuid[COMMON_CPUID_INDEX_1].ecx;
 
       unsigned int eax;
       __cpuid (0x80000000, eax, ebx, ecx, edx);
       if (eax >= 0x80000001)
        __cpuid (0x80000001,
-                __cpu_features.cpuid[COMMON_CPUID_INDEX_80000001].eax,
-                __cpu_features.cpuid[COMMON_CPUID_INDEX_80000001].ebx,
-                __cpu_features.cpuid[COMMON_CPUID_INDEX_80000001].ecx,
-                __cpu_features.cpuid[COMMON_CPUID_INDEX_80000001].edx);
+                cpu_features->cpuid[COMMON_CPUID_INDEX_80000001].eax,
+                cpu_features->cpuid[COMMON_CPUID_INDEX_80000001].ebx,
+                cpu_features->cpuid[COMMON_CPUID_INDEX_80000001].ecx,
+                cpu_features->cpuid[COMMON_CPUID_INDEX_80000001].edx);
     }
   else
     kind = arch_kind_other;
 
-  if (__cpu_features.max_cpuid >= 7)
+  if (cpu_features->max_cpuid >= 7)
     __cpuid_count (7, 0,
-                  __cpu_features.cpuid[COMMON_CPUID_INDEX_7].eax,
-                  __cpu_features.cpuid[COMMON_CPUID_INDEX_7].ebx,
-                  __cpu_features.cpuid[COMMON_CPUID_INDEX_7].ecx,
-                  __cpu_features.cpuid[COMMON_CPUID_INDEX_7].edx);
+                  cpu_features->cpuid[COMMON_CPUID_INDEX_7].eax,
+                  cpu_features->cpuid[COMMON_CPUID_INDEX_7].ebx,
+                  cpu_features->cpuid[COMMON_CPUID_INDEX_7].ecx,
+                  cpu_features->cpuid[COMMON_CPUID_INDEX_7].edx);
 
   /* Can we call xgetbv?  */
-  if (CPUID_OSXSAVE)
+  if (HAS_CPU_FEATURE (OSXSAVE))
     {
       unsigned int xcrlow;
       unsigned int xcrhigh;
@@ -169,15 +160,15 @@ __init_cpu_features (void)
          (bit_YMM_state | bit_XMM_state))
        {
          /* Determine if AVX is usable.  */
-         if (CPUID_AVX)
-           __cpu_features.feature[index_AVX_Usable] |= bit_AVX_Usable;
+         if (HAS_CPU_FEATURE (AVX))
+           cpu_features->feature[index_AVX_Usable] |= bit_AVX_Usable;
 #if index_AVX2_Usable != index_AVX_Fast_Unaligned_Load
 # error index_AVX2_Usable != index_AVX_Fast_Unaligned_Load
 #endif
          /* Determine if AVX2 is usable.  Unaligned load with 256-bit
             AVX registers are faster on processors with AVX2.  */
-         if (CPUID_AVX2)
-           __cpu_features.feature[index_AVX2_Usable]
+         if (HAS_CPU_FEATURE (AVX2))
+           cpu_features->feature[index_AVX2_Usable]
              |= bit_AVX2_Usable | bit_AVX_Fast_Unaligned_Load;
          /* Check if OPMASK state, upper 256-bit of ZMM0-ZMM15 and
             ZMM16-ZMM31 state are enabled.  */
@@ -186,38 +177,26 @@ __init_cpu_features (void)
              (bit_Opmask_state | bit_ZMM0_15_state | bit_ZMM16_31_state))
            {
              /* Determine if AVX512F is usable.  */
-             if (CPUID_AVX512F)
+             if (HAS_CPU_FEATURE (AVX512F))
                {
-                 __cpu_features.feature[index_AVX512F_Usable]
+                 cpu_features->feature[index_AVX512F_Usable]
                    |= bit_AVX512F_Usable;
                  /* Determine if AVX512DQ is usable.  */
-                 if (CPUID_AVX512DQ)
-                   __cpu_features.feature[index_AVX512DQ_Usable]
+                 if (HAS_CPU_FEATURE (AVX512DQ))
+                   cpu_features->feature[index_AVX512DQ_Usable]
                      |= bit_AVX512DQ_Usable;
                }
            }
          /* Determine if FMA is usable.  */
-         if (CPUID_FMA)
-           __cpu_features.feature[index_FMA_Usable] |= bit_FMA_Usable;
+         if (HAS_CPU_FEATURE (FMA))
+           cpu_features->feature[index_FMA_Usable] |= bit_FMA_Usable;
          /* Determine if FMA4 is usable.  */
-         if (CPUID_FMA4)
-           __cpu_features.feature[index_FMA4_Usable] |= bit_FMA4_Usable;
+         if (HAS_CPU_FEATURE (FMA4))
+           cpu_features->feature[index_FMA4_Usable] |= bit_FMA4_Usable;
        }
     }
 
-  __cpu_features.family = family;
-  __cpu_features.model = model;
-  atomic_write_barrier ();
-  __cpu_features.kind = kind;
-}
-
-#undef __get_cpu_features
-
-const struct cpu_features *
-__get_cpu_features (void)
-{
-  if (__cpu_features.kind == arch_kind_unknown)
-    __init_cpu_features ();
-
-  return &__cpu_features;
+  cpu_features->family = family;
+  cpu_features->model = model;
+  cpu_features->kind = kind;
 }
diff --git a/sysdeps/x86/cpu-features.h b/sysdeps/x86/cpu-features.h
new file mode 100644 (file)
index 0000000..22e5abb
--- /dev/null
@@ -0,0 +1,240 @@
+/* This file is part of the GNU C Library.
+   Copyright (C) 2008-2015 Free Software Foundation, Inc.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#ifndef cpu_features_h
+#define cpu_features_h
+
+#define bit_Fast_Rep_String            (1 << 0)
+#define bit_Fast_Copy_Backward         (1 << 1)
+#define bit_Slow_BSF                   (1 << 2)
+#define bit_Fast_Unaligned_Load                (1 << 4)
+#define bit_Prefer_PMINUB_for_stringop (1 << 5)
+#define bit_AVX_Usable                 (1 << 6)
+#define bit_FMA_Usable                 (1 << 7)
+#define bit_FMA4_Usable                        (1 << 8)
+#define bit_Slow_SSE4_2                        (1 << 9)
+#define bit_AVX2_Usable                        (1 << 10)
+#define bit_AVX_Fast_Unaligned_Load    (1 << 11)
+#define bit_AVX512F_Usable             (1 << 12)
+#define bit_AVX512DQ_Usable            (1 << 13)
+
+/* CPUID Feature flags.  */
+
+/* COMMON_CPUID_INDEX_1.  */
+#define bit_SSE2       (1 << 26)
+#define bit_SSSE3      (1 << 9)
+#define bit_SSE4_1     (1 << 19)
+#define bit_SSE4_2     (1 << 20)
+#define bit_OSXSAVE    (1 << 27)
+#define bit_AVX                (1 << 28)
+#define bit_POPCOUNT   (1 << 23)
+#define bit_FMA                (1 << 12)
+#define bit_FMA4       (1 << 16)
+
+/* COMMON_CPUID_INDEX_7.  */
+#define bit_RTM                (1 << 11)
+#define bit_AVX2       (1 << 5)
+#define bit_AVX512F    (1 << 16)
+#define bit_AVX512DQ   (1 << 17)
+
+/* XCR0 Feature flags.  */
+#define bit_XMM_state  (1 << 1)
+#define bit_YMM_state  (2 << 1)
+#define bit_Opmask_state       (1 << 5)
+#define bit_ZMM0_15_state      (1 << 6)
+#define bit_ZMM16_31_state     (1 << 7)
+
+/* The integer bit array index for the first set of internal feature bits.  */
+#define FEATURE_INDEX_1 0
+
+/* The current maximum size of the feature integer bit array.  */
+#define FEATURE_INDEX_MAX 1
+
+#ifdef __ASSEMBLER__
+
+# include <ifunc-defines.h>
+# include <rtld-global-offsets.h>
+
+# define index_SSE2    COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_EDX_OFFSET
+# define index_SSSE3   COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET
+# define index_SSE4_1  COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET
+# define index_SSE4_2  COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET
+# define index_AVX     COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET
+# define index_AVX2    COMMON_CPUID_INDEX_7*CPUID_SIZE+CPUID_EBX_OFFSET
+
+# define index_Fast_Rep_String         FEATURE_INDEX_1*FEATURE_SIZE
+# define index_Fast_Copy_Backward      FEATURE_INDEX_1*FEATURE_SIZE
+# define index_Slow_BSF                        FEATURE_INDEX_1*FEATURE_SIZE
+# define index_Fast_Unaligned_Load     FEATURE_INDEX_1*FEATURE_SIZE
+# define index_Prefer_PMINUB_for_stringop FEATURE_INDEX_1*FEATURE_SIZE
+# define index_AVX_Usable              FEATURE_INDEX_1*FEATURE_SIZE
+# define index_FMA_Usable              FEATURE_INDEX_1*FEATURE_SIZE
+# define index_FMA4_Usable             FEATURE_INDEX_1*FEATURE_SIZE
+# define index_Slow_SSE4_2             FEATURE_INDEX_1*FEATURE_SIZE
+# define index_AVX2_Usable             FEATURE_INDEX_1*FEATURE_SIZE
+# define index_AVX_Fast_Unaligned_Load FEATURE_INDEX_1*FEATURE_SIZE
+# define index_AVX512F_Usable          FEATURE_INDEX_1*FEATURE_SIZE
+# define index_AVX512DQ_Usable         FEATURE_INDEX_1*FEATURE_SIZE
+
+# if defined (_LIBC) && !IS_IN (nonlib)
+#  ifdef __x86_64__
+#   ifdef SHARED
+#    if IS_IN (rtld)
+#     define LOAD_RTLD_GLOBAL_RO_RDX
+#     define HAS_FEATURE(offset, name) \
+  testl $(bit_##name), _rtld_local_ro+offset+(index_##name)(%rip)
+#    else
+#      define LOAD_RTLD_GLOBAL_RO_RDX \
+  mov _rtld_global_ro@GOTPCREL(%rip), %RDX_LP
+#     define HAS_FEATURE(offset, name) \
+  testl $(bit_##name), \
+       RTLD_GLOBAL_RO_DL_X86_CPU_FEATURES_OFFSET+offset+(index_##name)(%rdx)
+#    endif
+#   else /* SHARED */
+#    define LOAD_RTLD_GLOBAL_RO_RDX
+#    define HAS_FEATURE(offset, name) \
+  testl $(bit_##name), _dl_x86_cpu_features+offset+(index_##name)(%rip)
+#   endif /* !SHARED */
+#  else  /* __x86_64__ */
+#   ifdef SHARED
+#    define LOAD_FUNC_GOT_EAX(func) \
+  leal func@GOTOFF(%edx), %eax
+#    if IS_IN (rtld)
+#    define LOAD_GOT_AND_RTLD_GLOBAL_RO \
+  LOAD_PIC_REG(dx)
+#     define HAS_FEATURE(offset, name) \
+  testl $(bit_##name), offset+(index_##name)+_rtld_local_ro@GOTOFF(%edx)
+#    else
+#     define LOAD_GOT_AND_RTLD_GLOBAL_RO \
+  LOAD_PIC_REG(dx); \
+  mov _rtld_global_ro@GOT(%edx), %ecx
+#     define HAS_FEATURE(offset, name) \
+  testl $(bit_##name), \
+       RTLD_GLOBAL_RO_DL_X86_CPU_FEATURES_OFFSET+offset+(index_##name)(%ecx)
+#    endif
+#   else  /* SHARED */
+#    define LOAD_FUNC_GOT_EAX(func) \
+  leal func, %eax
+#    define LOAD_GOT_AND_RTLD_GLOBAL_RO
+#    define HAS_FEATURE(offset, name) \
+  testl $(bit_##name), _dl_x86_cpu_features+offset+(index_##name)
+#   endif /* !SHARED */
+#  endif /* !__x86_64__ */
+# else /* _LIBC && !nonlib */
+#  error "Sorry, <cpu-features.h> is unimplemented for assembler"
+# endif /* !_LIBC || nonlib */
+
+/* HAS_* evaluates to true if we may use the feature at runtime.  */
+# define HAS_CPU_FEATURE(name) HAS_FEATURE (CPUID_OFFSET, name)
+# define HAS_ARCH_FEATURE(name) HAS_FEATURE (FEATURE_OFFSET, name)
+
+#else  /* __ASSEMBLER__ */
+
+enum
+  {
+    COMMON_CPUID_INDEX_1 = 0,
+    COMMON_CPUID_INDEX_7,
+    COMMON_CPUID_INDEX_80000001,       /* for AMD */
+    /* Keep the following line at the end.  */
+    COMMON_CPUID_INDEX_MAX
+  };
+
+struct cpu_features
+{
+  enum cpu_features_kind
+    {
+      arch_kind_unknown = 0,
+      arch_kind_intel,
+      arch_kind_amd,
+      arch_kind_other
+    } kind;
+  int max_cpuid;
+  struct cpuid_registers
+  {
+    unsigned int eax;
+    unsigned int ebx;
+    unsigned int ecx;
+    unsigned int edx;
+  } cpuid[COMMON_CPUID_INDEX_MAX];
+  unsigned int family;
+  unsigned int model;
+  unsigned int feature[FEATURE_INDEX_MAX];
+};
+
+/* Used from outside of glibc to get access to the CPU features
+   structure.  */
+extern const struct cpu_features *__get_cpu_features (void)
+     __attribute__ ((const));
+
+# if defined (_LIBC) && !IS_IN (nonlib)
+/* Unused for x86.  */
+#  define INIT_ARCH()
+#  define __get_cpu_features() (&GLRO(dl_x86_cpu_features))
+# endif
+
+
+/* HAS_* evaluates to true if we may use the feature at runtime.  */
+# define HAS_CPU_FEATURE(name) \
+  ((__get_cpu_features ()->cpuid[index_##name].reg_##name & (bit_##name)) != 0)
+# define HAS_ARCH_FEATURE(name) \
+  ((__get_cpu_features ()->feature[index_##name] & (bit_##name)) != 0)
+
+# define index_SSE2            COMMON_CPUID_INDEX_1
+# define index_SSSE3           COMMON_CPUID_INDEX_1
+# define index_SSE4_1          COMMON_CPUID_INDEX_1
+# define index_SSE4_2          COMMON_CPUID_INDEX_1
+# define index_AVX             COMMON_CPUID_INDEX_1
+# define index_AVX2            COMMON_CPUID_INDEX_7
+# define index_AVX512F         COMMON_CPUID_INDEX_7
+# define index_AVX512DQ                COMMON_CPUID_INDEX_7
+# define index_RTM             COMMON_CPUID_INDEX_7
+# define index_FMA             COMMON_CPUID_INDEX_1
+# define index_FMA4            COMMON_CPUID_INDEX_80000001
+# define index_POPCOUNT                COMMON_CPUID_INDEX_1
+# define index_OSXSAVE         COMMON_CPUID_INDEX_1
+
+# define reg_SSE2              edx
+# define reg_SSSE3             ecx
+# define reg_SSE4_1            ecx
+# define reg_SSE4_2            ecx
+# define reg_AVX               ecx
+# define reg_AVX2              ebx
+# define reg_AVX512F           ebx
+# define reg_AVX512DQ          ebx
+# define reg_RTM               ebx
+# define reg_FMA               ecx
+# define reg_FMA4              ecx
+# define reg_POPCOUNT          ecx
+# define reg_OSXSAVE           ecx
+
+# define index_Fast_Rep_String         FEATURE_INDEX_1
+# define index_Fast_Copy_Backward      FEATURE_INDEX_1
+# define index_Slow_BSF                        FEATURE_INDEX_1
+# define index_Fast_Unaligned_Load     FEATURE_INDEX_1
+# define index_Prefer_PMINUB_for_stringop FEATURE_INDEX_1
+# define index_AVX_Usable              FEATURE_INDEX_1
+# define index_FMA_Usable              FEATURE_INDEX_1
+# define index_FMA4_Usable             FEATURE_INDEX_1
+# define index_Slow_SSE4_2             FEATURE_INDEX_1
+# define index_AVX2_Usable             FEATURE_INDEX_1
+# define index_AVX_Fast_Unaligned_Load FEATURE_INDEX_1
+# define index_AVX512F_Usable          FEATURE_INDEX_1
+# define index_AVX512DQ_Usable         FEATURE_INDEX_1
+
+#endif /* !__ASSEMBLER__ */
+
+#endif  /* cpu_features_h */
diff --git a/sysdeps/x86/dl-get-cpu-features.c b/sysdeps/x86/dl-get-cpu-features.c
new file mode 100644 (file)
index 0000000..080e5e8
--- /dev/null
@@ -0,0 +1,27 @@
+/* This file is part of the GNU C Library.
+   Copyright (C) 2015 Free Software Foundation, Inc.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+
+#include <ldsodefs.h>
+
+#undef __get_cpu_features
+
+const struct cpu_features *
+__get_cpu_features (void)
+{
+  return &GLRO(dl_x86_cpu_features);
+}
diff --git a/sysdeps/x86/libc-start.c b/sysdeps/x86/libc-start.c
new file mode 100644 (file)
index 0000000..9f0c045
--- /dev/null
@@ -0,0 +1,41 @@
+/* Copyright (C) 2015 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#ifdef SHARED
+# include <csu/libc-start.c>
+# else
+/* The main work is done in the generic function.  */
+# define LIBC_START_DISABLE_INLINE
+# define LIBC_START_MAIN generic_start_main
+# include <csu/libc-start.c>
+# include <cpu-features.h>
+# include <cpu-features.c>
+
+extern struct cpu_features _dl_x86_cpu_features;
+
+int
+__libc_start_main (int (*main) (int, char **, char ** MAIN_AUXVEC_DECL),
+                  int argc, char **argv,
+                  __typeof (main) init,
+                  void (*fini) (void),
+                  void (*rtld_fini) (void), void *stack_end)
+{
+  init_cpu_features (&_dl_x86_cpu_features);
+  return generic_start_main (main, argc, argv, init, fini, rtld_fini,
+                            stack_end);
+}
+#endif
diff --git a/sysdeps/x86/rtld-global-offsets.sym b/sysdeps/x86/rtld-global-offsets.sym
new file mode 100644 (file)
index 0000000..a9d53d1
--- /dev/null
@@ -0,0 +1,7 @@
+#define SHARED 1
+
+#include <ldsodefs.h>
+
+#define rtld_global_ro_offsetof(mem) offsetof (struct rtld_global_ro, mem)
+
+RTLD_GLOBAL_RO_DL_X86_CPU_FEATURES_OFFSET rtld_global_ro_offsetof (_dl_x86_cpu_features)
diff --git a/sysdeps/x86/tst-get-cpu-features-static.c b/sysdeps/x86/tst-get-cpu-features-static.c
new file mode 100644 (file)
index 0000000..03f5906
--- /dev/null
@@ -0,0 +1 @@
+#include "tst-get-cpu-features.c"
diff --git a/sysdeps/x86/tst-get-cpu-features.c b/sysdeps/x86/tst-get-cpu-features.c
new file mode 100644 (file)
index 0000000..c17060f
--- /dev/null
@@ -0,0 +1,31 @@
+/* Test case for x86 __get_cpu_features interface
+   Copyright (C) 2015 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <stdlib.h>
+#include <cpu-features.h>
+
+static int
+do_test (void)
+{
+  if (__get_cpu_features ()->kind == arch_kind_unknown)
+    abort ();  /* Presumably cpu_features was never set up.  */
+  return 0;
+}
+
+#define TEST_FUNCTION do_test ()
+#include "../../test-skeleton.c"
index b99fb9a..0ff5309 100644 (file)
 #include <stdlib.h>
 #include <unistd.h>
 #include <cpuid.h>
+#include "multiarch/init-arch.h"
 
-#ifndef __cpuid_count
-/* FIXME: Provide __cpuid_count if it isn't defined.  Copied from gcc
-   4.4.0.  Remove this if gcc 4.4 is the minimum requirement.  */
-# if defined(__i386__) && defined(__PIC__)
-/* %ebx may be the PIC register.  */
-#  define __cpuid_count(level, count, a, b, c, d)              \
-  __asm__ ("xchg{l}\t{%%}ebx, %1\n\t"                  \
-          "cpuid\n\t"                                  \
-          "xchg{l}\t{%%}ebx, %1\n\t"                   \
-          : "=a" (a), "=r" (b), "=c" (c), "=d" (d)     \
-          : "0" (level), "2" (count))
-# else
-#  define __cpuid_count(level, count, a, b, c, d)              \
-  __asm__ ("cpuid\n\t"                                 \
-          : "=a" (a), "=b" (b), "=c" (c), "=d" (d)     \
-          : "0" (level), "2" (count))
-# endif
-#endif
-
-#ifdef USE_MULTIARCH
-# include "multiarch/init-arch.h"
-
-# define is_intel __cpu_features.kind == arch_kind_intel
-# define is_amd __cpu_features.kind == arch_kind_amd
-# define max_cpuid __cpu_features.max_cpuid
-#else
-  /* This spells out "GenuineIntel".  */
-# define is_intel \
-  ebx == 0x756e6547 && ecx == 0x6c65746e && edx == 0x49656e69
-  /* This spells out "AuthenticAMD".  */
-# define is_amd \
-  ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65
-#endif
+#define is_intel (GLRO(dl_x86_cpu_features).kind == arch_kind_intel)
+#define is_amd (GLRO(dl_x86_cpu_features).kind == arch_kind_amd)
+#define max_cpuid GLRO(dl_x86_cpu_features).max_cpuid
 
 static const struct intel_02_cache_info
 {
@@ -235,21 +206,8 @@ intel_check_word (int name, unsigned int value, bool *has_level_2,
              /* Intel reused this value.  For family 15, model 6 it
                 specifies the 3rd level cache.  Otherwise the 2nd
                 level cache.  */
-             unsigned int family;
-             unsigned int model;
-#ifdef USE_MULTIARCH
-             family = __cpu_features.family;
-             model = __cpu_features.model;
-#else
-             unsigned int eax;
-             unsigned int ebx;
-             unsigned int ecx;
-             unsigned int edx;
-             __cpuid (1, eax, ebx, ecx, edx);
-
-             family = ((eax >> 20) & 0xff) + ((eax >> 8) & 0xf);
-             model = (((eax >>16) & 0xf) << 4) + ((eax >> 4) & 0xf);
-#endif
+             unsigned int family = GLRO(dl_x86_cpu_features).family;
+             unsigned int model = GLRO(dl_x86_cpu_features).model;
 
              if (family == 15 && model == 6)
                {
@@ -476,18 +434,6 @@ long int
 attribute_hidden
 __cache_sysconf (int name)
 {
-#ifdef USE_MULTIARCH
-  if (__cpu_features.kind == arch_kind_unknown)
-    __init_cpu_features ();
-#else
-  /* Find out what brand of processor.  */
-  unsigned int max_cpuid;
-  unsigned int ebx;
-  unsigned int ecx;
-  unsigned int edx;
-  __cpuid (0, max_cpuid, ebx, ecx, edx);
-#endif
-
   if (is_intel)
     return handle_intel (name, max_cpuid);
 
@@ -523,18 +469,6 @@ long int __x86_raw_shared_cache_size attribute_hidden = 1024 * 1024;
 int __x86_prefetchw attribute_hidden;
 #endif
 
-#ifndef DISABLE_PREFERRED_MEMORY_INSTRUCTION
-/* Instructions preferred for memory and string routines.
-
-  0: Regular instructions
-  1: MMX instructions
-  2: SSE2 instructions
-  3: SSSE3 instructions
-
-  */
-int __x86_preferred_memory_instruction attribute_hidden;
-#endif
-
 
 static void
 __attribute__((constructor))
@@ -551,14 +485,6 @@ init_cacheinfo (void)
   unsigned int level;
   unsigned int threads = 0;
 
-#ifdef USE_MULTIARCH
-  if (__cpu_features.kind == arch_kind_unknown)
-    __init_cpu_features ();
-#else
-  int max_cpuid;
-  __cpuid (0, max_cpuid, ebx, ecx, edx);
-#endif
-
   if (is_intel)
     {
       data = handle_intel (_SC_LEVEL1_DCACHE_SIZE, max_cpuid);
@@ -574,34 +500,13 @@ init_cacheinfo (void)
          shared = handle_intel (_SC_LEVEL2_CACHE_SIZE, max_cpuid);
        }
 
-      unsigned int ebx_1;
-
-#ifdef USE_MULTIARCH
-      eax = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].eax;
-      ebx_1 = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ebx;
-      ecx = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx;
-      edx = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].edx;
-#else
-      __cpuid (1, eax, ebx_1, ecx, edx);
-#endif
-
-      unsigned int family = (eax >> 8) & 0x0f;
-      unsigned int model = (eax >> 4) & 0x0f;
-      unsigned int extended_model = (eax >> 12) & 0xf0;
-
-#ifndef DISABLE_PREFERRED_MEMORY_INSTRUCTION
-      /* Intel prefers SSSE3 instructions for memory/string routines
-        if they are available.  */
-      if ((ecx & 0x200))
-       __x86_preferred_memory_instruction = 3;
-      else
-       __x86_preferred_memory_instruction = 2;
-#endif
-
       /* Figure out the number of logical threads that share the
         highest cache level.  */
       if (max_cpuid >= 4)
        {
+         unsigned int family = GLRO(dl_x86_cpu_features).family;
+         unsigned int model = GLRO(dl_x86_cpu_features).model;
+
          int i = 0;
 
          /* Query until desired cache level is enumerated.  */
@@ -653,7 +558,6 @@ init_cacheinfo (void)
          threads += 1;
          if (threads > 2 && level == 2 && family == 6)
            {
-             model += extended_model;
              switch (model)
                {
                case 0x57:
@@ -676,7 +580,9 @@ init_cacheinfo (void)
        intel_bug_no_cache_info:
          /* Assume that all logical threads share the highest cache level.  */
 
-         threads = (ebx_1 >> 16) & 0xff;
+         threads
+           = ((GLRO(dl_x86_cpu_features).cpuid[COMMON_CPUID_INDEX_1].ebx
+               >> 16) & 0xff);
        }
 
       /* Cap usage of highest cache level to the number of supported
@@ -691,25 +597,6 @@ init_cacheinfo (void)
       long int core = handle_amd (_SC_LEVEL2_CACHE_SIZE);
       shared = handle_amd (_SC_LEVEL3_CACHE_SIZE);
 
-#ifndef DISABLE_PREFERRED_MEMORY_INSTRUCTION
-# ifdef USE_MULTIARCH
-      eax = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].eax;
-      ebx = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ebx;
-      ecx = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx;
-      edx = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].edx;
-# else
-      __cpuid (1, eax, ebx, ecx, edx);
-# endif
-
-      /* AMD prefers SSSE3 instructions for memory/string routines
-        if they are avaiable, otherwise it prefers integer
-        instructions.  */
-      if ((ecx & 0x200))
-       __x86_preferred_memory_instruction = 3;
-      else
-       __x86_preferred_memory_instruction = 0;
-#endif
-
       /* Get maximum extended function. */
       __cpuid (0x80000000, max_cpuid_ex, ebx, ecx, edx);
 
index cae6db3..d22359d 100644 (file)
@@ -26,6 +26,7 @@
 #include <sysdep.h>
 #include <tls.h>
 #include <dl-tlsdesc.h>
+#include <cpu-features.c>
 
 /* Return nonzero iff ELF header is compatible with the running host.  */
 static inline int __attribute__ ((unused))
@@ -205,6 +206,8 @@ dl_platform_init (void)
   if (GLRO(dl_platform) != NULL && *GLRO(dl_platform) == '\0')
     /* Avoid an empty string which would disturb us.  */
     GLRO(dl_platform) = NULL;
+
+  init_cpu_features (&GLRO(dl_x86_cpu_features));
 }
 
 static inline ElfW(Addr)
diff --git a/sysdeps/x86_64/dl-procinfo.c b/sysdeps/x86_64/dl-procinfo.c
new file mode 100644 (file)
index 0000000..851681a
--- /dev/null
@@ -0,0 +1,57 @@
+/* Data for x86-64 version of processor capability information.
+   Copyright (C) 2015 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* If anything should be added here check whether the size of each string
+   is still ok with the given array size.
+
+   All the #ifdefs in the definitions are quite irritating but
+   necessary if we want to avoid duplicating the information.  There
+   are three different modes:
+
+   - PROCINFO_DECL is defined.  This means we are only interested in
+     declarations.
+
+   - PROCINFO_DECL is not defined:
+
+     + if SHARED is defined the file is included in a structure
+       initializer.  The .element = { ... } syntax is needed.
+
+     + if SHARED is not defined a normal variable definition is
+       needed.
+  */
+
+#ifndef PROCINFO_CLASS
+# define PROCINFO_CLASS  /* Default: no extra specifier on the object.  */
+#endif
+
+#if !defined PROCINFO_DECL && defined SHARED
+  ._dl_x86_cpu_features  /* Designated member of an enclosing initializer.  */
+#else
+PROCINFO_CLASS struct cpu_features _dl_x86_cpu_features
+#endif
+#ifndef PROCINFO_DECL
+= { }  /* Not a pure declaration, so an (empty) initializer follows.  */
+#endif
+#if !defined SHARED || defined PROCINFO_DECL
+;  /* Ends the stand-alone declaration or definition.  */
+#else
+,  /* Separates this member from the next one in the initializer.  */
+#endif
+
+#undef PROCINFO_DECL  /* Presumably reset for the next inclusion -- confirm.  */
+#undef PROCINFO_CLASS
index 84d36e8..e3f2da2 100644 (file)
@@ -20,6 +20,7 @@
 #define        _X86_64_LDSODEFS_H      1
 
 #include <elf.h>
+#include <cpu-features.h>
 
 struct La_x86_64_regs;
 struct La_x86_64_retval;
index d7002a9..d10b4d4 100644 (file)
@@ -1,5 +1,4 @@
 ifeq ($(subdir),csu)
-aux += init-arch
 tests += test-multiarch
 gen-as-const-headers += ifunc-defines.sym
 endif
diff --git a/sysdeps/x86_64/multiarch/Versions b/sysdeps/x86_64/multiarch/Versions
deleted file mode 100644 (file)
index 59b185a..0000000
+++ /dev/null
@@ -1,5 +0,0 @@
-libc {
-  GLIBC_PRIVATE {
-    __get_cpu_features;
-  }
-}
diff --git a/sysdeps/x86_64/multiarch/cacheinfo.c b/sysdeps/x86_64/multiarch/cacheinfo.c
deleted file mode 100644 (file)
index f87b8dc..0000000
+++ /dev/null
@@ -1,2 +0,0 @@
-#define DISABLE_PREFERRED_MEMORY_INSTRUCTION
-#include "../cacheinfo.c"
index a410d88..3df946f 100644 (file)
@@ -4,7 +4,6 @@
 --
 
 CPU_FEATURES_SIZE      sizeof (struct cpu_features)
-KIND_OFFSET            offsetof (struct cpu_features, kind)
 CPUID_OFFSET           offsetof (struct cpu_features, cpuid)
 CPUID_SIZE             sizeof (struct cpuid_registers)
 CPUID_EAX_OFFSET       offsetof (struct cpuid_registers, eax)
index cfc6e70..2b9988e 100644 (file)
    License along with the GNU C Library; if not, see
    <http://www.gnu.org/licenses/>.  */
 
-#define bit_Fast_Rep_String            (1 << 0)
-#define bit_Fast_Copy_Backward         (1 << 1)
-#define bit_Slow_BSF                   (1 << 2)
-#define bit_Fast_Unaligned_Load                (1 << 4)
-#define bit_Prefer_PMINUB_for_stringop (1 << 5)
-#define bit_AVX_Usable                 (1 << 6)
-#define bit_FMA_Usable                 (1 << 7)
-#define bit_FMA4_Usable                        (1 << 8)
-#define bit_Slow_SSE4_2                        (1 << 9)
-#define bit_AVX2_Usable                        (1 << 10)
-#define bit_AVX_Fast_Unaligned_Load    (1 << 11)
-#define bit_AVX512F_Usable             (1 << 12)
-#define bit_AVX512DQ_Usable            (1 << 13)
-
-/* CPUID Feature flags.  */
-
-/* COMMON_CPUID_INDEX_1.  */
-#define bit_SSE2       (1 << 26)
-#define bit_SSSE3      (1 << 9)
-#define bit_SSE4_1     (1 << 19)
-#define bit_SSE4_2     (1 << 20)
-#define bit_OSXSAVE    (1 << 27)
-#define bit_AVX                (1 << 28)
-#define bit_POPCOUNT   (1 << 23)
-#define bit_FMA                (1 << 12)
-#define bit_FMA4       (1 << 16)
-
-/* COMMON_CPUID_INDEX_7.  */
-#define bit_RTM                (1 << 11)
-#define bit_AVX2       (1 << 5)
-#define bit_AVX512F    (1 << 16)
-#define bit_AVX512DQ   (1 << 17)
-
-/* XCR0 Feature flags.  */
-#define bit_XMM_state  (1 << 1)
-#define bit_YMM_state  (2 << 1)
-#define bit_Opmask_state       (1 << 5)
-#define bit_ZMM0_15_state      (1 << 6)
-#define bit_ZMM16_31_state     (1 << 7)
-
-/* The integer bit array index for the first set of internal feature bits.  */
-# define FEATURE_INDEX_1 0
-
-/* The current maximum size of the feature integer bit array.  */
-# define FEATURE_INDEX_MAX 1
-
-#ifdef __ASSEMBLER__
-
-# include <ifunc-defines.h>
-
-# define index_SSE2    COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_EDX_OFFSET
-# define index_SSSE3   COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET
-# define index_SSE4_1  COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET
-# define index_SSE4_2  COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET
-# define index_AVX     COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET
-# define index_AVX2    COMMON_CPUID_INDEX_7*CPUID_SIZE+CPUID_EBX_OFFSET
-
-# define index_Fast_Rep_String         FEATURE_INDEX_1*FEATURE_SIZE
-# define index_Fast_Copy_Backward      FEATURE_INDEX_1*FEATURE_SIZE
-# define index_Slow_BSF                        FEATURE_INDEX_1*FEATURE_SIZE
-# define index_Fast_Unaligned_Load     FEATURE_INDEX_1*FEATURE_SIZE
-# define index_Prefer_PMINUB_for_stringop FEATURE_INDEX_1*FEATURE_SIZE
-# define index_AVX_Usable              FEATURE_INDEX_1*FEATURE_SIZE
-# define index_FMA_Usable              FEATURE_INDEX_1*FEATURE_SIZE
-# define index_FMA4_Usable             FEATURE_INDEX_1*FEATURE_SIZE
-# define index_Slow_SSE4_2             FEATURE_INDEX_1*FEATURE_SIZE
-# define index_AVX2_Usable             FEATURE_INDEX_1*FEATURE_SIZE
-# define index_AVX_Fast_Unaligned_Load FEATURE_INDEX_1*FEATURE_SIZE
-# define index_AVX512F_Usable          FEATURE_INDEX_1*FEATURE_SIZE
-# define index_AVX512DQ_Usable         FEATURE_INDEX_1*FEATURE_SIZE
-
-#else  /* __ASSEMBLER__ */
-
-# include <sys/param.h>
-
-enum
-  {
-    COMMON_CPUID_INDEX_1 = 0,
-    COMMON_CPUID_INDEX_7,
-    COMMON_CPUID_INDEX_80000001,       /* for AMD */
-    /* Keep the following line at the end.  */
-    COMMON_CPUID_INDEX_MAX
-  };
-
-extern struct cpu_features
-{
-  enum cpu_features_kind
-    {
-      arch_kind_unknown = 0,
-      arch_kind_intel,
-      arch_kind_amd,
-      arch_kind_other
-    } kind;
-  int max_cpuid;
-  struct cpuid_registers
-  {
-    unsigned int eax;
-    unsigned int ebx;
-    unsigned int ecx;
-    unsigned int edx;
-  } cpuid[COMMON_CPUID_INDEX_MAX];
-  unsigned int family;
-  unsigned int model;
-  unsigned int feature[FEATURE_INDEX_MAX];
-} __cpu_features attribute_hidden;
-
-
-extern void __init_cpu_features (void) attribute_hidden;
-# define INIT_ARCH() \
-  do                                                   \
-    if (__cpu_features.kind == arch_kind_unknown)      \
-      __init_cpu_features ();                          \
-  while (0)
-
-/* Used from outside libc.so to get access to the CPU features structure.  */
-extern const struct cpu_features *__get_cpu_features (void)
-     __attribute__ ((const));
-
-# if IS_IN (libc)
-#  define __get_cpu_features() (&__cpu_features)
-# endif
-
-# define HAS_CPU_FEATURE(idx, reg, bit) \
-  ((__get_cpu_features ()->cpuid[idx].reg & (bit)) != 0)
-
-/* Following are the feature tests used throughout libc.  */
-
-/* CPUID_* evaluates to true if the feature flag is enabled.
-   We always use &__cpu_features because the HAS_CPUID_* macros
-   are called only within __init_cpu_features, where we can't
-   call __get_cpu_features without infinite recursion.  */
-# define HAS_CPUID_FLAG(idx, reg, bit) \
-  (((&__cpu_features)->cpuid[idx].reg & (bit)) != 0)
-
-# define CPUID_OSXSAVE \
-  HAS_CPUID_FLAG (COMMON_CPUID_INDEX_1, ecx, bit_OSXSAVE)
-# define CPUID_AVX \
-  HAS_CPUID_FLAG (COMMON_CPUID_INDEX_1, ecx, bit_AVX)
-# define CPUID_FMA \
-  HAS_CPUID_FLAG (COMMON_CPUID_INDEX_1, ecx, bit_FMA)
-# define CPUID_FMA4 \
-  HAS_CPUID_FLAG (COMMON_CPUID_INDEX_80000001, ecx, bit_FMA4)
-# define CPUID_RTM \
-  HAS_CPUID_FLAG (COMMON_CPUID_INDEX_7, ebx, bit_RTM)
-# define CPUID_AVX2 \
-  HAS_CPUID_FLAG (COMMON_CPUID_INDEX_7, ebx, bit_AVX2)
-# define CPUID_AVX512F \
-  HAS_CPUID_FLAG (COMMON_CPUID_INDEX_7, ebx, bit_AVX512F)
-# define CPUID_AVX512DQ \
-  HAS_CPUID_FLAG (COMMON_CPUID_INDEX_7, ebx, bit_AVX512DQ)
-
-/* HAS_* evaluates to true if we may use the feature at runtime.  */
-# define HAS_SSE2      HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, edx, bit_SSE2)
-# define HAS_POPCOUNT  HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, bit_POPCOUNT)
-# define HAS_SSSE3     HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, bit_SSSE3)
-# define HAS_SSE4_1    HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, bit_SSE4_1)
-# define HAS_SSE4_2    HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, bit_SSE4_2)
-# define HAS_RTM       HAS_CPU_FEATURE (COMMON_CPUID_INDEX_7, ebx, bit_RTM)
-
-# define index_Fast_Rep_String         FEATURE_INDEX_1
-# define index_Fast_Copy_Backward      FEATURE_INDEX_1
-# define index_Slow_BSF                        FEATURE_INDEX_1
-# define index_Fast_Unaligned_Load     FEATURE_INDEX_1
-# define index_Prefer_PMINUB_for_stringop FEATURE_INDEX_1
-# define index_AVX_Usable              FEATURE_INDEX_1
-# define index_FMA_Usable              FEATURE_INDEX_1
-# define index_FMA4_Usable             FEATURE_INDEX_1
-# define index_Slow_SSE4_2             FEATURE_INDEX_1
-# define index_AVX2_Usable             FEATURE_INDEX_1
-# define index_AVX_Fast_Unaligned_Load FEATURE_INDEX_1
-# define index_AVX512F_Usable          FEATURE_INDEX_1
-# define index_AVX512DQ_Usable         FEATURE_INDEX_1
-
-# define HAS_ARCH_FEATURE(name) \
-  ((__get_cpu_features ()->feature[index_##name] & (bit_##name)) != 0)
-
-# define HAS_FAST_REP_STRING           HAS_ARCH_FEATURE (Fast_Rep_String)
-# define HAS_FAST_COPY_BACKWARD                HAS_ARCH_FEATURE (Fast_Copy_Backward)
-# define HAS_SLOW_BSF                  HAS_ARCH_FEATURE (Slow_BSF)
-# define HAS_FAST_UNALIGNED_LOAD       HAS_ARCH_FEATURE (Fast_Unaligned_Load)
-# define HAS_AVX                       HAS_ARCH_FEATURE (AVX_Usable)
-# define HAS_AVX2                      HAS_ARCH_FEATURE (AVX2_Usable)
-# define HAS_AVX512F                   HAS_ARCH_FEATURE (AVX512F_Usable)
-# define HAS_AVX512DQ                  HAS_ARCH_FEATURE (AVX512DQ_Usable)
-# define HAS_FMA                       HAS_ARCH_FEATURE (FMA_Usable)
-# define HAS_FMA4                      HAS_ARCH_FEATURE (FMA4_Usable)
-# define HAS_AVX_FAST_UNALIGNED_LOAD   HAS_ARCH_FEATURE (AVX_Fast_Unaligned_Load)
-
-#endif /* __ASSEMBLER__ */
+#ifdef  __ASSEMBLER__
+# include <cpu-features.h>
+#else
+# include <ldsodefs.h>
+#endif