8 static char *cpuname[] = {
15 static char *cpuname_lower[] = {
22 // Guard the use of getauxval() on glibc version >= 2.16
25 #if __GLIBC_PREREQ(2, 16)
27 #define HAVE_GETAUXVAL 1
29 static unsigned long get_hwcap(void)
31 unsigned long hwcap = getauxval(AT_HWCAP);
34 // honor requests for not using specific CPU features in LD_HWCAP_MASK
35 maskenv = getenv("LD_HWCAP_MASK");
37 hwcap &= strtoul(maskenv, NULL, 0);
40 // note that a missing auxval is interpreted as no capabilities
41 // available, which is safe.
44 #else // __GLIBC_PREREQ(2, 16)
45 #warning "Cannot detect SIMD support in Z13 or newer architectures since glibc is older than 2.16"
47 static unsigned long get_hwcap(void) {
48 // treat missing support for getauxval() as no capabilities available,
52 #endif // __GLIBC_PREREQ(2, 16)
55 static int detect(void)
57 unsigned long hwcap = get_hwcap();
59 // Choose the architecture level for optimized kernels based on hardware
60 // capability bits (just like glibc chooses optimized implementations).
62 // The hardware capability bits that are used here indicate both
63 // hardware support for a particular ISA extension and the presence of
64 // software support to enable its use. For example, when HWCAP_S390_VX
65 // is set then both the CPU can execute SIMD instructions and the Linux
66 // kernel can manage applications using the vector registers and SIMD
69 // See glibc's sysdeps/s390/dl-procinfo.h for an overview (also in
70 // sysdeps/unix/sysv/linux/s390/bits/hwcap.h) of the defined hardware
71 // capability bits. They are derived from the information that the
72 // "store facility list (extended)" instructions provide.
73 // (https://sourceware.org/git/?p=glibc.git;a=blob_plain;f=sysdeps/s390/dl-procinfo.h;hb=HEAD)
76 // HWCAP_S390_VX - vector facility for z/Architecture (introduced with
77 // IBM z13), enables level CPU_Z13 (SIMD)
78 // HWCAP_S390_VXE - vector enhancements facility 1 (introduced with IBM
79 // z14), together with VX enables level CPU_Z14
80 // (single-precision SIMD instructions)
82 // When you add optimized kernels that make use of other ISA extensions
83 // (e.g., for exploiting the vector-enhancements facility 2 that was introduced
84 // with IBM z15), then add a new architecture level (e.g., CPU_Z15) and gate
85 // it on the hwcap that represents it here (e.g., HWCAP_S390_VXRS_EXT2
86 // for the z15 vector enhancements).
88 // To learn the value of hwcaps on a given system, set the environment
89 // variable LD_SHOW_AUXV and let ld.so dump it (e.g., by running
90 // LD_SHOW_AUXV=1 /bin/true).
91 // Also, the init function for dynamic arch support will print hwcaps
92 // when OPENBLAS_VERBOSE is set to 2 or higher.
93 if ((hwcap & HWCAP_S390_VX) && (hwcap & HWCAP_S390_VXE))
96 if (hwcap & HWCAP_S390_VX)