{
unsigned long hwcap = get_hwcap();
+ // Choose the architecture level for optimized kernels based on hardware
+ // capability bits (just like glibc chooses optimized implementations).
+ //
+ // The hardware capability bits that are used here indicate both
+ // hardware support for a particular ISA extension and the presence of
+ // software support to enable its use. For example, when HWCAP_S390_VX
+ // is set, the CPU can execute SIMD instructions and the Linux kernel
+ // can manage applications that use the vector registers and SIMD
+ // instructions.
+ //
+ // See glibc's sysdeps/s390/dl-procinfo.h for an overview (also in
+ // sysdeps/unix/sysv/linux/s390/bits/hwcap.h) of the defined hardware
+ // capability bits. They are derived from the information that the
+ // "store facility list (extended)" instructions provide.
+ // (https://sourceware.org/git/?p=glibc.git;a=blob_plain;f=sysdeps/s390/dl-procinfo.h;hb=HEAD)
+ //
+ // currently used:
+ // HWCAP_S390_VX - vector facility for z/Architecture (introduced with
+ // IBM z13), enables level CPU_Z13 (SIMD)
+ // HWCAP_S390_VXE - vector enhancements facility 1 (introduced with IBM
+ // z14), together with VX enables level CPU_Z14
+ // (single-precision SIMD instructions)
+ //
+ // When you add optimized kernels that make use of other ISA extensions
+ // (e.g., for exploiting the vector-enhancements facility 2 that was introduced
+ // with IBM z15), add a new architecture level (e.g., CPU_Z15) and gate
+ // it here on the hwcap that represents it (e.g., HWCAP_S390_VXRS_EXT2
+ // for the z15 vector enhancements).
+ //
+ // To learn the value of hwcaps on a given system, set the environment
+ // variable LD_SHOW_AUXV and let ld.so dump it (e.g., by running
+ // LD_SHOW_AUXV=1 /bin/true).
+ // Also, the init function for dynamic arch support will print hwcaps
+ // when OPENBLAS_VERBOSE is set to 2 or higher.
if ((hwcap & HWCAP_S390_VX) && (hwcap & HWCAP_S390_VXE))
return CPU_Z14;
#define NUM_CORETYPES 4
+extern int openblas_verbose(void); /* verbosity level; callers in this file compare it against 2 */
extern void openblas_warning(int verbose, const char* msg);
char* gotoblas_corename(void) {
else
{
gotoblas = get_coretype();
+ if (openblas_verbose() >= 2) {
+ snprintf(coremsg, sizeof(coremsg), "Choosing kernels based on getauxval(AT_HWCAP)=0x%lx\n",
+ getauxval(AT_HWCAP));
+ openblas_warning(2, coremsg);
+ }
}
if (gotoblas == NULL)
}
if (gotoblas && gotoblas->init) {
- strncpy(coren, gotoblas_corename(), 20);
- sprintf(coremsg, "Core: %s\n", coren);
- openblas_warning(2, coremsg);
+ if (openblas_verbose() >= 2) {
+ strncpy(coren, gotoblas_corename(), 20);
+ sprintf(coremsg, "Core: %s\n", coren);
+ openblas_warning(2, coremsg);
+ }
gotoblas->init();
}
else {