From bcdf1d49170508fd5c8250f802dd9018b7771534 Mon Sep 17 00:00:00 2001 From: Rashmica Gupta Date: Tue, 9 Apr 2019 14:13:24 +1000 Subject: [PATCH] Add in runtime CPU detection for POWER. --- Makefile.system | 6 +++ driver/others/Makefile | 8 ++++ driver/others/dynamic_power.c | 102 ++++++++++++++++++++++++++++++++++++++++++ kernel/power/KERNEL.POWER8 | 32 ++++++------- kernel/power/KERNEL.POWER9 | 32 ++++++------- kernel/setparam-ref.c | 22 +++++++++ 6 files changed, 170 insertions(+), 32 deletions(-) create mode 100644 driver/others/dynamic_power.c diff --git a/Makefile.system b/Makefile.system index 53f89b2..a95d619 100644 --- a/Makefile.system +++ b/Makefile.system @@ -528,6 +528,12 @@ DYNAMIC_CORE += THUNDERX DYNAMIC_CORE += THUNDERX2T99 endif +ifeq ($(ARCH), power) +DYNAMIC_CORE = POWER6 +DYNAMIC_CORE += POWER8 +DYNAMIC_CORE += POWER9 +endif + # If DYNAMIC_CORE is not set, DYNAMIC_ARCH cannot do anything, so force it to empty ifndef DYNAMIC_CORE override DYNAMIC_ARCH= diff --git a/driver/others/Makefile b/driver/others/Makefile index 3dc2e7c..d4b5c26 100644 --- a/driver/others/Makefile +++ b/driver/others/Makefile @@ -18,8 +18,12 @@ ifeq ($(DYNAMIC_ARCH), 1) ifeq ($(ARCH),arm64) COMMONOBJS += dynamic_arm64.$(SUFFIX) else +ifeq ($(ARCH),power) +COMMONOBJS += dynamic_power.$(SUFFIX) +else COMMONOBJS += dynamic.$(SUFFIX) endif +endif else COMMONOBJS += parameter.$(SUFFIX) endif @@ -78,8 +82,12 @@ ifeq ($(DYNAMIC_ARCH), 1) ifeq ($(ARCH),arm64) HPLOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) dynamic_arm64.$(SUFFIX) else +ifeq ($(ARCH),power) +HPLOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) dynamic_power.$(SUFFIX) +else HPLOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) dynamic.$(SUFFIX) endif +endif else HPLOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) parameter.$(SUFFIX) endif diff --git a/driver/others/dynamic_power.c b/driver/others/dynamic_power.c new file mode 100644 index 0000000..0c4a87a --- /dev/null +++ b/driver/others/dynamic_power.c @@ -0,0 +1,102 @@ + +#include "common.h" + 
+extern gotoblas_t gotoblas_POWER6;
+extern gotoblas_t gotoblas_POWER8;
+extern gotoblas_t gotoblas_POWER9;
+
+extern void openblas_warning(int verbose, const char *msg);
+
+/* Printable core names; index 0 doubles as the "no match" entry. */
+static char *corename[] = {
+  "unknown",
+  "POWER6",
+  "POWER8",
+  "POWER9"
+};
+
+#define NUM_CORETYPES 4
+
+/* Name of the kernel table `gotoblas` currently points at, or "unknown"
+ * when it is none of the three POWER tables. */
+char *gotoblas_corename(void) {
+  if (gotoblas == &gotoblas_POWER6) return corename[1];
+  if (gotoblas == &gotoblas_POWER8) return corename[2];
+  if (gotoblas == &gotoblas_POWER9) return corename[3];
+  return corename[0];
+}
+
+/* Select a kernel table from the running CPU via __builtin_cpu_is().
+ * Returns NULL when the CPU is not POWER6/POWER6x, POWER8 or POWER9. */
+static gotoblas_t *get_coretype(void) {
+  if (__builtin_cpu_is("power6") || __builtin_cpu_is("power6x"))
+    return &gotoblas_POWER6;
+  if (__builtin_cpu_is("power8"))
+    return &gotoblas_POWER8;
+  if (__builtin_cpu_is("power9"))
+    return &gotoblas_POWER9;
+  return NULL;
+}
+
+/* Map a core name (case-insensitive match against corename[]) to its kernel
+ * table; calls openblas_warning(1, ...) and returns NULL when no supported core is named. */
+static gotoblas_t *force_coretype(char * coretype) {
+  int i;
+  int found = -1;
+  char message[128];
+
+  for (i = 0; i < NUM_CORETYPES; i++) {
+    if (!strncasecmp(coretype, corename[i], 20)) {
+      found = i;
+      break;
+    }
+  }
+
+  switch (found) {
+    case 1: return &gotoblas_POWER6;
+    case 2: return &gotoblas_POWER8;
+    case 3: return &gotoblas_POWER9;
+    default: break; /* no match, or the "unknown" placeholder */
+  }
+  /* Reached only on failure; a `default: return` here made this dead code. */
+  snprintf(message, sizeof(message), "Core not found: %s\n", coretype);
+  openblas_warning(1, message);
+  return NULL;
+}
+
+/* Choose the kernel table once: OPENBLAS_CORETYPE overrides detection, and
+ * an unrecognised name or CPU falls back to POWER8.  Runs the table's init
+ * hook, or exits the process when the table provides none. */
+void gotoblas_dynamic_init(void) {
+  char coremsg[128];
+  char *p;
+
+  if (gotoblas) return; /* a table is already selected */
+
+  p = getenv("OPENBLAS_CORETYPE");
+  if (p) {
+    gotoblas = force_coretype(p);
+  } else {
+    gotoblas = get_coretype();
+  }
+
+  if (gotoblas == NULL) {
+    snprintf(coremsg, sizeof(coremsg), "Falling back to POWER8 core\n");
+    openblas_warning(1, coremsg);
+    gotoblas = &gotoblas_POWER8;
+  }
+
+  if (gotoblas && gotoblas -> init) {
+    snprintf(coremsg, sizeof(coremsg), "Core: %s\n", gotoblas_corename());
+    openblas_warning(2, coremsg);
+    gotoblas -> init();
+  } else {
+    openblas_warning(0, "OpenBLAS : Architecture Initialization failed. No initialization function found.\n");
+    exit(1);
+  }
+}
+
+/* Forget the selected table so a later gotoblas_dynamic_init() selects again. */
+void gotoblas_dynamic_quit(void) {
+  gotoblas = NULL;
+}
diff --git a/kernel/power/KERNEL.POWER8 b/kernel/power/KERNEL.POWER8 index e6f69c7..43f004f 100644 --- a/kernel/power/KERNEL.POWER8 +++ b/kernel/power/KERNEL.POWER8 @@ -13,40 +13,40 @@ SGEMMINCOPY = ../generic/gemm_ncopy_16.c SGEMMITCOPY = sgemm_tcopy_16_power8.S SGEMMONCOPY = ../generic/gemm_ncopy_8.c SGEMMOTCOPY = sgemm_tcopy_8_power8.S -SGEMMINCOPYOBJ = sgemm_incopy.o -SGEMMITCOPYOBJ = sgemm_itcopy.o -SGEMMONCOPYOBJ = sgemm_oncopy.o -SGEMMOTCOPYOBJ = sgemm_otcopy.o +SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX) +SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX) +SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX) +SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX) DGEMMKERNEL = dgemm_kernel_16x4_power8.S DGEMMINCOPY = ../generic/gemm_ncopy_16.c DGEMMITCOPY = dgemm_tcopy_16_power8.S DGEMMONCOPY = dgemm_ncopy_4_power8.S DGEMMOTCOPY = ../generic/gemm_tcopy_4.c -DGEMMINCOPYOBJ = dgemm_incopy.o -DGEMMITCOPYOBJ = dgemm_itcopy.o -DGEMMONCOPYOBJ = dgemm_oncopy.o -DGEMMOTCOPYOBJ = dgemm_otcopy.o +DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX) +DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX) +DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX) +DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX) CGEMMKERNEL = cgemm_kernel_8x4_power8.S CGEMMINCOPY = ../generic/zgemm_ncopy_8.c CGEMMITCOPY = cgemm_tcopy_8_power8.S CGEMMONCOPY = ../generic/zgemm_ncopy_4.c CGEMMOTCOPY = ../generic/zgemm_tcopy_4.c -CGEMMONCOPYOBJ = cgemm_oncopy.o -CGEMMOTCOPYOBJ = cgemm_otcopy.o -CGEMMINCOPYOBJ = cgemm_incopy.o -CGEMMITCOPYOBJ = cgemm_itcopy.o +CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX) +CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX) +CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX) +CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX) ZGEMMKERNEL = zgemm_kernel_8x2_power8.S ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c ZGEMMINCOPY = 
../generic/zgemm_ncopy_8.c ZGEMMITCOPY = zgemm_tcopy_8_power8.S -ZGEMMONCOPYOBJ = zgemm_oncopy.o -ZGEMMOTCOPYOBJ = zgemm_otcopy.o -ZGEMMINCOPYOBJ = zgemm_incopy.o -ZGEMMITCOPYOBJ = zgemm_itcopy.o +ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX) +ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX) +ZGEMMINCOPYOBJ = zgemm_incopy$(TSUFFIX).$(SUFFIX) +ZGEMMITCOPYOBJ = zgemm_itcopy$(TSUFFIX).$(SUFFIX) STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c diff --git a/kernel/power/KERNEL.POWER9 b/kernel/power/KERNEL.POWER9 index 86a9319..e166f25 100644 --- a/kernel/power/KERNEL.POWER9 +++ b/kernel/power/KERNEL.POWER9 @@ -13,40 +13,40 @@ SGEMMINCOPY = ../generic/gemm_ncopy_16.c SGEMMITCOPY = sgemm_tcopy_16_power8.S SGEMMONCOPY = ../generic/gemm_ncopy_8.c SGEMMOTCOPY = sgemm_tcopy_8_power8.S -SGEMMINCOPYOBJ = sgemm_incopy.o -SGEMMITCOPYOBJ = sgemm_itcopy.o -SGEMMONCOPYOBJ = sgemm_oncopy.o -SGEMMOTCOPYOBJ = sgemm_otcopy.o +SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX) +SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX) +SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX) +SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX) DGEMMKERNEL = dgemm_kernel_power9.S DGEMMINCOPY = ../generic/gemm_ncopy_16.c DGEMMITCOPY = dgemm_tcopy_16_power8.S DGEMMONCOPY = dgemm_ncopy_4_power8.S DGEMMOTCOPY = ../generic/gemm_tcopy_4.c -DGEMMINCOPYOBJ = dgemm_incopy.o -DGEMMITCOPYOBJ = dgemm_itcopy.o -DGEMMONCOPYOBJ = dgemm_oncopy.o -DGEMMOTCOPYOBJ = dgemm_otcopy.o +DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX) +DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX) +DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX) +DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX) CGEMMKERNEL = cgemm_kernel_8x4_power8.S CGEMMINCOPY = ../generic/zgemm_ncopy_8.c CGEMMITCOPY = cgemm_tcopy_8_power8.S CGEMMONCOPY = ../generic/zgemm_ncopy_4.c CGEMMOTCOPY = ../generic/zgemm_tcopy_4.c -CGEMMONCOPYOBJ = cgemm_oncopy.o -CGEMMOTCOPYOBJ = cgemm_otcopy.o -CGEMMINCOPYOBJ = cgemm_incopy.o 
-CGEMMITCOPYOBJ = cgemm_itcopy.o +CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX) +CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX) +CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX) +CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX) ZGEMMKERNEL = zgemm_kernel_8x2_power8.S ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c ZGEMMINCOPY = ../generic/zgemm_ncopy_8.c ZGEMMITCOPY = zgemm_tcopy_8_power8.S -ZGEMMONCOPYOBJ = zgemm_oncopy.o -ZGEMMOTCOPYOBJ = zgemm_otcopy.o -ZGEMMINCOPYOBJ = zgemm_incopy.o -ZGEMMITCOPYOBJ = zgemm_itcopy.o +ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX) +ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX) +ZGEMMINCOPYOBJ = zgemm_incopy$(TSUFFIX).$(SUFFIX) +ZGEMMITCOPYOBJ = zgemm_itcopy$(TSUFFIX).$(SUFFIX) STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c diff --git a/kernel/setparam-ref.c b/kernel/setparam-ref.c index 6d4028b..b964a8b 100644 --- a/kernel/setparam-ref.c +++ b/kernel/setparam-ref.c @@ -718,6 +718,27 @@ static void init_parameter(void) { } #else // defined(ARCH_ARM64) +#if defined(ARCH_POWER) +static void init_parameter(void) { + /* POWER variant: only copies each S/D/C/ZGEMM_DEFAULT_P, _R and _Q value into the matching TABLE_NAME field; nothing else is computed here. NOTE(review): the *_DEFAULT_* names are presumably compile-time constants from the build configuration - confirm. */ + TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P; + TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P; + TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P; + TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P; + + TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R; + TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R; + TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R; + TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R; + + + TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q; + TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q; + TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q; + TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q; +} +#else //POWER + #ifdef ARCH_X86 static int get_l2_size_old(void){ int i, eax, ebx, ecx, edx, cpuid_level; @@ -1303,4 +1324,5 @@ static void init_parameter(void) { } +#endif //POWER #endif //defined(ARCH_ARM64) -- 2.7.4