From 410afda9b44e1920bf3f7b6bdcd852da0f4c4da9 Mon Sep 17 00:00:00 2001 From: wernsaar Date: Thu, 21 Nov 2013 20:18:51 +0100 Subject: [PATCH] added cpu detection and target ARMV6, used in raspberry pi --- Makefile.arm | 9 +- cpuid_arm.c | 262 ++++++++++++++++++++++++++++++++++++++++++++++++ getarch.c | 39 ++++++- kernel/arm/KERNEL.ARMV6 | 134 +++++++++++++++++++++++++ param.h | 40 ++++++++ 5 files changed, 477 insertions(+), 7 deletions(-) create mode 100644 cpuid_arm.c create mode 100644 kernel/arm/KERNEL.ARMV6 diff --git a/Makefile.arm b/Makefile.arm index 6cdeb2f..8502d52 100644 --- a/Makefile.arm +++ b/Makefile.arm @@ -1,7 +1,12 @@ ifeq ($(CORE), ARMV7) -CCOMMON_OPT += -marm -mfpu=vfpv3 -mfloat-abi=hard -FCOMMON_OPT += -marm -mfpu=vfpv3 -mfloat-abi=hard +CCOMMON_OPT += -marm -mfpu=vfpv3 -mfloat-abi=hard -march=armv7-a +FCOMMON_OPT += -marm -mfpu=vfpv3 -mfloat-abi=hard -march=armv7-a +endif + +ifeq ($(CORE), ARMV6) +CCOMMON_OPT += -marm -mfpu=vfp -mfloat-abi=hard -march=armv6 +FCOMMON_OPT += -marm -mfpu=vfp -mfloat-abi=hard -march=armv6 endif diff --git a/cpuid_arm.c b/cpuid_arm.c new file mode 100644 index 0000000..efd1369 --- /dev/null +++ b/cpuid_arm.c @@ -0,0 +1,262 @@ +/************************************************************************** + Copyright (c) 2013, The OpenBLAS Project + All rights reserved. + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + 3. 
Neither the name of the OpenBLAS project nor the names of
+ its contributors may be used to endorse or promote products
+ derived from this software without specific prior written permission.
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
+ LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
+ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *****************************************************************************/
+
+#include <stdio.h>
+#include <string.h>
+
+#define CPU_UNKNOWN 0
+#define CPU_ARMV6 1
+#define CPU_ARMV7 2
+#define CPU_CORTEXA15 3
+
+/* Core names, indexed by the CPU_* values above. */
+static char *cpuname[] = {
+    "UNKNOWN",
+    "ARMV6",
+    "ARMV7",
+    "CORTEXA15"
+};
+
+/*
+ * The bare "linux" macro is not predefined when the compiler runs in a strict
+ * ISO mode (e.g. -std=c99), so accept the reserved spelling as well.
+ */
+#if defined(__linux__) || defined(linux)
+#define CPUID_ARM_LINUX 1
+#endif
+
+/*
+ * Find the first "/proc/cpuinfo" line that starts with `key` and has a ':'
+ * separator, and copy the text after the separator into `out` with leading
+ * blanks and the trailing newline removed (truncated to `outlen` - 1 chars).
+ *
+ * Returns 1 when a value was stored, 0 when the file cannot be opened, no
+ * such line exists, or the platform is not Linux.
+ */
+static int arm_cpuinfo_field(const char *key, char *out, size_t outlen)
+{
+    int found = 0;
+
+#ifdef CPUID_ARM_LINUX
+    char line[2048];
+    size_t keylen = strlen(key);
+    FILE *infile = fopen("/proc/cpuinfo", "r");
+
+    if (infile == NULL) return 0;
+
+    while (!found && fgets(line, sizeof(line), infile))
+    {
+        char *value;
+
+        if (strncmp(key, line, keylen)) continue;
+
+        value = strchr(line, ':');
+        if (value == NULL) continue;
+
+        value++;
+        while (*value == ' ' || *value == '\t') value++;
+        value[strcspn(value, "\r\n")] = '\0';
+
+        snprintf(out, outlen, "%s", value);
+        found = 1;
+    }
+
+    fclose(infile);
+#else
+    (void) key;
+    (void) out;
+    (void) outlen;
+#endif
+
+    return found;
+}
+
+/*
+ * Return 1 when `search` is one of the words on the "Features" line of
+ * /proc/cpuinfo, 0 otherwise (including when the line cannot be read).
+ * Every word is checked, the first and the newline-terminated last included.
+ */
+int get_feature(char *search)
+{
+    char features[2048];
+    char *t;
+
+    if (!arm_cpuinfo_field("Features", features, sizeof(features)))
+        return(0);
+
+    for (t = strtok(features, " \t"); t != NULL; t = strtok(NULL, " \t"))
+    {
+        if (!strcmp(t, search)) { return(1); }
+    }
+
+    return(0);
+}
+
+/*
+ * Classify the host CPU from the "model name" and "Features" lines of
+ * /proc/cpuinfo. Returns one of the CPU_* constants; CPU_UNKNOWN when the
+ * model is not ARMv6/ARMv7, no VFP unit is reported, or nothing can be read.
+ */
+int detect(void)
+{
+    char model[512];
+
+    if (!arm_cpuinfo_field("model name", model, sizeof(model)))
+        return CPU_UNKNOWN;
+
+    if (strstr(model, "ARMv7"))
+    {
+        if (get_feature("vfpv4")) return CPU_ARMV7;
+        if (get_feature("vfpv3")) return CPU_ARMV7;
+        /* "vfp" without "vfpv3"/"vfpv4": fall back to the ARMV6 target. */
+        if (get_feature("vfp")) return CPU_ARMV6;
+    }
+
+    if (strstr(model, "ARMv6"))
+    {
+        if (get_feature("vfp")) return CPU_ARMV6;
+    }
+
+    return CPU_UNKNOWN;
+}
+
+/* Name of the detected core, as listed in cpuname[]. */
+char *get_corename(void)
+{
+    return cpuname[detect()];
+}
+
+/* Print the architecture family. */
+void get_architecture(void)
+{
+    printf("ARM");
+}
+
+/* Print the detected sub-architecture, or "UNKNOWN". */
+void get_subarchitecture(void)
+{
+    switch (detect())
+    {
+    case CPU_ARMV7:
+        printf("ARMV7");
+        break;
+
+    case CPU_ARMV6:
+        printf("ARMV6");
+        break;
+
+    default:
+        printf("UNKNOWN");
+        break;
+    }
+}
+
+/* Print the sub-directory name used for this architecture. */
+void get_subdirname(void)
+{
+    printf("arm");
+}
+
+/*
+ * Print the "#define" lines describing the detected core. Nothing is printed
+ * for CPU_UNKNOWN.
+ */
+void get_cpuconfig(void)
+{
+    switch (detect())
+    {
+    case CPU_ARMV7:
+        printf("#define ARMV7\n");
+        printf("#define HAVE_VFP\n");
+        printf("#define HAVE_VFPV3\n");
+        if (get_feature("neon")) printf("#define HAVE_NEON\n");
+        if (get_feature("vfpv4")) printf("#define HAVE_VFPV4\n");
+        printf("#define L1_DATA_SIZE 65536\n");
+        printf("#define L1_DATA_LINESIZE 32\n");
+        printf("#define L2_SIZE 512488\n");
+        printf("#define L2_LINESIZE 32\n");
+        printf("#define DTB_DEFAULT_ENTRIES 64\n");
+        printf("#define DTB_SIZE 4096\n");
+        printf("#define L2_ASSOCIATIVE 4\n");
+        break;
+
+    case CPU_ARMV6:
+        printf("#define ARMV6\n");
+        printf("#define HAVE_VFP\n");
+        printf("#define L1_DATA_SIZE 65536\n");
+        printf("#define L1_DATA_LINESIZE 32\n");
+        printf("#define L2_SIZE 512488\n");
+        printf("#define L2_LINESIZE 32\n");
+        printf("#define DTB_DEFAULT_ENTRIES 64\n");
+        printf("#define DTB_SIZE 4096\n");
+        printf("#define L2_ASSOCIATIVE 4\n");
+        break;
+
+    default:
+        break;
+    }
+}
+
+/* Print the library name for the detected core (nothing when unknown). */
+void get_libname(void)
+{
+    switch (detect())
+    {
+    case CPU_ARMV7:
+        printf("armv7\n");
+        break;
+
+    case CPU_ARMV6:
+        printf("armv6\n");
+        break;
+
+    default:
+        break;
+    }
+}
+
+/*
+ * Print one "HAVE_xxx=1" line for each of vfp, vfpv3, vfpv4 and neon found on
+ * the "Features" line of /proc/cpuinfo, in the order they appear there.
+ */
+void get_features(void)
+{
+    char features[2048];
+    char *t;
+
+    if (!arm_cpuinfo_field("Features", features, sizeof(features))) return;
+
+    for (t = strtok(features, " \t"); t != NULL; t = strtok(NULL, " \t"))
+    {
+        if (!strcmp(t, "vfp")) { printf("HAVE_VFP=1\n"); continue; }
+        if (!strcmp(t, "vfpv3")) { printf("HAVE_VFPV3=1\n"); continue; }
+        if (!strcmp(t, "vfpv4")) { printf("HAVE_VFPV4=1\n"); continue; }
+        if (!strcmp(t, "neon")) { printf("HAVE_NEON=1\n"); continue; }
+    }
+}
+
+
diff --git a/getarch.c b/getarch.c index 3264a76..4407e3d 100644 --- a/getarch.c +++ b/getarch.c @@ -687,23 +687,42 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define ARCHCONFIG "-DARMV7 " \ "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=32 " \ "-DL2_SIZE=512488 -DL2_LINESIZE=32 " \ - "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 " + "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 " \ + "-DHAVE_VFPV3 -DHAVE_VFP" #define LIBNAME "armv7" #define CORENAME "ARMV7" #else #endif +#ifdef FORCE_ARMV6 +#define FORCE +#define ARCHITECTURE "ARM" +#define SUBARCHITECTURE "ARMV6" +#define SUBDIRNAME "arm" +#define ARCHCONFIG "-DARMV6 " \ + "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=32 " \ + "-DL2_SIZE=512488 -DL2_LINESIZE=32 " \ + "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 " \ + "-DHAVE_VFP" +#define LIBNAME "armv6" +#define CORENAME "ARMV6" +#else +#endif + + #ifndef FORCE #if defined(__powerpc__) || defined(__powerpc) || defined(powerpc) || \ - defined(__PPC__) || defined(PPC) || defined(_POWER) || defined(__POWERPC__) + defined(__PPC__) || defined(PPC) || defined(_POWER) || defined(__POWERPC__) + #ifndef POWER #define POWER #endif #define OPENBLAS_SUPPORTED #endif + #if defined(__i386__) || (__x86_64__) #include "cpuid_x86.c" #define OPENBLAS_SUPPORTED @@ -734,12 +753,16 @@ USE OF
THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define OPENBLAS_SUPPORTED #endif +#ifdef __arm__ +#include "cpuid_arm.c" +#define OPENBLAS_SUPPORTED +#endif + + #ifndef OPENBLAS_SUPPORTED #error "This arch/CPU is not supported by OpenBLAS." #endif -#else - #endif static int get_num_cores(void) { @@ -788,7 +811,7 @@ int main(int argc, char *argv[]){ #ifdef FORCE printf("CORE=%s\n", CORENAME); #else -#if defined(__i386__) || defined(__x86_64__) || defined(POWER) || defined(__mips__) +#if defined(__i386__) || defined(__x86_64__) || defined(POWER) || defined(__mips__) || defined(__arm__) printf("CORE=%s\n", get_corename()); #endif #endif @@ -803,6 +826,12 @@ int main(int argc, char *argv[]){ printf("NUM_CORES=%d\n", get_num_cores()); +#if defined(__arm__) && !defined(FORCE) + get_features(); +#endif + + + #if defined(__i386__) || defined(__x86_64__) #ifndef FORCE get_sse(); diff --git a/kernel/arm/KERNEL.ARMV6 b/kernel/arm/KERNEL.ARMV6 new file mode 100644 index 0000000..e379347 --- /dev/null +++ b/kernel/arm/KERNEL.ARMV6 @@ -0,0 +1,134 @@ +SAMAXKERNEL = amax.c +DAMAXKERNEL = amax.c +CAMAXKERNEL = zamax.c +ZAMAXKERNEL = zamax.c + +SAMINKERNEL = amin.c +DAMINKERNEL = amin.c +CAMINKERNEL = zamin.c +ZAMINKERNEL = zamin.c + +SMAXKERNEL = max.c +DMAXKERNEL = max.c + +SMINKERNEL = min.c +DMINKERNEL = min.c + +ISAMAXKERNEL = iamax.c +IDAMAXKERNEL = iamax.c +ICAMAXKERNEL = izamax.c +IZAMAXKERNEL = izamax.c + +ISAMINKERNEL = iamin.c +IDAMINKERNEL = iamin.c +ICAMINKERNEL = izamin.c +IZAMINKERNEL = izamin.c + +ISMAXKERNEL = imax.c +IDMAXKERNEL = imax.c + +ISMINKERNEL = imin.c +IDMINKERNEL = imin.c + +SASUMKERNEL = asum.c +DASUMKERNEL = asum.c +CASUMKERNEL = zasum.c +ZASUMKERNEL = zasum.c + +SAXPYKERNEL = axpy.c +DAXPYKERNEL = axpy.c +CAXPYKERNEL = zaxpy.c +ZAXPYKERNEL = zaxpy.c + +SCOPYKERNEL = copy.c +DCOPYKERNEL = copy.c +CCOPYKERNEL = zcopy.c +ZCOPYKERNEL = zcopy.c + +SDOTKERNEL = dot.c +DDOTKERNEL = dot.c +CDOTKERNEL = zdot.c +ZDOTKERNEL = zdot.c + 
+SNRM2KERNEL = nrm2.c +DNRM2KERNEL = nrm2.c +CNRM2KERNEL = znrm2.c +ZNRM2KERNEL = znrm2.c + +SROTKERNEL = rot.c +DROTKERNEL = rot.c +CROTKERNEL = zrot.c +ZROTKERNEL = zrot.c + +SSCALKERNEL = scal.c +DSCALKERNEL = scal.c +CSCALKERNEL = zscal.c +ZSCALKERNEL = zscal.c + +SSWAPKERNEL = swap.c +DSWAPKERNEL = swap.c +CSWAPKERNEL = zswap.c +ZSWAPKERNEL = zswap.c + +SGEMVNKERNEL = gemv_n.c +DGEMVNKERNEL = gemv_n.c +CGEMVNKERNEL = zgemv_n.c +ZGEMVNKERNEL = zgemv_n.c + +SGEMVTKERNEL = gemv_t.c +DGEMVTKERNEL = gemv_t.c +CGEMVTKERNEL = zgemv_t.c +ZGEMVTKERNEL = zgemv_t.c + +STRMMKERNEL = ../generic/trmmkernel_2x2.c +DTRMMKERNEL = ../generic/trmmkernel_2x2.c +CTRMMKERNEL = ../generic/ztrmmkernel_2x2.c +ZTRMMKERNEL = ../generic/ztrmmkernel_2x2.c + +SGEMMKERNEL = ../generic/gemmkernel_2x2.c +SGEMMONCOPY = ../generic/gemm_ncopy_2.c +SGEMMOTCOPY = ../generic/gemm_tcopy_2.c +SGEMMONCOPYOBJ = sgemm_oncopy.o +SGEMMOTCOPYOBJ = sgemm_otcopy.o + +DGEMMKERNEL = ../generic/gemmkernel_2x2.c +DGEMMONCOPY = ../generic/gemm_ncopy_2.c +DGEMMOTCOPY = ../generic/gemm_tcopy_2.c +DGEMMONCOPYOBJ = dgemm_oncopy.o +DGEMMOTCOPYOBJ = dgemm_otcopy.o + +CGEMMKERNEL = ../generic/zgemmkernel_2x2.c +CGEMMONCOPY = ../generic/zgemm_ncopy_2.c +CGEMMOTCOPY = ../generic/zgemm_tcopy_2.c +CGEMMONCOPYOBJ = cgemm_oncopy.o +CGEMMOTCOPYOBJ = cgemm_otcopy.o + +ZGEMMKERNEL = ../generic/zgemmkernel_2x2.c +ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c +ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c +ZGEMMONCOPYOBJ = zgemm_oncopy.o +ZGEMMOTCOPYOBJ = zgemm_otcopy.o + +STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c +STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c +STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c +STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c + +DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c +DTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c +DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c +DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c + +CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c +CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c 
+CTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c +CTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c + +ZTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c +ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c +ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c +ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c + + + + diff --git a/param.h b/param.h index ab0ed91..7bb27f3 100644 --- a/param.h +++ b/param.h @@ -1835,6 +1835,46 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #endif +#if defined(ARMV6) +#define SNUMOPT 2 +#define DNUMOPT 2 + +#define GEMM_DEFAULT_OFFSET_A 0 +#define GEMM_DEFAULT_OFFSET_B 0 +#define GEMM_DEFAULT_ALIGN 0x03fffUL + +#define SGEMM_DEFAULT_UNROLL_M 2 +#define SGEMM_DEFAULT_UNROLL_N 2 + +#define DGEMM_DEFAULT_UNROLL_M 2 +#define DGEMM_DEFAULT_UNROLL_N 2 + +#define CGEMM_DEFAULT_UNROLL_M 2 +#define CGEMM_DEFAULT_UNROLL_N 2 + +#define ZGEMM_DEFAULT_UNROLL_M 2 +#define ZGEMM_DEFAULT_UNROLL_N 2 + +#define SGEMM_DEFAULT_P 128 +#define DGEMM_DEFAULT_P 128 +#define CGEMM_DEFAULT_P 96 +#define ZGEMM_DEFAULT_P 64 + +#define SGEMM_DEFAULT_Q 240 +#define DGEMM_DEFAULT_Q 120 +#define CGEMM_DEFAULT_Q 120 +#define ZGEMM_DEFAULT_Q 120 + +#define SGEMM_DEFAULT_R 12288 +#define DGEMM_DEFAULT_R 8192 +#define CGEMM_DEFAULT_R 4096 +#define ZGEMM_DEFAULT_R 4096 + + +#define SYMV_P 16 +#endif + + #ifdef GENERIC -- 2.7.4