added cpu detection and target ARMV6, used in raspberry pi
author wernsaar <wernsaar@googlemail.com>
Thu, 21 Nov 2013 19:18:51 +0000 (20:18 +0100)
committer wernsaar <wernsaar@googlemail.com>
Thu, 21 Nov 2013 19:18:51 +0000 (20:18 +0100)
Makefile.arm
cpuid_arm.c [new file with mode: 0644]
getarch.c
kernel/arm/KERNEL.ARMV6 [new file with mode: 0644]
param.h

index 6cdeb2f..8502d52 100644 (file)
@@ -1,7 +1,12 @@
 
 ifeq ($(CORE), ARMV7)
-CCOMMON_OPT += -marm -mfpu=vfpv3  -mfloat-abi=hard
-FCOMMON_OPT += -marm -mfpu=vfpv3  -mfloat-abi=hard
+CCOMMON_OPT += -marm -mfpu=vfpv3  -mfloat-abi=hard -march=armv7-a
+FCOMMON_OPT += -marm -mfpu=vfpv3  -mfloat-abi=hard -march=armv7-a
+endif
+
+# ARMV6 target: ARM-mode code generation for armv6, using the VFP unit
+# and the hard-float ABI, for both the C and the Fortran compiler flags.
+ifeq ($(CORE), ARMV6)
+CCOMMON_OPT += -marm -mfpu=vfp -mfloat-abi=hard  -march=armv6
+FCOMMON_OPT += -marm -mfpu=vfp -mfloat-abi=hard  -march=armv6
 endif
 
 
diff --git a/cpuid_arm.c b/cpuid_arm.c
new file mode 100644 (file)
index 0000000..efd1369
--- /dev/null
@@ -0,0 +1,262 @@
+/**************************************************************************
+  Copyright (c) 2013, The OpenBLAS Project
+  All rights reserved.
+  Redistribution and use in source and binary forms, with or without
+  modification, are permitted provided that the following conditions are
+  met:
+  1. Redistributions of source code must retain the above copyright
+  notice, this list of conditions and the following disclaimer.
+  2. Redistributions in binary form must reproduce the above copyright
+  notice, this list of conditions and the following disclaimer in
+  the documentation and/or other materials provided with the
+  distribution.
+  3. Neither the name of the OpenBLAS project nor the names of
+  its contributors may be used to endorse or promote products
+  derived from this software without specific prior written permission.
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+  ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
+  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+  DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+  SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+  CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+  OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
+  USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+  *****************************************************************************/
+
+#include <stdio.h>
+#include <string.h>
+
+/* Core identifiers returned by detect(); each value is an index into cpuname[]. */
+#define CPU_UNKNOWN            0
+#define CPU_ARMV6              1
+#define CPU_ARMV7              2
+#define CPU_CORTEXA15          3
+
+/*
+ * Core names indexed by the CPU_* identifiers above.
+ * Entry 0 is spelled "UNKNOWN" so that it agrees with the string printed
+ * by get_subarchitecture() for an undetected core.
+ */
+static char *cpuname[] = {
+  "UNKNOWN",
+  "ARMV6",
+  "ARMV7",
+  "CORTEXA15"
+};
+
+
+/*
+ * Report whether the first "Features" line of /proc/cpuinfo lists `search`.
+ *
+ * search: feature name to look for (for example "vfp" or "neon"); it is
+ *         compared against each whitespace-separated token with strcmp().
+ *
+ * Returns 1 when the token is present and 0 otherwise. 0 is also returned
+ * when /proc/cpuinfo cannot be opened, when no usable "Features" line is
+ * found, and on builds where the Linux code path is compiled out.
+ */
+int get_feature(char *search)
+{
+
+/* `linux` is not predefined in strict ISO modes, so accept __linux__ too. */
+#if defined(linux) || defined(__linux__)
+       FILE *infile;
+       char buffer[2048], *p, *t;
+
+       p = (char *) NULL;
+
+       infile = fopen("/proc/cpuinfo", "r");
+       if (infile == NULL) return(0);
+
+       while (fgets(buffer, sizeof(buffer), infile))
+       {
+               if (!strncmp("Features", buffer, 8))
+               {
+                       /* Tokenising starts right after the colon; a line
+                          without one leaves p NULL instead of dangling. */
+                       p = strchr(buffer, ':');
+                       if (p != NULL) p++;
+                       break;
+               }
+       }
+
+       fclose(infile);
+
+       if (p == NULL) return(0);
+
+       /* Examine every token, including the first one, and treat the
+          trailing newline kept by fgets() as a separator. */
+       for (t = strtok(p, " \t\r\n"); t != NULL; t = strtok(NULL, " \t\r\n"))
+       {
+               if (!strcmp(t, search)) { return(1); }
+       }
+
+#endif
+       return(0);
+}
+
+
+/*
+ * Classify the host CPU from /proc/cpuinfo.
+ *
+ * The CPU description line is searched for "ARMv7" or "ARMv6" and the
+ * result is refined with the feature flags reported by get_feature():
+ *   - ARMv7 with vfpv4 or vfpv3       -> CPU_ARMV7
+ *   - ARMv7 with only vfp             -> CPU_ARMV6
+ *   - ARMv6 with vfp                  -> CPU_ARMV6
+ *
+ * Returns CPU_UNKNOWN in every other case, including when /proc/cpuinfo
+ * cannot be opened and on builds where the Linux code path is compiled out.
+ */
+int detect(void)
+{
+
+/* `linux` is not predefined in strict ISO modes, so accept __linux__ too. */
+#if defined(linux) || defined(__linux__)
+
+       FILE *infile;
+       char buffer[512], *p;
+
+       p = (char *) NULL;
+
+       infile = fopen("/proc/cpuinfo", "r");
+       if (infile == NULL) return CPU_UNKNOWN;
+
+       while (fgets(buffer, sizeof(buffer), infile))
+       {
+               /* Some ARM kernels label the CPU description line
+                  "Processor" instead of "model name"; accept both. */
+               if (!strncmp("model name", buffer, 10) ||
+                   !strncmp("Processor", buffer, 9))
+               {
+                       /* strstr() below scans the rest of the line, so no
+                          fixed offset past the colon is needed. */
+                       p = strchr(buffer, ':');
+                       break;
+               }
+       }
+
+       fclose(infile);
+
+       if (p != NULL)
+       {
+               if (strstr(p, "ARMv7"))
+               {
+                       if (get_feature("vfpv4") || get_feature("vfpv3"))
+                               return CPU_ARMV7;
+
+                       if (get_feature("vfp"))
+                               return CPU_ARMV6;
+               }
+
+               if (strstr(p, "ARMv6"))
+               {
+                       if (get_feature("vfp"))
+                               return CPU_ARMV6;
+               }
+       }
+#endif
+
+       return CPU_UNKNOWN;
+}
+
+/* Return the cpuname[] entry for the core reported by detect(). */
+char *get_corename(void)
+{
+       int core_id = detect();
+
+       return cpuname[core_id];
+}
+
+/* Write the architecture name "ARM" to stdout, without a newline. */
+void get_architecture(void)
+{
+       fputs("ARM", stdout);
+}
+
+/*
+ * Write the sub-architecture name of the detected core to stdout,
+ * without a newline: "ARMV7", "ARMV6", or "UNKNOWN" for anything else.
+ */
+void get_subarchitecture(void)
+{
+       const char *label = "UNKNOWN";
+       int core_id = detect();
+
+       if (core_id == CPU_ARMV7)
+               label = "ARMV7";
+       else if (core_id == CPU_ARMV6)
+               label = "ARMV6";
+
+       fputs(label, stdout);
+}
+
+/* Write the directory name "arm" to stdout, without a newline. */
+void get_subdirname(void)
+{
+       fputs("arm", stdout);
+}
+
+/*
+ * Print the configuration #define lines for the detected core to stdout.
+ *
+ * CPU_ARMV7 prints ARMV7, HAVE_VFP and HAVE_VFPV3, plus HAVE_NEON and
+ * HAVE_VFPV4 when get_feature() reports them. CPU_ARMV6 prints ARMV6 and
+ * HAVE_VFP. Both are followed by the same cache and TLB parameters.
+ * Nothing is printed for any other core.
+ */
+void get_cpuconfig(void)
+{
+       int core_id = detect();
+
+       if (core_id == CPU_ARMV7)
+       {
+               printf("#define ARMV7\n");
+               printf("#define HAVE_VFP\n");
+               printf("#define HAVE_VFPV3\n");
+               if (get_feature("neon"))
+                       printf("#define HAVE_NEON\n");
+               if (get_feature("vfpv4"))
+                       printf("#define HAVE_VFPV4\n");
+       }
+       else if (core_id == CPU_ARMV6)
+       {
+               printf("#define ARMV6\n");
+               printf("#define HAVE_VFP\n");
+       }
+       else
+       {
+               return;
+       }
+
+       /* Cache and TLB parameters are identical for both supported cores. */
+       printf("#define L1_DATA_SIZE 65536\n");
+       printf("#define L1_DATA_LINESIZE 32\n");
+       printf("#define L2_SIZE 512488\n");
+       printf("#define L2_LINESIZE 32\n");
+       printf("#define DTB_DEFAULT_ENTRIES 64\n");
+       printf("#define DTB_SIZE 4096\n");
+       printf("#define L2_ASSOCIATIVE 4\n");
+}
+
+
+/*
+ * Print the library name suffix for the detected core, followed by a
+ * newline: "armv7" or "armv6". Nothing is printed for any other core.
+ */
+void get_libname(void)
+{
+       int core_id = detect();
+
+       if (core_id == CPU_ARMV7)
+       {
+               printf("armv7\n");
+       }
+       else if (core_id == CPU_ARMV6)
+       {
+               printf("armv6\n");
+       }
+}
+
+
+/*
+ * Print one "HAVE_<FEATURE>=1" line to stdout for each of vfp, vfpv3,
+ * vfpv4 and neon found on the first "Features" line of /proc/cpuinfo.
+ *
+ * Nothing is printed when /proc/cpuinfo cannot be opened, when no usable
+ * "Features" line is found, or on builds where the Linux code path is
+ * compiled out.
+ */
+void get_features(void)
+{
+
+/* `linux` is not predefined in strict ISO modes, so accept __linux__ too. */
+#if defined(linux) || defined(__linux__)
+       FILE *infile;
+       char buffer[2048], *p, *t;
+
+       p = (char *) NULL;
+
+       infile = fopen("/proc/cpuinfo", "r");
+       if (infile == NULL) return;
+
+       while (fgets(buffer, sizeof(buffer), infile))
+       {
+               if (!strncmp("Features", buffer, 8))
+               {
+                       /* Tokenising starts right after the colon; a line
+                          without one leaves p NULL instead of dangling. */
+                       p = strchr(buffer, ':');
+                       if (p != NULL) p++;
+                       break;
+               }
+       }
+
+       fclose(infile);
+
+       if (p == NULL) return;
+
+       /* Examine every token, including the first one, and treat the
+          trailing newline kept by fgets() as a separator. */
+       for (t = strtok(p, " \t\r\n"); t != NULL; t = strtok(NULL, " \t\r\n"))
+       {
+               if (!strcmp(t, "vfp"))   { printf("HAVE_VFP=1\n"); continue; }
+               if (!strcmp(t, "vfpv3")) { printf("HAVE_VFPV3=1\n"); continue; }
+               if (!strcmp(t, "vfpv4")) { printf("HAVE_VFPV4=1\n"); continue; }
+               if (!strcmp(t, "neon"))  { printf("HAVE_NEON=1\n"); continue; }
+       }
+
+#endif
+       return;
+}
+
+
index 3264a76..4407e3d 100644 (file)
--- a/getarch.c
+++ b/getarch.c
@@ -687,23 +687,42 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define ARCHCONFIG   "-DARMV7 " \
        "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=32 " \
        "-DL2_SIZE=512488 -DL2_LINESIZE=32 " \
-       "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 "
+       "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 " \
+       "-DHAVE_VFPV3 -DHAVE_VFP"
 #define LIBNAME   "armv7"
 #define CORENAME  "ARMV7"
 #else
 #endif
 
+/*
+ * Forced ARMV6 target: defines FORCE and supplies fixed architecture
+ * strings and compiler defines. The ARCHCONFIG values are the same ones
+ * that get_cpuconfig() in cpuid_arm.c prints for CPU_ARMV6.
+ * NOTE(review): L2_SIZE=512488 is not a power of two; possibly 524288
+ * was intended -- confirm before changing, the same value is used for ARMV7.
+ */
+#ifdef FORCE_ARMV6
+#define FORCE
+#define ARCHITECTURE    "ARM"
+#define SUBARCHITECTURE "ARMV6"
+#define SUBDIRNAME      "arm"
+#define ARCHCONFIG   "-DARMV6 " \
+       "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=32 " \
+       "-DL2_SIZE=512488 -DL2_LINESIZE=32 " \
+       "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 " \
+       "-DHAVE_VFP"
+#define LIBNAME   "armv6"
+#define CORENAME  "ARMV6"
+#else
+#endif
+
+
 
 #ifndef FORCE
 
 #if defined(__powerpc__) || defined(__powerpc) || defined(powerpc) || \
-    defined(__PPC__) || defined(PPC) || defined(_POWER) || defined(__POWERPC__)
+    defined(__PPC__) || defined(PPC) || defined(_POWER) || defined(__POWERPC__) 
+
 #ifndef POWER
 #define POWER
 #endif
 #define OPENBLAS_SUPPORTED
 #endif
 
+
 #if defined(__i386__) || (__x86_64__)
 #include "cpuid_x86.c"
 #define OPENBLAS_SUPPORTED
@@ -734,12 +753,16 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define OPENBLAS_SUPPORTED
 #endif
 
+#ifdef __arm__
+#include "cpuid_arm.c"
+#define OPENBLAS_SUPPORTED
+#endif
+
+
 #ifndef OPENBLAS_SUPPORTED
 #error "This arch/CPU is not supported by OpenBLAS."
 #endif
 
-#else
-
 #endif
 
 static int get_num_cores(void) {
@@ -788,7 +811,7 @@ int main(int argc, char *argv[]){
 #ifdef FORCE
     printf("CORE=%s\n", CORENAME);
 #else    
-#if defined(__i386__) || defined(__x86_64__) || defined(POWER) || defined(__mips__)
+#if defined(__i386__) || defined(__x86_64__) || defined(POWER) || defined(__mips__) || defined(__arm__)
     printf("CORE=%s\n", get_corename());
 #endif
 #endif
@@ -803,6 +826,12 @@ int main(int argc, char *argv[]){
 
     printf("NUM_CORES=%d\n", get_num_cores());
 
+#if defined(__arm__) && !defined(FORCE)
+       get_features();
+#endif
+
+
+
 #if defined(__i386__) || defined(__x86_64__)
 #ifndef FORCE
     get_sse();
diff --git a/kernel/arm/KERNEL.ARMV6 b/kernel/arm/KERNEL.ARMV6
new file mode 100644 (file)
index 0000000..e379347
--- /dev/null
@@ -0,0 +1,134 @@
+# Kernel source selection for the ARMV6 target. Every entry in this file
+# names a C source file; the TRMM, GEMM and TRSM entries further down
+# point at implementations under ../generic.
+#
+# AMAX kernels: the S and D entries share amax.c, the C and Z entries
+# share zamax.c.
+SAMAXKERNEL  = amax.c
+DAMAXKERNEL  = amax.c
+CAMAXKERNEL  = zamax.c
+ZAMAXKERNEL  = zamax.c
+
+SAMINKERNEL  = amin.c
+DAMINKERNEL  = amin.c
+CAMINKERNEL  = zamin.c
+ZAMINKERNEL  = zamin.c
+
+SMAXKERNEL   = max.c
+DMAXKERNEL   = max.c
+
+SMINKERNEL   = min.c
+DMINKERNEL   = min.c
+
+ISAMAXKERNEL = iamax.c
+IDAMAXKERNEL = iamax.c
+ICAMAXKERNEL = izamax.c
+IZAMAXKERNEL = izamax.c
+
+ISAMINKERNEL = iamin.c
+IDAMINKERNEL = iamin.c
+ICAMINKERNEL = izamin.c
+IZAMINKERNEL = izamin.c
+
+ISMAXKERNEL  = imax.c
+IDMAXKERNEL  = imax.c
+
+ISMINKERNEL  = imin.c
+IDMINKERNEL  = imin.c
+
+SASUMKERNEL  = asum.c
+DASUMKERNEL  = asum.c
+CASUMKERNEL  = zasum.c
+ZASUMKERNEL  = zasum.c
+
+SAXPYKERNEL  = axpy.c
+DAXPYKERNEL  = axpy.c
+CAXPYKERNEL  = zaxpy.c
+ZAXPYKERNEL  = zaxpy.c
+
+SCOPYKERNEL  = copy.c
+DCOPYKERNEL  = copy.c
+CCOPYKERNEL  = zcopy.c
+ZCOPYKERNEL  = zcopy.c
+
+SDOTKERNEL   = dot.c
+DDOTKERNEL   = dot.c
+CDOTKERNEL   = zdot.c
+ZDOTKERNEL   = zdot.c
+
+SNRM2KERNEL  = nrm2.c
+DNRM2KERNEL  = nrm2.c
+CNRM2KERNEL  = znrm2.c
+ZNRM2KERNEL  = znrm2.c
+
+SROTKERNEL   = rot.c
+DROTKERNEL   = rot.c
+CROTKERNEL   = zrot.c
+ZROTKERNEL   = zrot.c
+
+SSCALKERNEL  =  scal.c
+DSCALKERNEL  =  scal.c
+CSCALKERNEL  =  zscal.c
+ZSCALKERNEL  =  zscal.c
+
+SSWAPKERNEL  =  swap.c
+DSWAPKERNEL  =  swap.c
+CSWAPKERNEL  =  zswap.c
+ZSWAPKERNEL  =  zswap.c
+
+SGEMVNKERNEL = gemv_n.c
+DGEMVNKERNEL = gemv_n.c
+CGEMVNKERNEL = zgemv_n.c
+ZGEMVNKERNEL = zgemv_n.c
+
+SGEMVTKERNEL = gemv_t.c
+DGEMVTKERNEL = gemv_t.c
+CGEMVTKERNEL = zgemv_t.c
+ZGEMVTKERNEL = zgemv_t.c
+
+STRMMKERNEL    = ../generic/trmmkernel_2x2.c
+DTRMMKERNEL    = ../generic/trmmkernel_2x2.c
+CTRMMKERNEL    = ../generic/ztrmmkernel_2x2.c
+ZTRMMKERNEL    = ../generic/ztrmmkernel_2x2.c
+
+SGEMMKERNEL    =  ../generic/gemmkernel_2x2.c          
+SGEMMONCOPY    =  ../generic/gemm_ncopy_2.c
+SGEMMOTCOPY    =  ../generic/gemm_tcopy_2.c
+SGEMMONCOPYOBJ =  sgemm_oncopy.o
+SGEMMOTCOPYOBJ =  sgemm_otcopy.o
+
+DGEMMKERNEL    =  ../generic/gemmkernel_2x2.c          
+DGEMMONCOPY    = ../generic/gemm_ncopy_2.c
+DGEMMOTCOPY    = ../generic/gemm_tcopy_2.c
+DGEMMONCOPYOBJ = dgemm_oncopy.o
+DGEMMOTCOPYOBJ = dgemm_otcopy.o
+
+CGEMMKERNEL    = ../generic/zgemmkernel_2x2.c
+CGEMMONCOPY    = ../generic/zgemm_ncopy_2.c
+CGEMMOTCOPY    = ../generic/zgemm_tcopy_2.c
+CGEMMONCOPYOBJ =  cgemm_oncopy.o
+CGEMMOTCOPYOBJ =  cgemm_otcopy.o
+
+ZGEMMKERNEL    = ../generic/zgemmkernel_2x2.c
+ZGEMMONCOPY    = ../generic/zgemm_ncopy_2.c
+ZGEMMOTCOPY    = ../generic/zgemm_tcopy_2.c
+ZGEMMONCOPYOBJ =  zgemm_oncopy.o
+ZGEMMOTCOPYOBJ =  zgemm_otcopy.o
+
+STRSMKERNEL_LN =  ../generic/trsm_kernel_LN.c
+STRSMKERNEL_LT =  ../generic/trsm_kernel_LT.c
+STRSMKERNEL_RN =  ../generic/trsm_kernel_RN.c
+STRSMKERNEL_RT =  ../generic/trsm_kernel_RT.c
+
+DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
+DTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
+DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
+DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
+
+CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
+CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
+CTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
+CTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
+
+ZTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
+ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
+ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
+ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
+
+
+
+
diff --git a/param.h b/param.h
index ab0ed91..7bb27f3 100644 (file)
--- a/param.h
+++ b/param.h
@@ -1835,6 +1835,46 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #endif
 
 
+/* Tuning parameters used when the ARMV6 target is selected. */
+#if defined(ARMV6)
+#define SNUMOPT                2
+#define DNUMOPT                2
+
+#define GEMM_DEFAULT_OFFSET_A 0
+#define GEMM_DEFAULT_OFFSET_B 0
+/* NOTE(review): presumably an alignment mask for GEMM buffers -- confirm. */
+#define GEMM_DEFAULT_ALIGN 0x03fffUL
+
+/* All four precisions use a 2x2 unroll, matching the *_2x2.c kernels
+   named in kernel/arm/KERNEL.ARMV6. */
+#define SGEMM_DEFAULT_UNROLL_M  2
+#define SGEMM_DEFAULT_UNROLL_N  2
+
+#define DGEMM_DEFAULT_UNROLL_M  2
+#define DGEMM_DEFAULT_UNROLL_N  2
+
+#define CGEMM_DEFAULT_UNROLL_M  2
+#define CGEMM_DEFAULT_UNROLL_N  2
+
+#define ZGEMM_DEFAULT_UNROLL_M  2
+#define ZGEMM_DEFAULT_UNROLL_N  2
+
+/* NOTE(review): the P, Q and R values below are presumably the GEMM
+   blocking sizes per precision -- confirm against the GEMM driver. */
+#define SGEMM_DEFAULT_P        128
+#define DGEMM_DEFAULT_P        128
+#define CGEMM_DEFAULT_P 96
+#define ZGEMM_DEFAULT_P 64
+
+#define SGEMM_DEFAULT_Q 240
+#define DGEMM_DEFAULT_Q 120
+#define CGEMM_DEFAULT_Q 120
+#define ZGEMM_DEFAULT_Q 120
+
+#define SGEMM_DEFAULT_R 12288
+#define DGEMM_DEFAULT_R 8192
+#define CGEMM_DEFAULT_R 4096
+#define ZGEMM_DEFAULT_R 4096
+
+
+#define SYMV_P 16
+#endif
+
+
 
 #ifdef GENERIC