ifeq ($(CORE), ARMV7)
-CCOMMON_OPT += -marm -mfpu=vfpv3 -mfloat-abi=hard
-FCOMMON_OPT += -marm -mfpu=vfpv3 -mfloat-abi=hard
+CCOMMON_OPT += -marm -mfpu=vfpv3 -mfloat-abi=hard -march=armv7-a
+FCOMMON_OPT += -marm -mfpu=vfpv3 -mfloat-abi=hard -march=armv7-a
+endif
+
+# When CORE is ARMV6, append the same -marm / -mfpu=vfp / -mfloat-abi=hard /
+# -march=armv6 flags to both CCOMMON_OPT and FCOMMON_OPT.
+ifeq ($(CORE), ARMV6)
+CCOMMON_OPT += -marm -mfpu=vfp -mfloat-abi=hard -march=armv6
+FCOMMON_OPT += -marm -mfpu=vfp -mfloat-abi=hard -march=armv6
endif
--- /dev/null
+/**************************************************************************
+ Copyright (c) 2013, The OpenBLAS Project
+ All rights reserved.
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are
+ met:
+ 1. Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ 2. Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ 3. Neither the name of the OpenBLAS project nor the names of
+ its contributors may be used to endorse or promote products
+ derived from this software without specific prior written permission.
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
+ LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
+ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *****************************************************************************/
+
+#include <string.h>
+
+/* Core identifiers returned by detect(); each value is also the index of
+ * the matching entry in cpuname[]. */
+#define CPU_UNKNOWN 0
+#define CPU_ARMV6 1
+#define CPU_ARMV7 2
+#define CPU_CORTEXA15 3
+
+/* Printable core names, indexed by the CPU_* identifiers above. */
+static char *cpuname[] = {
+    "UNKNOWN",
+    "ARMV6",
+    "ARMV7",
+    "CORTEXA15"
+};
+
+
+/*
+ * Report whether the "Features" line of /proc/cpuinfo lists `search`.
+ *
+ * search: feature name to look for; compared exactly (strcmp) against each
+ *         whitespace-separated token after the ':' on the first line that
+ *         starts with "Features".
+ *
+ * Returns 1 when a token matches, 0 otherwise (no match, /proc/cpuinfo
+ * cannot be opened, no "Features" line, or `linux` is not defined).
+ */
+int get_feature(char *search)
+{
+
+#ifdef linux
+    FILE *infile;
+    char buffer[2048], *p, *t;
+
+    p = NULL;
+
+    infile = fopen("/proc/cpuinfo", "r");
+    if (infile == NULL) return(0);
+
+    while (fgets(buffer, sizeof(buffer), infile))
+    {
+        if (!strncmp("Features", buffer, 8))
+        {
+            /* Keep the raw strchr result so a line without ':' stays NULL. */
+            p = strchr(buffer, ':');
+            break;
+        }
+    }
+
+    fclose(infile);
+
+    if (p == NULL) return(0);
+
+    /*
+     * Start just past the ':' and test every token, including the first.
+     * '\n' is a delimiter because fgets keeps the line terminator, which
+     * would otherwise stay glued to the last feature name.
+     */
+    for (t = strtok(p + 1, " \t\r\n"); t != NULL; t = strtok(NULL, " \t\r\n"))
+    {
+        if (!strcmp(t, search)) { return(1); }
+    }
+
+#endif
+    return(0);
+}
+
+
+/*
+ * Classify the running CPU from /proc/cpuinfo.
+ *
+ * Reads the first line starting with "model name" and inspects the text
+ * after its ':':
+ *   - contains "ARMv7": CPU_ARMV7 if get_feature() reports "vfpv4" or
+ *     "vfpv3", otherwise CPU_ARMV6 if it reports "vfp";
+ *   - contains "ARMv6": CPU_ARMV6 if get_feature() reports "vfp".
+ *
+ * Returns CPU_UNKNOWN in every other case, including when /proc/cpuinfo
+ * cannot be opened, has no usable "model name" line, or `linux` is not
+ * defined.
+ */
+int detect(void)
+{
+
+#ifdef linux
+
+    FILE *infile;
+    char buffer[512], *p;
+
+    p = NULL;
+
+    infile = fopen("/proc/cpuinfo", "r");
+    if (infile == NULL) return CPU_UNKNOWN;
+
+    while (fgets(buffer, sizeof(buffer), infile))
+    {
+        if (!strncmp("model name", buffer, 10))
+        {
+            /* Keep the raw strchr result so a line without ':' stays NULL. */
+            p = strchr(buffer, ':');
+            break;
+        }
+    }
+
+    fclose(infile);
+
+    if (p != NULL)
+    {
+
+        if (strstr(p, "ARMv7"))
+        {
+            if (get_feature("vfpv4"))
+                return CPU_ARMV7;
+
+            if (get_feature("vfpv3"))
+                return CPU_ARMV7;
+
+            /* ARMv7 core with plain VFP only: use the ARMV6 target. */
+            if (get_feature("vfp"))
+                return CPU_ARMV6;
+        }
+
+        if (strstr(p, "ARMv6"))
+        {
+            if (get_feature("vfp"))
+                return CPU_ARMV6;
+        }
+
+    }
+#endif
+
+    return CPU_UNKNOWN;
+}
+
+/* Return the cpuname[] entry for the core reported by detect(). */
+char *get_corename(void)
+{
+    int core = detect();
+
+    return cpuname[core];
+}
+
+/* Write the architecture name "ARM" to stdout, with no trailing newline. */
+void get_architecture(void)
+{
+    fputs("ARM", stdout);
+}
+
+/*
+ * Write the sub-architecture name for the core reported by detect() to
+ * stdout, with no trailing newline: "ARMV7", "ARMV6", or "UNKNOWN" for
+ * any other value.
+ */
+void get_subarchitecture(void)
+{
+    const char *label = "UNKNOWN";
+    int core = detect();
+
+    if (core == CPU_ARMV7)
+        label = "ARMV7";
+    else if (core == CPU_ARMV6)
+        label = "ARMV6";
+
+    fputs(label, stdout);
+}
+
+/* Write the sub-directory name "arm" to stdout, with no trailing newline. */
+void get_subdirname(void)
+{
+    fputs("arm", stdout);
+}
+
+/*
+ * Print the "#define ..." configuration lines for the core reported by
+ * detect(). Nothing is printed unless the core is CPU_ARMV7 or CPU_ARMV6.
+ *
+ * ARMV7 emits ARMV7, HAVE_VFP and HAVE_VFPV3, then HAVE_NEON / HAVE_VFPV4
+ * when get_feature() reports "neon" / "vfpv4". ARMV6 emits ARMV6 and
+ * HAVE_VFP. Both then emit the same cache and TLB constants.
+ *
+ * NOTE(review): L2_SIZE is printed as 512488, whereas 512 KiB would be
+ * 524288 -- confirm whether the value is intentional.
+ */
+void get_cpuconfig(void)
+{
+    int core = detect();
+
+    if (core != CPU_ARMV7 && core != CPU_ARMV6)
+        return;
+
+    if (core == CPU_ARMV7)
+    {
+        printf("#define ARMV7\n");
+        printf("#define HAVE_VFP\n");
+        printf("#define HAVE_VFPV3\n");
+        if (get_feature("neon"))
+            printf("#define HAVE_NEON\n");
+        if (get_feature("vfpv4"))
+            printf("#define HAVE_VFPV4\n");
+    }
+    else
+    {
+        printf("#define ARMV6\n");
+        printf("#define HAVE_VFP\n");
+    }
+
+    /* Cache and TLB parameters are identical for both supported cores. */
+    printf("#define L1_DATA_SIZE 65536\n");
+    printf("#define L1_DATA_LINESIZE 32\n");
+    printf("#define L2_SIZE 512488\n");
+    printf("#define L2_LINESIZE 32\n");
+    printf("#define DTB_DEFAULT_ENTRIES 64\n");
+    printf("#define DTB_SIZE 4096\n");
+    printf("#define L2_ASSOCIATIVE 4\n");
+}
+
+
+/*
+ * Print the library name suffix for the core reported by detect(),
+ * followed by a newline: "armv7" for CPU_ARMV7, "armv6" for CPU_ARMV6.
+ * Any other core prints nothing.
+ */
+void get_libname(void)
+{
+    int core = detect();
+
+    if (core == CPU_ARMV7)
+    {
+        printf("armv7\n");
+        return;
+    }
+
+    if (core == CPU_ARMV6)
+        printf("armv6\n");
+}
+
+
+/*
+ * Print one "HAVE_<FEATURE>=1" line for each of vfp, vfpv3, vfpv4 and neon
+ * found on the first "Features" line of /proc/cpuinfo.
+ *
+ * Prints nothing when /proc/cpuinfo cannot be opened, has no usable
+ * "Features" line, or `linux` is not defined.
+ */
+void get_features(void)
+{
+
+#ifdef linux
+    FILE *infile;
+    char buffer[2048], *p, *t;
+
+    p = NULL;
+
+    infile = fopen("/proc/cpuinfo", "r");
+    if (infile == NULL) return;
+
+    while (fgets(buffer, sizeof(buffer), infile))
+    {
+        if (!strncmp("Features", buffer, 8))
+        {
+            /* Keep the raw strchr result so a line without ':' stays NULL. */
+            p = strchr(buffer, ':');
+            break;
+        }
+    }
+
+    fclose(infile);
+
+    if (p == NULL) return;
+
+    /*
+     * Start just past the ':' and test every token, including the first.
+     * '\n' is a delimiter because fgets keeps the line terminator, which
+     * would otherwise stay glued to the last feature name.
+     */
+    for (t = strtok(p + 1, " \t\r\n"); t != NULL; t = strtok(NULL, " \t\r\n"))
+    {
+        if (!strcmp(t, "vfp")) { printf("HAVE_VFP=1\n"); continue; }
+        if (!strcmp(t, "vfpv3")) { printf("HAVE_VFPV3=1\n"); continue; }
+        if (!strcmp(t, "vfpv4")) { printf("HAVE_VFPV4=1\n"); continue; }
+        if (!strcmp(t, "neon")) { printf("HAVE_NEON=1\n"); continue; }
+    }
+
+#endif
+    return;
+}
+
+
#define ARCHCONFIG "-DARMV7 " \
"-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=32 " \
"-DL2_SIZE=512488 -DL2_LINESIZE=32 " \
- "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 "
+ "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 " \
+ "-DHAVE_VFPV3 -DHAVE_VFP"
#define LIBNAME "armv7"
#define CORENAME "ARMV7"
#else
#endif
+/* Forced ARMV6 target: when FORCE_ARMV6 is defined, define FORCE and supply
+ * the architecture, sub-architecture, sub-directory, library and core names
+ * together with the ARCHCONFIG flag string as fixed values.
+ * NOTE(review): -DL2_SIZE=512488 is not 512 KiB (524288); it matches the
+ * ARMV7 entry above -- confirm whether the value is intentional. */
+#ifdef FORCE_ARMV6
+#define FORCE
+#define ARCHITECTURE "ARM"
+#define SUBARCHITECTURE "ARMV6"
+#define SUBDIRNAME "arm"
+#define ARCHCONFIG "-DARMV6 " \
+ "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=32 " \
+ "-DL2_SIZE=512488 -DL2_LINESIZE=32 " \
+ "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 " \
+ "-DHAVE_VFP"
+#define LIBNAME "armv6"
+#define CORENAME "ARMV6"
+#else
+#endif
+
+
#ifndef FORCE
#if defined(__powerpc__) || defined(__powerpc) || defined(powerpc) || \
- defined(__PPC__) || defined(PPC) || defined(_POWER) || defined(__POWERPC__)
+ defined(__PPC__) || defined(PPC) || defined(_POWER) || defined(__POWERPC__)
+
#ifndef POWER
#define POWER
#endif
#define OPENBLAS_SUPPORTED
#endif
+
#if defined(__i386__) || (__x86_64__)
#include "cpuid_x86.c"
#define OPENBLAS_SUPPORTED
#define OPENBLAS_SUPPORTED
#endif
+#ifdef __arm__
+#include "cpuid_arm.c"
+#define OPENBLAS_SUPPORTED
+#endif
+
+
#ifndef OPENBLAS_SUPPORTED
#error "This arch/CPU is not supported by OpenBLAS."
#endif
-#else
-
#endif
static int get_num_cores(void) {
#ifdef FORCE
printf("CORE=%s\n", CORENAME);
#else
-#if defined(__i386__) || defined(__x86_64__) || defined(POWER) || defined(__mips__)
+#if defined(__i386__) || defined(__x86_64__) || defined(POWER) || defined(__mips__) || defined(__arm__)
printf("CORE=%s\n", get_corename());
#endif
#endif
printf("NUM_CORES=%d\n", get_num_cores());
+#if defined(__arm__) && !defined(FORCE)
+ get_features();
+#endif
+
+
+
#if defined(__i386__) || defined(__x86_64__)
#ifndef FORCE
get_sse();
--- /dev/null
+# Kernel source selection: each <ROUTINE>KERNEL variable names the source
+# file assigned to that routine. Entries under ../generic/ refer to files
+# outside this directory.
+SAMAXKERNEL = amax.c
+DAMAXKERNEL = amax.c
+CAMAXKERNEL = zamax.c
+ZAMAXKERNEL = zamax.c
+
+SAMINKERNEL = amin.c
+DAMINKERNEL = amin.c
+CAMINKERNEL = zamin.c
+ZAMINKERNEL = zamin.c
+
+SMAXKERNEL = max.c
+DMAXKERNEL = max.c
+
+SMINKERNEL = min.c
+DMINKERNEL = min.c
+
+ISAMAXKERNEL = iamax.c
+IDAMAXKERNEL = iamax.c
+ICAMAXKERNEL = izamax.c
+IZAMAXKERNEL = izamax.c
+
+ISAMINKERNEL = iamin.c
+IDAMINKERNEL = iamin.c
+ICAMINKERNEL = izamin.c
+IZAMINKERNEL = izamin.c
+
+ISMAXKERNEL = imax.c
+IDMAXKERNEL = imax.c
+
+ISMINKERNEL = imin.c
+IDMINKERNEL = imin.c
+
+SASUMKERNEL = asum.c
+DASUMKERNEL = asum.c
+CASUMKERNEL = zasum.c
+ZASUMKERNEL = zasum.c
+
+SAXPYKERNEL = axpy.c
+DAXPYKERNEL = axpy.c
+CAXPYKERNEL = zaxpy.c
+ZAXPYKERNEL = zaxpy.c
+
+SCOPYKERNEL = copy.c
+DCOPYKERNEL = copy.c
+CCOPYKERNEL = zcopy.c
+ZCOPYKERNEL = zcopy.c
+
+SDOTKERNEL = dot.c
+DDOTKERNEL = dot.c
+CDOTKERNEL = zdot.c
+ZDOTKERNEL = zdot.c
+
+SNRM2KERNEL = nrm2.c
+DNRM2KERNEL = nrm2.c
+CNRM2KERNEL = znrm2.c
+ZNRM2KERNEL = znrm2.c
+
+SROTKERNEL = rot.c
+DROTKERNEL = rot.c
+CROTKERNEL = zrot.c
+ZROTKERNEL = zrot.c
+
+SSCALKERNEL = scal.c
+DSCALKERNEL = scal.c
+CSCALKERNEL = zscal.c
+ZSCALKERNEL = zscal.c
+
+SSWAPKERNEL = swap.c
+DSWAPKERNEL = swap.c
+CSWAPKERNEL = zswap.c
+ZSWAPKERNEL = zswap.c
+
+SGEMVNKERNEL = gemv_n.c
+DGEMVNKERNEL = gemv_n.c
+CGEMVNKERNEL = zgemv_n.c
+ZGEMVNKERNEL = zgemv_n.c
+
+SGEMVTKERNEL = gemv_t.c
+DGEMVTKERNEL = gemv_t.c
+CGEMVTKERNEL = zgemv_t.c
+ZGEMVTKERNEL = zgemv_t.c
+
+STRMMKERNEL = ../generic/trmmkernel_2x2.c
+DTRMMKERNEL = ../generic/trmmkernel_2x2.c
+CTRMMKERNEL = ../generic/ztrmmkernel_2x2.c
+ZTRMMKERNEL = ../generic/ztrmmkernel_2x2.c
+
+SGEMMKERNEL = ../generic/gemmkernel_2x2.c
+SGEMMONCOPY = ../generic/gemm_ncopy_2.c
+SGEMMOTCOPY = ../generic/gemm_tcopy_2.c
+SGEMMONCOPYOBJ = sgemm_oncopy.o
+SGEMMOTCOPYOBJ = sgemm_otcopy.o
+
+DGEMMKERNEL = ../generic/gemmkernel_2x2.c
+DGEMMONCOPY = ../generic/gemm_ncopy_2.c
+DGEMMOTCOPY = ../generic/gemm_tcopy_2.c
+DGEMMONCOPYOBJ = dgemm_oncopy.o
+DGEMMOTCOPYOBJ = dgemm_otcopy.o
+
+CGEMMKERNEL = ../generic/zgemmkernel_2x2.c
+CGEMMONCOPY = ../generic/zgemm_ncopy_2.c
+CGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
+CGEMMONCOPYOBJ = cgemm_oncopy.o
+CGEMMOTCOPYOBJ = cgemm_otcopy.o
+
+ZGEMMKERNEL = ../generic/zgemmkernel_2x2.c
+ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c
+ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
+ZGEMMONCOPYOBJ = zgemm_oncopy.o
+ZGEMMOTCOPYOBJ = zgemm_otcopy.o
+
+STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
+STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
+STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
+STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
+
+DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
+DTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
+DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
+DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
+
+CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
+CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
+CTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
+CTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
+
+ZTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
+ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
+ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
+ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
+
+
+
+
#endif
+/* Default tuning parameters selected when ARMV6 is defined: GEMM offsets and
+ * alignment mask, 2x2 unroll factors for the S/D/C/Z GEMM variants, the
+ * per-variant P, Q and R values, and SYMV_P. */
+#if defined(ARMV6)
+#define SNUMOPT 2
+#define DNUMOPT 2
+
+#define GEMM_DEFAULT_OFFSET_A 0
+#define GEMM_DEFAULT_OFFSET_B 0
+#define GEMM_DEFAULT_ALIGN 0x03fffUL
+
+#define SGEMM_DEFAULT_UNROLL_M 2
+#define SGEMM_DEFAULT_UNROLL_N 2
+
+#define DGEMM_DEFAULT_UNROLL_M 2
+#define DGEMM_DEFAULT_UNROLL_N 2
+
+#define CGEMM_DEFAULT_UNROLL_M 2
+#define CGEMM_DEFAULT_UNROLL_N 2
+
+#define ZGEMM_DEFAULT_UNROLL_M 2
+#define ZGEMM_DEFAULT_UNROLL_N 2
+
+#define SGEMM_DEFAULT_P 128
+#define DGEMM_DEFAULT_P 128
+#define CGEMM_DEFAULT_P 96
+#define ZGEMM_DEFAULT_P 64
+
+#define SGEMM_DEFAULT_Q 240
+#define DGEMM_DEFAULT_Q 120
+#define CGEMM_DEFAULT_Q 120
+#define ZGEMM_DEFAULT_Q 120
+
+#define SGEMM_DEFAULT_R 12288
+#define DGEMM_DEFAULT_R 8192
+#define CGEMM_DEFAULT_R 4096
+#define ZGEMM_DEFAULT_R 4096
+
+
+#define SYMV_P 16
+#endif
+
+
#ifdef GENERIC