/*****************************************************************************
-Copyright (c) 2011,2012 Lab of Parallel Software and Computational Science,ISCAS
+Copyright (c) 2011-2014, The OpenBLAS Project
All rights reserved.
Redistribution and use in source and binary forms, with or without
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
- 3. Neither the name of the ISCAS nor the names of its contributors may
- be used to endorse or promote products derived from this software
- without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
-SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
-CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
+ 3. Neither the name of the OpenBLAS project nor the names of
+ its contributors may be used to endorse or promote products
+ derived from this software without specific prior written
+ permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************************/
/* or implied, of The University of Texas at Austin. */
/*********************************************************************/
-#if defined(__WIN32__) || defined(__WIN64__) || defined(__CYGWIN32__) || defined(__CYGWIN64__)
+#if defined(__WIN32__) || defined(__WIN64__) || defined(__CYGWIN32__) || defined(__CYGWIN64__) || defined(_WIN32) || defined(_WIN64) /* any Windows/Cygwin macro, now also the _WIN32/_WIN64 spellings */
#define OS_WINDOWS /* tested later in this file to select the Windows-specific code */
#endif
+#if defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) || defined(_M_X64) /* x86 target; _M_IX86/_M_X64 are the MSVC spellings */
+#define INTEL_AMD /* single switch used further down in place of repeated __i386__/__x86_64__ tests */
+#endif
+
#include <stdio.h>
#include <string.h>
#ifdef OS_WINDOWS
#include <sys/types.h>
#include <sys/sysctl.h>
#endif
-#ifdef linux
+#if defined(linux) || defined(__sun__)
#include <sys/sysinfo.h>
#include <unistd.h>
#endif
/* #define FORCE_POWER4 */
/* #define FORCE_POWER5 */
/* #define FORCE_POWER6 */
+/* #define FORCE_POWER7 */
+/* #define FORCE_POWER8 */
/* #define FORCE_PPCG4 */
/* #define FORCE_PPC970 */
/* #define FORCE_PPC970MP */
/* #define FORCE_SICORTEX */
/* #define FORCE_LOONGSON3A */
/* #define FORCE_LOONGSON3B */
+/* #define FORCE_I6400 */
+/* #define FORCE_P6600 */
+/* #define FORCE_P5600 */
/* #define FORCE_ITANIUM2 */
/* #define FORCE_SPARC */
/* #define FORCE_SPARCV7 */
#define CORENAME "BARCELONA"
#endif
-#if defined(FORCE_BOBCAT)
+#if defined(FORCE_BOBCAT)
#define FORCE
#define FORCE_INTEL
#define ARCHITECTURE "X86"
#define CORENAME "PILEDRIVER"
#endif
+#if defined (FORCE_STEAMROLLER) /* forced-target settings for the STEAMROLLER core */
+#define FORCE /* a target was forced: CORENAME is printed instead of calling get_corename() */
+#define FORCE_INTEL /* NOTE(review): also set by the other X86 blocks in view; its consumer is not visible here */
+#define ARCHITECTURE "X86"
+#define SUBARCHITECTURE "STEAMROLLER"
+#define ARCHCONFIG "-DSTEAMROLLER " \
+ "-DL1_DATA_SIZE=16384 -DL1_DATA_LINESIZE=64 " \
+ "-DL2_SIZE=2097152 -DL2_LINESIZE=64 -DL3_SIZE=12582912 " \
+ "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
+ "-DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSE4_1 -DHAVE_SSE4_2 " \
+ "-DHAVE_SSE4A -DHAVE_MISALIGNSSE -DHAVE_128BITFPU -DHAVE_FASTMOVU -DHAVE_CFLUSH " \
+ "-DHAVE_AVX -DHAVE_FMA4 -DHAVE_FMA3"
+#define LIBNAME "steamroller" /* ARCHCONFIG above is one concatenated string of -D options */
+#define CORENAME "STEAMROLLER"
+#endif /* FORCE_STEAMROLLER */
+
+#if defined (FORCE_EXCAVATOR) /* forced-target settings for the EXCAVATOR core */
+#define FORCE /* a target was forced: CORENAME is printed instead of calling get_corename() */
+#define FORCE_INTEL /* NOTE(review): also set by the other X86 blocks in view; its consumer is not visible here */
+#define ARCHITECTURE "X86"
+#define SUBARCHITECTURE "EXCAVATOR"
+#define ARCHCONFIG "-DEXCAVATOR " \
+ "-DL1_DATA_SIZE=16384 -DL1_DATA_LINESIZE=64 " \
+ "-DL2_SIZE=2097152 -DL2_LINESIZE=64 -DL3_SIZE=12582912 " \
+ "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
+ "-DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSE4_1 -DHAVE_SSE4_2 " \
+ "-DHAVE_SSE4A -DHAVE_MISALIGNSSE -DHAVE_128BITFPU -DHAVE_FASTMOVU -DHAVE_CFLUSH " \
+ "-DHAVE_AVX -DHAVE_FMA4 -DHAVE_FMA3"
+#define LIBNAME "excavator" /* ARCHCONFIG above matches the FORCE_STEAMROLLER values except for the leading -DEXCAVATOR */
+#define CORENAME "EXCAVATOR"
+#endif /* FORCE_EXCAVATOR */
+
+#if defined (FORCE_ZEN) /* forced-target settings for the ZEN core */
+#define FORCE /* a target was forced: CORENAME is printed instead of calling get_corename() */
+#define FORCE_INTEL /* NOTE(review): also set by the other X86 blocks in view; its consumer is not visible here */
+#define ARCHITECTURE "X86"
+#define SUBARCHITECTURE "ZEN" /* NOTE(review): the L1_DATA line of ARCHCONFIG below ends with -DL2_CODE_ASSOCIATIVE=8; L1_DATA_ASSOCIATIVE may have been intended - confirm before changing */
+#define ARCHCONFIG "-DZEN " \
+ "-DL1_CODE_SIZE=32768 -DL1_CODE_LINESIZE=64 -DL1_CODE_ASSOCIATIVE=8 " \
+ "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 -DL2_CODE_ASSOCIATIVE=8 " \
+ "-DL2_SIZE=524288 -DL2_LINESIZE=64 -DL2_ASSOCIATIVE=8 " \
+ "-DL3_SIZE=16777216 -DL3_LINESIZE=64 -DL3_ASSOCIATIVE=8 " \
+ "-DITB_DEFAULT_ENTRIES=64 -DITB_SIZE=4096 " \
+ "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
+ "-DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSE4_1 -DHAVE_SSE4_2 " \
+ "-DHAVE_SSE4A -DHAVE_MISALIGNSSE -DHAVE_128BITFPU -DHAVE_FASTMOVU -DHAVE_CFLUSH " \
+ "-DHAVE_AVX -DHAVE_FMA3 -DFMA3"
+#define LIBNAME "zen" /* unlike the STEAMROLLER/EXCAVATOR blocks, ARCHCONFIG here has no -DHAVE_FMA4 and adds -DFMA3 */
+#define CORENAME "ZEN"
+#endif /* FORCE_ZEN */
+
+
#ifdef FORCE_SSE_GENERIC
#define FORCE
#define FORCE_INTEL
#define CORENAME "POWER5"
#endif
-#ifdef FORCE_POWER6
+#if defined(FORCE_POWER6) || defined(FORCE_POWER7) /* FORCE_POWER7 reuses the POWER6 settings below */
#define FORCE /* a target was forced: CORENAME is printed instead of calling get_corename() */
#define ARCHITECTURE "POWER"
#define SUBARCHITECTURE "POWER6"
#define CORENAME "POWER6" /* reported as POWER6 even when FORCE_POWER7 selected this block */
#endif
+#if defined(FORCE_POWER8) /* forced-target settings for the POWER8 core */
+#define FORCE /* a target was forced: CORENAME is printed instead of calling get_corename() */
+#define ARCHITECTURE "POWER"
+#define SUBARCHITECTURE "POWER8"
+#define SUBDIRNAME "power"
+#define ARCHCONFIG "-DPOWER8 " \
+ "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=128 " \
+ "-DL2_SIZE=4194304 -DL2_LINESIZE=128 " \
+ "-DDTB_DEFAULT_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 "
+#define LIBNAME "power8" /* ARCHCONFIG above is one concatenated string of -D options */
+#define CORENAME "POWER8"
+#endif /* FORCE_POWER8 */
+
+
#ifdef FORCE_PPCG4
#define FORCE
#define ARCHITECTURE "POWER"
#else
#endif
+#ifdef FORCE_I6400 /* forced-target settings for the I6400 core */
+#define FORCE /* a target was forced: CORENAME is printed instead of calling get_corename() */
+#define ARCHITECTURE "MIPS"
+#define SUBARCHITECTURE "I6400"
+#define SUBDIRNAME "mips64"
+#define ARCHCONFIG "-DI6400 " \
+ "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=32 " \
+ "-DL2_SIZE=1048576 -DL2_LINESIZE=32 " \
+ "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 "
+#define LIBNAME "i6400" /* ARCHCONFIG above is one concatenated string of -D options */
+#define CORENAME "I6400"
+#else /* intentionally empty: nothing is defined when this target is not forced */
+#endif /* FORCE_I6400 */
+
+#ifdef FORCE_P6600 /* forced-target settings for the P6600 core */
+#define FORCE /* a target was forced: CORENAME is printed instead of calling get_corename() */
+#define ARCHITECTURE "MIPS"
+#define SUBARCHITECTURE "P6600"
+#define SUBDIRNAME "mips64"
+#define ARCHCONFIG "-DP6600 " \
+ "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=32 " \
+ "-DL2_SIZE=1048576 -DL2_LINESIZE=32 " \
+ "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 "
+#define LIBNAME "p6600" /* ARCHCONFIG above matches the FORCE_I6400 values except for the leading -DP6600 */
+#define CORENAME "P6600"
+#else /* intentionally empty: nothing is defined when this target is not forced */
+#endif /* FORCE_P6600 */
+
+#ifdef FORCE_P5600 /* forced-target settings for the P5600 core */
+#define FORCE /* a target was forced: CORENAME is printed instead of calling get_corename() */
+#define ARCHITECTURE "MIPS"
+#define SUBARCHITECTURE "P5600"
+#define SUBDIRNAME "mips" /* "mips" here, whereas the I6400 and P6600 blocks use "mips64" */
+#define ARCHCONFIG "-DP5600 " \
+ "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=32 " \
+ "-DL2_SIZE=1048576 -DL2_LINESIZE=32 " \
+ "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 "
+#define LIBNAME "p5600" /* ARCHCONFIG above is one concatenated string of -D options */
+#define CORENAME "P5600"
+#else /* intentionally empty: nothing is defined when this target is not forced */
+#endif /* FORCE_P5600 */
+
#ifdef FORCE_ITANIUM2
#define FORCE
#define ARCHITECTURE "IA64"
#else
#endif
+#ifdef FORCE_CORTEXA9 /* forced-target settings for the CORTEXA9 core */
+#define FORCE /* a target was forced: CORENAME is printed instead of calling get_corename() */
+#define ARCHITECTURE "ARM"
+#define SUBARCHITECTURE "CORTEXA9"
+#define SUBDIRNAME "arm"
+#define ARCHCONFIG "-DCORTEXA9 -DARMV7 " \
+ "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=32 " \
+ "-DL2_SIZE=1048576 -DL2_LINESIZE=32 " \
+ "-DDTB_DEFAULT_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 " \
+ "-DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON"
+#define LIBNAME "cortexa9" /* ARCHCONFIG above passes -DARMV7 in addition to the core-specific -DCORTEXA9 */
+#define CORENAME "CORTEXA9"
+#else /* intentionally empty: nothing is defined when this target is not forced */
+#endif /* FORCE_CORTEXA9 */
+
+#ifdef FORCE_CORTEXA15 /* forced-target settings for the CORTEXA15 core */
+#define FORCE /* a target was forced: CORENAME is printed instead of calling get_corename() */
+#define ARCHITECTURE "ARM"
+#define SUBARCHITECTURE "CORTEXA15"
+#define SUBDIRNAME "arm"
+#define ARCHCONFIG "-DCORTEXA15 -DARMV7 " \
+ "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=32 " \
+ "-DL2_SIZE=1048576 -DL2_LINESIZE=32 " \
+ "-DDTB_DEFAULT_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 " \
+ "-DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON"
+#define LIBNAME "cortexa15" /* ARCHCONFIG above matches the FORCE_CORTEXA9 values except for the leading -DCORTEXA15 */
+#define CORENAME "CORTEXA15"
+#else /* intentionally empty: nothing is defined when this target is not forced */
+#endif /* FORCE_CORTEXA15 */
+
#ifdef FORCE_ARMV6
#define FORCE
#define ARCHITECTURE "ARM"
#define ARCHCONFIG "-DARMV5 " \
"-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=32 " \
"-DL2_SIZE=512488 -DL2_LINESIZE=32 " \
- "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 " \
- "-DHAVE_VFP"
+ "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 "
#define LIBNAME "armv5"
#define CORENAME "ARMV5"
#else
#define SUBARCHITECTURE "ARMV8"
#define SUBDIRNAME "arm64"
#define ARCHCONFIG "-DARMV8 " \
- "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=32 " \
- "-DL2_SIZE=512488 -DL2_LINESIZE=32 " \
- "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 " \
- "-DHAVE_VFP -DHAVE_VFPV3 -DHAVE_VFPV4"
+ "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \
+ "-DL2_SIZE=262144 -DL2_LINESIZE=64 " \
+ "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=32 "
#define LIBNAME "armv8"
#define CORENAME "ARMV8"
+#endif
+
+#ifdef FORCE_CORTEXA57 /* forced-target settings for the CORTEXA57 core */
+#define FORCE /* a target was forced: CORENAME is printed instead of calling get_corename() */
+#define ARCHITECTURE "ARM64"
+#define SUBARCHITECTURE "CORTEXA57"
+#define SUBDIRNAME "arm64"
+#define ARCHCONFIG "-DCORTEXA57 " \
+ "-DL1_CODE_SIZE=49152 -DL1_CODE_LINESIZE=64 -DL1_CODE_ASSOCIATIVE=3 " \
+ "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 -DL1_DATA_ASSOCIATIVE=2 " \
+ "-DL2_SIZE=2097152 -DL2_LINESIZE=64 -DL2_ASSOCIATIVE=16 " \
+ "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
+ "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON"
+#define LIBNAME "cortexa57" /* ARCHCONFIG above is one concatenated string of -D options */
+#define CORENAME "CORTEXA57"
+#else /* intentionally empty: nothing is defined when this target is not forced */
+#endif /* FORCE_CORTEXA57 */
+
+#ifdef FORCE_VULCAN /* forced-target settings for the VULCAN core */
+#define FORCE /* a target was forced: CORENAME is printed instead of calling get_corename() */
+#define ARCHITECTURE "ARM64"
+#define SUBARCHITECTURE "VULCAN"
+#define SUBDIRNAME "arm64"
+#define ARCHCONFIG "-DVULCAN " \
+ "-DL1_CODE_SIZE=32768 -DL1_CODE_LINESIZE=64 -DL1_CODE_ASSOCIATIVE=8 " \
+ "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 -DL1_DATA_ASSOCIATIVE=8 " \
+ "-DL2_SIZE=262144 -DL2_LINESIZE=64 -DL2_ASSOCIATIVE=8 " \
+ "-DL3_SIZE=33554432 -DL3_LINESIZE=64 -DL3_ASSOCIATIVE=32 " \
+ "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
+ "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON"
+#define LIBNAME "vulcan" /* ARCHCONFIG above carries the same values as the FORCE_THUNDERX2T99 block apart from the leading -DVULCAN */
+#define CORENAME "VULCAN"
#else /* intentionally empty: nothing is defined when this target is not forced */
#endif /* FORCE_VULCAN */
+#ifdef FORCE_THUNDERX /* forced-target settings for the THUNDERX core */
+#define FORCE /* a target was forced: CORENAME is printed instead of calling get_corename() */
+#define ARCHITECTURE "ARM64"
+#define SUBARCHITECTURE "THUNDERX"
+#define SUBDIRNAME "arm64"
+#define ARCHCONFIG "-DTHUNDERX " \
+ "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=128 " \
+ "-DL2_SIZE=16777216 -DL2_LINESIZE=128 -DL2_ASSOCIATIVE=16 " \
+ "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 "
+#define LIBNAME "thunderx" /* NOTE(review): ARCHCONFIG above has no -DHAVE_* flags, unlike the other ARM64 blocks in view - confirm this is intended */
+#define CORENAME "THUNDERX"
+#else /* intentionally empty: nothing is defined when this target is not forced */
+#endif /* FORCE_THUNDERX */
+
+#ifdef FORCE_THUNDERX2T99 /* forced-target settings for the THUNDERX2T99 core */
+#define FORCE /* a target was forced: CORENAME is printed instead of calling get_corename() */
+#define ARCHITECTURE "ARM64"
+#define SUBARCHITECTURE "THUNDERX2T99"
+#define SUBDIRNAME "arm64"
+#define ARCHCONFIG "-DTHUNDERX2T99 " \
+ "-DL1_CODE_SIZE=32768 -DL1_CODE_LINESIZE=64 -DL1_CODE_ASSOCIATIVE=8 " \
+ "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 -DL1_DATA_ASSOCIATIVE=8 " \
+ "-DL2_SIZE=262144 -DL2_LINESIZE=64 -DL2_ASSOCIATIVE=8 " \
+ "-DL3_SIZE=33554432 -DL3_LINESIZE=64 -DL3_ASSOCIATIVE=32 " \
+ "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
+ "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON"
+#define LIBNAME "thunderx2t99" /* ARCHCONFIG above carries the same values as the FORCE_VULCAN block apart from the leading -DTHUNDERX2T99 */
+#define CORENAME "THUNDERX2T99"
+#else /* intentionally empty: nothing is defined when this target is not forced */
+#endif /* FORCE_THUNDERX2T99 */
#ifndef FORCE
#define OPENBLAS_SUPPORTED
#endif
-#if defined(__i386__) || (__x86_64__)
+#if defined(__zarch__) || defined(__s390x__) /* either predefined-macro spelling selects the z/Architecture detection source */
+#define ZARCH /* tested later, alongside the other architectures, before get_corename() is called */
+#include "cpuid_zarch.c"
+#define OPENBLAS_SUPPORTED /* suppresses the "not supported" #error further down */
+#endif /* __zarch__ || __s390x__ */
+
+#ifdef INTEL_AMD /* x86 targets; INTEL_AMD is defined near the top of this file */
#include "cpuid_x86.c"
#define OPENBLAS_SUPPORTED /* suppresses the "not supported" #error further down */
#endif /* INTEL_AMD */
#endif
#ifdef __mips__ /* MIPS targets: pick one of two detection sources */
+#ifdef __mips64 /* __mips64 selects the dedicated mips64 detection source */
+#include "cpuid_mips64.c"
+#else /* every other MIPS target keeps the original source */
#include "cpuid_mips.c"
+#endif /* __mips64 */
#define OPENBLAS_SUPPORTED /* set for both MIPS variants */
#endif /* __mips__ */
#define OPENBLAS_SUPPORTED
#endif
+#ifdef __aarch64__ /* AArch64 targets use their own detection source */
+#include "cpuid_arm64.c"
+#define OPENBLAS_SUPPORTED /* suppresses the "not supported" #error that follows */
+#endif /* __aarch64__ */
+
#ifndef OPENBLAS_SUPPORTED
#error "This arch/CPU is not supported by OpenBLAS."
int m[2], count;
size_t len;
#endif
-
-#ifdef linux
+
+#if defined(linux) || defined(__sun__)
//returns the number of processors which are configured (not only those currently online)
- return sysconf(_SC_NPROCESSORS_ONLN);
-
+ return sysconf(_SC_NPROCESSORS_CONF);
+
#elif defined(OS_WINDOWS)
GetSystemInfo(&sysinfo);
#ifdef FORCE
printf("CORE=%s\n", CORENAME);
-#else
-#if defined(__i386__) || defined(__x86_64__) || defined(POWER) || defined(__mips__) || defined(__arm__)
+#else
+#if defined(INTEL_AMD) || defined(POWER) || defined(__mips__) || defined(__arm__) || defined(__aarch64__) || defined(ZARCH)
printf("CORE=%s\n", get_corename());
#endif
#endif
#endif
-#if defined(__i386__) || defined(__x86_64__)
+#ifdef INTEL_AMD
#ifndef FORCE
get_sse();
#else
#endif
#endif
-#if NO_PARALLEL_MAKE==1
+#ifdef MAKE_NB_JOBS
+ #if MAKE_NB_JOBS > 0
+ printf("MAKE += -j %d\n", MAKE_NB_JOBS);
+ #else
+ // Let make use parent -j argument or -j1 if there
+ // is no make parent
+ #endif
+#elif NO_PARALLEL_MAKE==1
printf("MAKE += -j 1\n");
#else
#ifndef OS_WINDOWS
p ++;
}
} else {
+ if (*p != '\n')
printf("%c", *p);
p ++;
}
#else
get_cpuconfig();
#endif
+
+#ifdef FORCE
+ printf("#define CHAR_CORENAME \"%s\"\n", CORENAME);
+#else
+#if defined(INTEL_AMD) || defined(POWER) || defined(__mips__) || defined(__arm__) || defined(__aarch64__) || defined(ZARCH)
+ printf("#define CHAR_CORENAME \"%s\"\n", get_corename());
+#endif
+#endif
+
break;
case '2' : /* SMP */