--- /dev/null
+
+# When CORE is Z13, append -march=z13 to both CCOMMON_OPT and FCOMMON_OPT.
+ifeq ($(CORE), Z13)
+CCOMMON_OPT += -march=z13
+FCOMMON_OPT += -march=z13
+endif
+
$hostarch = "arm" if ($hostarch =~ /^arm.*/);
$hostarch = "arm64" if ($hostarch eq "aarch64");
$hostarch = "power" if ($hostarch =~ /^(powerpc|ppc).*/);
+$hostarch = "zarch" if ($hostarch eq "s390x");
$binary = $ENV{"BINARY"};
$architecture = ia64 if ($data =~ /ARCH_IA64/);
$architecture = arm if ($data =~ /ARCH_ARM/);
$architecture = arm64 if ($data =~ /ARCH_ARM64/);
+$architecture = zarch if ($data =~ /ARCH_ZARCH/);
$defined = 0;
$defined = 1;
}
+# zarch: flag the architecture as defined and set $binary to 64.
+if ($architecture eq "zarch") {
+ $defined = 1;
+ $binary = 64;
+}
+
if ($architecture eq "alpha") {
$defined = 1;
$binary = 64;
$architecture = ia64 if ($data =~ /ARCH_IA64/);
$architecture = arm if ($data =~ /ARCH_ARM/);
$architecture = arm64 if ($data =~ /ARCH_ARM64/);
+$architecture = zarch if ($data =~ /ARCH_ZARCH/);
$binformat = bin32;
$binformat = bin64 if ($data =~ /BINARY_64/);
#include "common_arm64.h"
#endif
+/* Pull in the zarch-specific common header when ARCH_ZARCH is defined. */
+#ifdef ARCH_ZARCH
+#include "common_zarch.h"
+#endif
+
#ifndef ASSEMBLER
#ifdef OS_WINDOWS
typedef char env_var_t[MAX_PATH];
static inline int my_mbind(void *addr, unsigned long len, int mode,
unsigned long *nodemask, unsigned long maxnode,
unsigned flags) {
-#if defined (__LSB_VERSION__)
+#if defined (__LSB_VERSION__) || defined(ARCH_ZARCH)
// So far, LSB (Linux Standard Base) don't support syscall().
// https://lsbbugs.linuxfoundation.org/show_bug.cgi?id=3482
return 0;
}
static inline int my_set_mempolicy(int mode, const unsigned long *addr, unsigned long flag) {
-#if defined (__LSB_VERSION__)
+#if defined (__LSB_VERSION__) || defined(ARCH_ZARCH)
// So far, LSB (Linux Standard Base) don't support syscall().
// https://lsbbugs.linuxfoundation.org/show_bug.cgi?id=3482
return 0;
--- /dev/null
+/*****************************************************************************
+Copyright (c) 2011-2016, The OpenBLAS Project
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+ 1. Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ 3. Neither the name of the OpenBLAS project nor the names of
+ its contributors may be used to endorse or promote products
+ derived from this software without specific prior written
+ permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
+USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************************/
+
+#ifndef COMMON_ZARCH
+#define COMMON_ZARCH
+
+/* MB and WMB expand to nothing in this header, so no barrier instruction is
+   emitted where they are used. The commented-out "dmb" lines are disabled
+   ARM barrier instructions, not zarch code.
+   NOTE(review): confirm that empty barriers are sufficient on this target. */
+#define MB
+//__asm__ __volatile__ ("dmb ish" : : : "memory")
+#define WMB
+//__asm__ __volatile__ ("dmb ishst" : : : "memory")
+
+
+/* INLINE maps to the plain C inline keyword. */
+#define INLINE inline
+
+/* NOTE(review): presumably selects returning complex results by value in the
+   shared headers; the code that tests this macro is not visible here. */
+#define RETURN_BY_COMPLEX
+
+#ifndef ASSEMBLER
+
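+ /* NOTE(review): disabled spin-lock. The asm below uses AArch64 instructions
+    (ldaxr/stxr/cbnz), so it cannot be enabled on zarch as-is; with
+    BLAS_LOCK_DEFINED left undefined, this header provides no blas_lock.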
+static void __inline blas_lock(volatile BLASULONG *address){
+
+ BLASULONG ret;
+
+ do {
+ while (*address) {YIELDING;};
+
+ __asm__ __volatile__(
+ "mov x4, #1 \n\t"
+ "1: \n\t"
+ "ldaxr x2, [%1] \n\t"
+ "cbnz x2, 1b \n\t"
+ "2: \n\t"
+ "stxr w3, x4, [%1] \n\t"
+ "cbnz w3, 1b \n\t"
+ "mov %0, #0 \n\t"
+ : "=r"(ret), "=r"(address)
+ : "1"(address)
+ : "memory", "x2" , "x3", "x4"
+
+
+ );
+
+
+ } while (ret);
+
+}
+ */
+//#define BLAS_LOCK_DEFINED
+
+
+
+/* Integer quotient of x by y using the plain C division operator.
+   No check is made for y == 0; the caller must pass a non-zero divisor. */
+static inline int blas_quickdivide(blasint x, blasint y){
+  const blasint quotient = x / y;
+
+  return quotient;
+}
+
+/* GET_IMAGE(res) stores a floating-point register into res through inline
+   asm: register d1 when DOUBLE is defined, s1 otherwise.
+   NOTE(review): "str d1" / "str s1" look like AArch64 mnemonics and register
+   names rather than z/Architecture ones - confirm this macro is never
+   expanded on zarch, or replace it with a target-specific sequence. */
+#if defined(DOUBLE)
+#define GET_IMAGE(res) __asm__ __volatile__("str d1, %0" : "=m"(res) : : "memory")
+#else
+#define GET_IMAGE(res) __asm__ __volatile__("str s1, %0" : "=m"(res) : : "memory")
+#endif
+
+/* Expands to nothing. */
+#define GET_IMAGE_CANCEL
+
+#endif
+
+
+/* REALNAME is the symbol name used by PROLOGUE: ASMNAME by default,
+   ASMFNAME when F_INTERFACE is defined. */
+#ifndef F_INTERFACE
+#define REALNAME ASMNAME
+#else
+#define REALNAME ASMFNAME
+#endif
+
+/* Assembly-only helpers, skipped when NEEDPARAM is defined.
+   PROLOGUE switches to .text, applies .align 4, declares REALNAME as a global
+   symbol of type function and then places its label.
+   EPILOGUE and PROFCODE expand to nothing. */
+#if defined(ASSEMBLER) && !defined(NEEDPARAM)
+
+#define PROLOGUE \
+ .text ;\
+ .align 4 ;\
+ .global REALNAME ;\
+ .type REALNAME, %function ;\
+REALNAME:
+
+#define EPILOGUE
+
+#define PROFCODE
+
+#endif
+
+
+/* Defined with an empty value; the code that tests it is not visible here. */
+#define SEEK_ADDRESS
+
+/* Default page size of 4 KiB (4 << 10) unless PAGESIZE was set earlier. */
+#ifndef PAGESIZE
+#define PAGESIZE ( 4 << 10)
+#endif
+/* 4 MiB (4 << 20).
+   NOTE(review): confirm this matches the huge page size used on zarch. */
+#define HUGE_PAGESIZE ( 4 << 20)
+
+/* BUFFER_SIZE is 16 MiB unless CORTEXA57 is defined, in which case 20 MiB.
+   NOTE(review): CORTEXA57 is an ARM core name, so presumably only the
+   16 MiB branch is ever taken on zarch - confirm. */
+#if defined(CORTEXA57)
+#define BUFFER_SIZE (20 << 20)
+#else
+#define BUFFER_SIZE (16 << 20)
+#endif
+
+
+/* One BUFFER_SIZE slot per CPU, laid out below START_ADDRESS. */
+#define BASE_ADDRESS (START_ADDRESS - BUFFER_SIZE * MAX_CPU_NUMBER)
+
+/* Fall back to the MAP_ANON spelling where MAP_ANONYMOUS is missing. */
+#ifndef MAP_ANONYMOUS
+#define MAP_ANONYMOUS MAP_ANON
+#endif
+
+#endif
+
--- /dev/null
+/**************************************************************************
+ Copyright (c) 2016, The OpenBLAS Project
+ All rights reserved.
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are
+ met:
+ 1. Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ 2. Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ 3. Neither the name of the OpenBLAS project nor the names of
+ its contributors may be used to endorse or promote products
+ derived from this software without specific prior written permission.
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
+ LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
+ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *****************************************************************************/
+
+#include <string.h>
+
+/* Core ids returned by detect(). Each value is used as an index into
+   cpuname[] and cpuname_lower[], so the tables must keep this order. */
+#define CPU_GENERIC 0
+#define CPU_Z13 1
+
+/* Upper-case core names, indexed by CPU_* id. */
+static char *cpuname[] = {
+ "ZARCH_GENERIC",
+ "Z13"
+};
+
+/* Lower-case core names, indexed by CPU_* id. */
+static char *cpuname_lower[] = {
+ "zarch_generic",
+ "z13"
+};
+
+/* Report the core id. No runtime probing is performed: the result is always
+   CPU_GENERIC. */
+int detect(void)
+{
+  int core_id = CPU_GENERIC;
+
+  return core_id;
+}
+
+/* Print the lower-case name of the detected core to stdout, with no
+   trailing newline. */
+void get_libname(void)
+{
+  const char *lib_name = cpuname_lower[detect()];
+
+  printf("%s", lib_name);
+}
+
+/* Return the upper-case name of the detected core. The pointer refers to an
+   entry of the static cpuname[] table and must not be freed. */
+char *get_corename(void)
+{
+  int core_id = detect();
+  char *core_name = cpuname[core_id];
+
+  return core_name;
+}
+
+/* Print the architecture name "ZARCH" to stdout, with no trailing newline. */
+void get_architecture(void)
+{
+  static const char arch_name[] = "ZARCH";
+
+  printf("%s", arch_name);
+}
+
+/* Print the upper-case name of the detected core to stdout, with no
+   trailing newline. */
+void get_subarchitecture(void)
+{
+  const char *core_name = cpuname[detect()];
+
+  printf("%s", core_name);
+}
+
+/* Print the sub-directory name "zarch" to stdout, with no trailing newline. */
+void get_subdirname(void)
+{
+  static const char subdir_name[] = "zarch";
+
+  printf("%s", subdir_name);
+}
+
+
+/* Print the configuration macros for the detected core to stdout: one core
+   selection macro followed by DTB_DEFAULT_ENTRIES.
+   An id that is not one of the known CPU_* values is treated as
+   CPU_GENERIC, so a complete configuration is always emitted instead of
+   silently printing nothing. */
+void get_cpuconfig(void)
+{
+  switch (detect()) {
+  case CPU_Z13:
+    printf("#define Z13\n");
+    break;
+  case CPU_GENERIC:
+  default:
+    printf("#define ZARCH_GENERIC\n");
+    break;
+  }
+
+  /* Identical for every core handled above, so it is emitted once here. */
+  printf("#define DTB_DEFAULT_ENTRIES 64\n");
+}
ARCH_POWER
#endif
+/* The marker token below is kept only when the compiler predefines
+   __s390x__ or __zarch__. */
+#if defined(__s390x__) || defined(__zarch__)
+ARCH_ZARCH
+#endif
+
#ifdef __mips64
ARCH_MIPS64
#endif
#define OPENBLAS_SUPPORTED
#endif
+/* zarch / s390x: define ZARCH, include the zarch CPU identification code and
+   mark the platform as supported. */
+#if defined(__zarch__) || defined(__s390x__)
+#define ZARCH
+#include "cpuid_zarch.c"
+#define OPENBLAS_SUPPORTED
+#endif
+
#ifdef INTEL_AMD
#include "cpuid_x86.c"
#define OPENBLAS_SUPPORTED
#ifdef FORCE
printf("CORE=%s\n", CORENAME);
#else
-#if defined(INTEL_AMD) || defined(POWER) || defined(__mips__) || defined(__arm__) || defined(__aarch64__)
+#if defined(INTEL_AMD) || defined(POWER) || defined(__mips__) || defined(__arm__) || defined(__aarch64__) || defined(ZARCH)
printf("CORE=%s\n", get_corename());
#endif
#endif
#ifdef FORCE
printf("#define CHAR_CORENAME \"%s\"\n", CORENAME);
#else
-#if defined(INTEL_AMD) || defined(POWER) || defined(__mips__) || defined(__arm__) || defined(__aarch64__)
+#if defined(INTEL_AMD) || defined(POWER) || defined(__mips__) || defined(__arm__) || defined(__aarch64__) || defined(ZARCH)
printf("#define CHAR_CORENAME \"%s\"\n", get_corename());
#endif
#endif
--- /dev/null
+# Defaults: each variable below is set to a source under ../generic only
+# when it has not already been defined.
+ifndef SCABS_KERNEL
+SCABS_KERNEL = ../generic/cabs.c
+endif
+
+ifndef DCABS_KERNEL
+DCABS_KERNEL = ../generic/cabs.c
+endif
+
+ifndef QCABS_KERNEL
+QCABS_KERNEL = ../generic/cabs.c
+endif
+
+ifndef LSAME_KERNEL
+LSAME_KERNEL = ../generic/lsame.c
+endif
+
+# GEMM beta scaling: the real variants share gemm_beta.c and the complex
+# variants share zgemm_beta.c.
+ifndef SGEMM_BETA
+SGEMM_BETA = ../generic/gemm_beta.c
+endif
+ifndef DGEMM_BETA
+DGEMM_BETA = ../generic/gemm_beta.c
+endif
+ifndef CGEMM_BETA
+CGEMM_BETA = ../generic/zgemm_beta.c
+endif
+ifndef ZGEMM_BETA
+ZGEMM_BETA = ../generic/zgemm_beta.c
+endif
+
--- /dev/null
+# Vector and matrix-vector kernels: every entry reuses a C source from
+# ../arm. The S and D variables share one file, and the C and Z variables
+# share the matching z-prefixed file.
+SAMAXKERNEL = ../arm/amax.c
+DAMAXKERNEL = ../arm/amax.c
+CAMAXKERNEL = ../arm/zamax.c
+ZAMAXKERNEL = ../arm/zamax.c
+
+SAMINKERNEL = ../arm/amin.c
+DAMINKERNEL = ../arm/amin.c
+CAMINKERNEL = ../arm/zamin.c
+ZAMINKERNEL = ../arm/zamin.c
+
+SMAXKERNEL = ../arm/max.c
+DMAXKERNEL = ../arm/max.c
+
+SMINKERNEL = ../arm/min.c
+DMINKERNEL = ../arm/min.c
+
+ISAMAXKERNEL = ../arm/iamax.c
+IDAMAXKERNEL = ../arm/iamax.c
+ICAMAXKERNEL = ../arm/izamax.c
+IZAMAXKERNEL = ../arm/izamax.c
+
+ISAMINKERNEL = ../arm/iamin.c
+IDAMINKERNEL = ../arm/iamin.c
+ICAMINKERNEL = ../arm/izamin.c
+IZAMINKERNEL = ../arm/izamin.c
+
+ISMAXKERNEL = ../arm/imax.c
+IDMAXKERNEL = ../arm/imax.c
+
+ISMINKERNEL = ../arm/imin.c
+IDMINKERNEL = ../arm/imin.c
+
+SASUMKERNEL = ../arm/asum.c
+DASUMKERNEL = ../arm/asum.c
+CASUMKERNEL = ../arm/zasum.c
+ZASUMKERNEL = ../arm/zasum.c
+
+SAXPYKERNEL = ../arm/axpy.c
+DAXPYKERNEL = ../arm/axpy.c
+CAXPYKERNEL = ../arm/zaxpy.c
+ZAXPYKERNEL = ../arm/zaxpy.c
+
+SCOPYKERNEL = ../arm/copy.c
+DCOPYKERNEL = ../arm/copy.c
+CCOPYKERNEL = ../arm/zcopy.c
+ZCOPYKERNEL = ../arm/zcopy.c
+
+SDOTKERNEL = ../arm/dot.c
+DDOTKERNEL = ../arm/dot.c
+CDOTKERNEL = ../arm/zdot.c
+ZDOTKERNEL = ../arm/zdot.c
+
+SNRM2KERNEL = ../arm/nrm2.c
+DNRM2KERNEL = ../arm/nrm2.c
+CNRM2KERNEL = ../arm/znrm2.c
+ZNRM2KERNEL = ../arm/znrm2.c
+
+SROTKERNEL = ../arm/rot.c
+DROTKERNEL = ../arm/rot.c
+CROTKERNEL = ../arm/zrot.c
+ZROTKERNEL = ../arm/zrot.c
+
+SSCALKERNEL = ../arm/scal.c
+DSCALKERNEL = ../arm/scal.c
+CSCALKERNEL = ../arm/zscal.c
+ZSCALKERNEL = ../arm/zscal.c
+
+SSWAPKERNEL = ../arm/swap.c
+DSWAPKERNEL = ../arm/swap.c
+CSWAPKERNEL = ../arm/zswap.c
+ZSWAPKERNEL = ../arm/zswap.c
+
+SGEMVNKERNEL = ../arm/gemv_n.c
+DGEMVNKERNEL = ../arm/gemv_n.c
+CGEMVNKERNEL = ../arm/zgemv_n.c
+ZGEMVNKERNEL = ../arm/zgemv_n.c
+
+SGEMVTKERNEL = ../arm/gemv_t.c
+DGEMVTKERNEL = ../arm/gemv_t.c
+CGEMVTKERNEL = ../arm/zgemv_t.c
+ZGEMVTKERNEL = ../arm/zgemv_t.c
+
+# TRMM and GEMM use the 2x2 C kernels from ../generic together with the
+# matching *_ncopy_2 / *_tcopy_2 copy routines.
+# NOTE(review): the 2x2 shape presumably has to agree with the
+# *GEMM_DEFAULT_UNROLL_M/N values configured for this target - confirm.
+STRMMKERNEL = ../generic/trmmkernel_2x2.c
+DTRMMKERNEL = ../generic/trmmkernel_2x2.c
+CTRMMKERNEL = ../generic/ztrmmkernel_2x2.c
+ZTRMMKERNEL = ../generic/ztrmmkernel_2x2.c
+
+SGEMMKERNEL = ../generic/gemmkernel_2x2.c
+SGEMMONCOPY = ../generic/gemm_ncopy_2.c
+SGEMMOTCOPY = ../generic/gemm_tcopy_2.c
+SGEMMONCOPYOBJ = sgemm_oncopy.o
+SGEMMOTCOPYOBJ = sgemm_otcopy.o
+
+DGEMMKERNEL = ../generic/gemmkernel_2x2.c
+DGEMMONCOPY = ../generic/gemm_ncopy_2.c
+DGEMMOTCOPY = ../generic/gemm_tcopy_2.c
+DGEMMONCOPYOBJ = dgemm_oncopy.o
+DGEMMOTCOPYOBJ = dgemm_otcopy.o
+
+CGEMMKERNEL = ../generic/zgemmkernel_2x2.c
+CGEMMONCOPY = ../generic/zgemm_ncopy_2.c
+CGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
+CGEMMONCOPYOBJ = cgemm_oncopy.o
+CGEMMOTCOPYOBJ = cgemm_otcopy.o
+
+ZGEMMKERNEL = ../generic/zgemmkernel_2x2.c
+ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c
+ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
+ZGEMMONCOPYOBJ = zgemm_oncopy.o
+ZGEMMOTCOPYOBJ = zgemm_otcopy.o
+
+# TRSM: all four precisions use the same four ../generic sources, one per
+# LN / LT / RN / RT variant.
+STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
+STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
+STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
+STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
+
+DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
+DTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
+DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
+DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
+
+CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
+CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
+CTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
+CTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
+
+ZTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
+ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
+ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
+ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
+
+
+
+
--- /dev/null
+# Empty double-colon clean rule: it contributes no commands of its own.
+clean ::
+
#endif
+/* Tuning parameters used when ZARCH_GENERIC is defined.
+   All GEMM unroll factors are 2 in both M and N.
+   NOTE(review): these presumably have to match the 2x2 generic GEMM/TRMM
+   kernels selected for this target - confirm before changing either side. */
+#if defined(ZARCH_GENERIC)
+#define SNUMOPT 2
+#define DNUMOPT 2
+
+#define GEMM_DEFAULT_OFFSET_A 0
+#define GEMM_DEFAULT_OFFSET_B 0
+#define GEMM_DEFAULT_ALIGN 0x03fffUL
+
+#define SGEMM_DEFAULT_UNROLL_M 2
+#define SGEMM_DEFAULT_UNROLL_N 2
+
+#define DGEMM_DEFAULT_UNROLL_M 2
+#define DGEMM_DEFAULT_UNROLL_N 2
+
+#define CGEMM_DEFAULT_UNROLL_M 2
+#define CGEMM_DEFAULT_UNROLL_N 2
+
+#define ZGEMM_DEFAULT_UNROLL_M 2
+#define ZGEMM_DEFAULT_UNROLL_N 2
+
+/* Per-precision P, Q and R values; NOTE(review): presumably the GEMM
+   blocking sizes - their consumers are not visible here. */
+#define SGEMM_DEFAULT_P 128
+#define DGEMM_DEFAULT_P 128
+#define CGEMM_DEFAULT_P 96
+#define ZGEMM_DEFAULT_P 64
+
+#define SGEMM_DEFAULT_Q 240
+#define DGEMM_DEFAULT_Q 120
+#define CGEMM_DEFAULT_Q 120
+#define ZGEMM_DEFAULT_Q 120
+
+#define SGEMM_DEFAULT_R 12288
+#define DGEMM_DEFAULT_R 8192
+#define CGEMM_DEFAULT_R 4096
+#define ZGEMM_DEFAULT_R 4096
+
+
+#define SYMV_P 16
+#endif
+
#ifdef GENERIC