--- /dev/null
+
+# When CORE is ARMV8, append -march=armv8-a to both common option lists
+# (CCOMMON_OPT / FCOMMON_OPT -- presumably the C and Fortran compiler flags).
+ifeq ($(CORE), ARMV8)
+CCOMMON_OPT += -march=armv8-a
+FCOMMON_OPT += -march=armv8-a
+endif
+
+
NO_BINARY_MODE = 1
BINARY_DEFINED = 1
endif
+
+# arm64 sets the same two switches as the preceding branch: NO_BINARY_MODE and
+# BINARY_DEFINED are both forced to 1.
+# NOTE(review): presumably this disables 32/64-bit mode flag selection for
+# this architecture -- confirm against where these variables are consumed.
+ifeq ($(ARCH), arm64)
+NO_BINARY_MODE = 1
+BINARY_DEFINED = 1
+endif
+
+
+
#
# C Compiler dependent settings
#
$architecture = sparc if ($data =~ /ARCH_SPARC/);
$architecture = ia64 if ($data =~ /ARCH_IA64/);
$architecture = arm if ($data =~ /ARCH_ARM/);
+# Keep this test after the arm one: /ARCH_ARM/ also matches "ARCH_ARM64", so
+# it is this later assignment that makes arm64 win.
+$architecture = arm64 if ($data =~ /ARCH_ARM64/);
$defined = 0;
$architecture = sparc if ($data =~ /ARCH_SPARC/);
$architecture = ia64 if ($data =~ /ARCH_IA64/);
$architecture = arm if ($data =~ /ARCH_ARM/);
+# Order matters: /ARCH_ARM/ matches "ARCH_ARM64" as well, so the arm64 test
+# has to run last to override the arm result.
+$architecture = arm64 if ($data =~ /ARCH_ARM64/);
$binformat = bin32;
$binformat = bin64 if ($data =~ /BINARY_64/);
#endif
-#ifdef ARMV7
+#if defined(ARMV7) || defined(ARMV6) || defined(ARMV8)
#define YIELDING asm volatile ("nop;nop;nop;nop;nop;nop;nop;nop; \n");
#endif
#include "common_arm.h"
#endif
+#ifdef ARCH_ARM64
+#include "common_arm64.h"
+#endif
+
#ifdef OS_LINUX
#include "common_linux.h"
#endif
--- /dev/null
+/*****************************************************************************
+Copyright (c) 2011, Lab of Parallel Software and Computational Science,ICSAS
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+ 1. Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ 3. Neither the name of the ISCAS nor the names of its contributors may
+ be used to endorse or promote products derived from this software
+ without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
+USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+**********************************************************************************/
+
+/*********************************************************************/
+/* Copyright 2009, 2010 The University of Texas at Austin. */
+/* All rights reserved. */
+/* */
+/* Redistribution and use in source and binary forms, with or */
+/* without modification, are permitted provided that the following */
+/* conditions are met: */
+/* */
+/* 1. Redistributions of source code must retain the above */
+/* copyright notice, this list of conditions and the following */
+/* disclaimer. */
+/* */
+/* 2. Redistributions in binary form must reproduce the above */
+/* copyright notice, this list of conditions and the following */
+/* disclaimer in the documentation and/or other materials */
+/* provided with the distribution. */
+/* */
+/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
+/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
+/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
+/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
+/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
+/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
+/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
+/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
+/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
+/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
+/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
+/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
+/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
+/* POSSIBILITY OF SUCH DAMAGE. */
+/* */
+/* The views and conclusions contained in the software and */
+/* documentation are those of the authors and should not be */
+/* interpreted as representing official policies, either expressed */
+/* or implied, of The University of Texas at Austin. */
+/*********************************************************************/
+
+#ifndef COMMON_ARM64
+#define COMMON_ARM64
+
+/* AArch64 has a weakly ordered memory model, so the barrier macros must emit
+   real instructions instead of expanding to nothing. MB is a full data
+   memory barrier, WMB orders stores against stores only; both cover the
+   inner shareable domain and the "memory" clobber makes them compiler
+   barriers too. Use them as statements: MB; / WMB; */
+#define MB   __asm__ __volatile__ ("dmb ish"   : : : "memory")
+#define WMB  __asm__ __volatile__ ("dmb ishst" : : : "memory")
+
+#define INLINE inline
+
+#define RETURN_BY_COMPLEX
+
+#ifndef ASSEMBLER
+
+/*
+ * Acquire the spin lock stored at *address.
+ *
+ * The lock word is treated as free while it reads zero and as held while it
+ * is non-zero. The previous body was entirely commented out (and written
+ * with 32-bit ARM ldrex/strex), so callers got no mutual exclusion at all.
+ *
+ * NOTE(review): assumes the matching unlock stores 0 to *address -- confirm
+ * against the unlock routine, which is not visible here.
+ */
+static void __inline blas_lock(volatile BLASULONG *address){
+
+  do {
+    /* Wait on plain loads first so contending threads do not keep issuing
+       atomic stores to the same location. */
+    while (*address) {YIELDING;};
+
+    /* Atomically write 1 and fetch the previous value (acquire barrier).
+       A non-zero previous value means another thread took the lock between
+       the load above and this exchange, so go back to waiting. */
+  } while (__sync_lock_test_and_set(address, 1));
+}
+
+
+/*
+ * Stand-in for a cycle counter: returns the current gettimeofday() wall-clock
+ * time converted to milliseconds (seconds plus microseconds * 1e-6, times
+ * 1000) and truncated to an integer.
+ *
+ * The scale factor is written as a plain double constant; the former
+ * "1000.0d" used a non-standard suffix that only some compilers accept.
+ */
+static inline unsigned long long rpcc(void){
+  struct timeval tv;
+  double msec;
+
+  gettimeofday(&tv, NULL);
+  msec = ((double) tv.tv_sec + (double) tv.tv_usec * 1e-6) * 1000.0;
+  return (unsigned long long) msec;
+}
+
+/* Returns x / y using ordinary integer division; the caller must pass a
+   non-zero y. */
+static inline int blas_quickdivide(blasint x, blasint y){
+  blasint quotient = x / y;
+
+  return quotient;
+}
+
+/* GET_IMAGE(res) stores floating-point register 1 (d1 when DOUBLE is
+   defined, s1 otherwise) into res -- presumably the imaginary part of a
+   complex return value, TODO confirm. AArch64 uses the plain "str"
+   mnemonic for SIMD&FP registers; the 32-bit VFP "vstr.f64"/"vstr.f32"
+   spellings are not accepted by the AArch64 assembler. */
+#if defined(DOUBLE)
+#define GET_IMAGE(res) __asm__ __volatile__("str d1, %0" : "=m"(res) : : "memory")
+#else
+#define GET_IMAGE(res) __asm__ __volatile__("str s1, %0" : "=m"(res) : : "memory")
+#endif
+
+#define GET_IMAGE_CANCEL
+
+#endif
+
+
+/* REALNAME is the symbol name used for the routine being built: ASMNAME by
+   default, ASMFNAME when F_INTERFACE is defined. Both names are expected to
+   be supplied from outside this header. */
+#ifndef F_INTERFACE
+#define REALNAME ASMNAME
+#else
+#define REALNAME ASMFNAME
+#endif
+
+#if defined(ASSEMBLER) && !defined(NEEDPARAM)
+
+/* Entry boilerplate for assembly kernels: place the code in .text, align it
+   to 2^4 bytes, export REALNAME as a function symbol and open its label.
+   The former ".arm" directive selects the 32-bit A32 instruction set and is
+   rejected when assembling for AArch64, so it must not appear here; ".type"
+   replaces the stabs-only ".func". */
+#define PROLOGUE \
+	.text ;\
+	.align	4 ;\
+	.global	REALNAME ;\
+	.type	REALNAME, %function ;\
+REALNAME:
+
+/* Nothing is emitted at the end of a kernel. */
+#define EPILOGUE
+
+/* No profiling hook on this target. */
+#define PROFCODE
+
+#endif
+
+
+#define SEEK_ADDRESS
+
+/* 4 << 10 = 4096 (4 KiB); only applied when PAGESIZE is not already defined. */
+#ifndef PAGESIZE
+#define PAGESIZE ( 4 << 10)
+#endif
+/* 4 << 20 = 4194304 (4 MiB). */
+#define HUGE_PAGESIZE ( 4 << 20)
+
+/* 16 << 20 = 16777216 (16 MiB). */
+#define BUFFER_SIZE (16 << 20)
+
+
+/* START_ADDRESS minus MAX_CPU_NUMBER blocks of BUFFER_SIZE each. Both
+   START_ADDRESS and MAX_CPU_NUMBER must be defined elsewhere before use. */
+#define BASE_ADDRESS (START_ADDRESS - BUFFER_SIZE * MAX_CPU_NUMBER)
+
+/* Fall back to the MAP_ANON spelling where MAP_ANONYMOUS is not provided. */
+#ifndef MAP_ANONYMOUS
+#define MAP_ANONYMOUS MAP_ANON
+#endif
+
+#endif
ARCH_ARM
#endif
+#if defined(__aarch64__)
+ARCH_ARM64
+#endif
#else
#endif
+#ifdef FORCE_ARMV8
+/* Target description used when FORCE_ARMV8 is defined. ARCHCONFIG is the
+   list of -D options describing the core. L2_SIZE is 512 KiB (524288); the
+   earlier value 512488 was a digit slip and not even a multiple of the
+   32-byte L2_LINESIZE. */
+#define FORCE
+#define ARCHITECTURE "ARM64"
+#define SUBARCHITECTURE "ARMV8"
+#define SUBDIRNAME "arm64"
+#define ARCHCONFIG "-DARMV8 " \
+ "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=32 " \
+ "-DL2_SIZE=524288 -DL2_LINESIZE=32 " \
+ "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 " \
+ "-DHAVE_VFP -DHAVE_VFPV3 -DHAVE_VFPV4"
+#define LIBNAME "armv8"
+#define CORENAME "ARMV8"
+#else
+#endif
+
+
#ifndef FORCE
USE_TRMM = 1
endif
+# arm64 enables USE_TRMM, like the neighbouring entries in this list.
+# NOTE(review): presumably this selects dedicated TRMM kernel sources instead
+# of reusing the GEMM kernel -- confirm where USE_TRMM is consumed.
+ifeq ($(ARCH), arm64)
+USE_TRMM = 1
+endif
+
ifeq ($(TARGET), LOONGSON3B)
USE_TRMM = 1
endif
--- /dev/null
+# Fallback kernel selections. Every variable is assigned only when it is not
+# already defined (ifndef), so any earlier definition takes precedence.
+ifndef SNRM2KERNEL
+SNRM2KERNEL = nrm2.c
+endif
+
+ifndef DNRM2KERNEL
+DNRM2KERNEL = nrm2.c
+endif
+
+ifndef CNRM2KERNEL
+CNRM2KERNEL = znrm2.c
+endif
+
+ifndef ZNRM2KERNEL
+ZNRM2KERNEL = znrm2.c
+endif
+
+# The remaining defaults all point at sources under ../generic.
+ifndef SCABS_KERNEL
+SCABS_KERNEL = ../generic/cabs.c
+endif
+
+ifndef DCABS_KERNEL
+DCABS_KERNEL = ../generic/cabs.c
+endif
+
+ifndef QCABS_KERNEL
+QCABS_KERNEL = ../generic/cabs.c
+endif
+
+ifndef LSAME_KERNEL
+LSAME_KERNEL = ../generic/lsame.c
+endif
+
+# S/D share gemm_beta.c; C/Z share zgemm_beta.c.
+ifndef SGEMM_BETA
+SGEMM_BETA = ../generic/gemm_beta.c
+endif
+ifndef DGEMM_BETA
+DGEMM_BETA = ../generic/gemm_beta.c
+endif
+ifndef CGEMM_BETA
+CGEMM_BETA = ../generic/zgemm_beta.c
+endif
+ifndef ZGEMM_BETA
+ZGEMM_BETA = ../generic/zgemm_beta.c
+endif
+
+
--- /dev/null
+# Vector and matrix-vector kernels. All of them reuse the C sources under
+# ../arm: the S and D variants share one file per operation, and the C and Z
+# variants share the corresponding z-prefixed file.
+SAMAXKERNEL = ../arm/amax.c
+DAMAXKERNEL = ../arm/amax.c
+CAMAXKERNEL = ../arm/zamax.c
+ZAMAXKERNEL = ../arm/zamax.c
+
+SAMINKERNEL = ../arm/amin.c
+DAMINKERNEL = ../arm/amin.c
+CAMINKERNEL = ../arm/zamin.c
+ZAMINKERNEL = ../arm/zamin.c
+
+SMAXKERNEL = ../arm/max.c
+DMAXKERNEL = ../arm/max.c
+
+SMINKERNEL = ../arm/min.c
+DMINKERNEL = ../arm/min.c
+
+ISAMAXKERNEL = ../arm/iamax.c
+IDAMAXKERNEL = ../arm/iamax.c
+ICAMAXKERNEL = ../arm/izamax.c
+IZAMAXKERNEL = ../arm/izamax.c
+
+ISAMINKERNEL = ../arm/iamin.c
+IDAMINKERNEL = ../arm/iamin.c
+ICAMINKERNEL = ../arm/izamin.c
+IZAMINKERNEL = ../arm/izamin.c
+
+ISMAXKERNEL = ../arm/imax.c
+IDMAXKERNEL = ../arm/imax.c
+
+ISMINKERNEL = ../arm/imin.c
+IDMINKERNEL = ../arm/imin.c
+
+SASUMKERNEL = ../arm/asum.c
+DASUMKERNEL = ../arm/asum.c
+CASUMKERNEL = ../arm/zasum.c
+ZASUMKERNEL = ../arm/zasum.c
+
+SAXPYKERNEL = ../arm/axpy.c
+DAXPYKERNEL = ../arm/axpy.c
+CAXPYKERNEL = ../arm/zaxpy.c
+ZAXPYKERNEL = ../arm/zaxpy.c
+
+SCOPYKERNEL = ../arm/copy.c
+DCOPYKERNEL = ../arm/copy.c
+CCOPYKERNEL = ../arm/zcopy.c
+ZCOPYKERNEL = ../arm/zcopy.c
+
+SDOTKERNEL = ../arm/dot.c
+DDOTKERNEL = ../arm/dot.c
+CDOTKERNEL = ../arm/zdot.c
+ZDOTKERNEL = ../arm/zdot.c
+
+SNRM2KERNEL = ../arm/nrm2.c
+DNRM2KERNEL = ../arm/nrm2.c
+CNRM2KERNEL = ../arm/znrm2.c
+ZNRM2KERNEL = ../arm/znrm2.c
+
+SROTKERNEL = ../arm/rot.c
+DROTKERNEL = ../arm/rot.c
+CROTKERNEL = ../arm/zrot.c
+ZROTKERNEL = ../arm/zrot.c
+
+SSCALKERNEL = ../arm/scal.c
+DSCALKERNEL = ../arm/scal.c
+CSCALKERNEL = ../arm/zscal.c
+ZSCALKERNEL = ../arm/zscal.c
+
+SSWAPKERNEL = ../arm/swap.c
+DSWAPKERNEL = ../arm/swap.c
+CSWAPKERNEL = ../arm/zswap.c
+ZSWAPKERNEL = ../arm/zswap.c
+
+SGEMVNKERNEL = ../arm/gemv_n.c
+DGEMVNKERNEL = ../arm/gemv_n.c
+CGEMVNKERNEL = ../arm/zgemv_n.c
+ZGEMVNKERNEL = ../arm/zgemv_n.c
+
+SGEMVTKERNEL = ../arm/gemv_t.c
+DGEMVTKERNEL = ../arm/gemv_t.c
+CGEMVTKERNEL = ../arm/zgemv_t.c
+ZGEMVTKERNEL = ../arm/zgemv_t.c
+
+# TRMM kernels: the generic 2x2 C implementations (z-prefixed file for C/Z).
+STRMMKERNEL = ../generic/trmmkernel_2x2.c
+DTRMMKERNEL = ../generic/trmmkernel_2x2.c
+CTRMMKERNEL = ../generic/ztrmmkernel_2x2.c
+ZTRMMKERNEL = ../generic/ztrmmkernel_2x2.c
+
+# GEMM: generic 2x2 kernel plus the width-2 ncopy/tcopy routines. The *OBJ
+# variables name the object files built from the copy routines.
+# NOTE(review): the "2" in these file names presumably has to agree with the
+# GEMM unroll factors configured for this core -- confirm.
+SGEMMKERNEL = ../generic/gemmkernel_2x2.c
+SGEMMONCOPY = ../generic/gemm_ncopy_2.c
+SGEMMOTCOPY = ../generic/gemm_tcopy_2.c
+SGEMMONCOPYOBJ = sgemm_oncopy.o
+SGEMMOTCOPYOBJ = sgemm_otcopy.o
+
+DGEMMKERNEL = ../generic/gemmkernel_2x2.c
+DGEMMONCOPY = ../generic/gemm_ncopy_2.c
+DGEMMOTCOPY = ../generic/gemm_tcopy_2.c
+DGEMMONCOPYOBJ = dgemm_oncopy.o
+DGEMMOTCOPYOBJ = dgemm_otcopy.o
+
+CGEMMKERNEL = ../generic/zgemmkernel_2x2.c
+CGEMMONCOPY = ../generic/zgemm_ncopy_2.c
+CGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
+CGEMMONCOPYOBJ = cgemm_oncopy.o
+CGEMMOTCOPYOBJ = cgemm_otcopy.o
+
+ZGEMMKERNEL = ../generic/zgemmkernel_2x2.c
+ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c
+ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
+ZGEMMONCOPYOBJ = zgemm_oncopy.o
+ZGEMMOTCOPYOBJ = zgemm_otcopy.o
+
+# TRSM: all four precisions use the same generic trsm_kernel_{LN,LT,RN,RT}.c
+# sources.
+STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
+STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
+STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
+STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
+
+DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
+DTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
+DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
+DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
+
+CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
+CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
+CTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
+CTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
+
+ZTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
+ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
+ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
+ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
+
+
+
+
--- /dev/null
+# Double-colon clean rule with no recipe: nothing is removed by this file.
+clean ::
+
--- /dev/null
+TOPDIR = ../../..
+include ../../../Makefile.system
+
+# Choose the LASWP / ZLASWP sources. Assignments further down override the
+# ones above them, and the ifndef blocks at the end supply the default.
+ifeq ($(CORE), CORE2)
+LASWP = ../generic/laswp_k_2.c
+ZLASWP = ../generic/zlaswp_k_2.c
+endif
+
+ifeq ($(CORE), OPTERON)
+LASWP = ../generic/laswp_k_1.c
+ZLASWP = ../generic/zlaswp_k_1.c
+endif
+
+ifeq ($(CORE), PRESCOTT)
+LASWP = ../generic/laswp_k_1.c
+ZLASWP = ../generic/zlaswp_k_1.c
+endif
+
+# Comes after the per-CORE choices, so DYNAMIC_ARCH=1 wins over them.
+ifeq ($(DYNAMIC_ARCH), 1)
+LASWP = ../generic/laswp_k_4.c
+ZLASWP = ../generic/zlaswp_k_4.c
+endif
+
+# Defaults for every configuration not matched above.
+ifndef LASWP
+LASWP = ../generic/laswp_k.c
+endif
+
+ifndef ZLASWP
+ZLASWP = ../generic/zlaswp_k.c
+endif
+
+# The shared rules come from the generic directory's Makefile.
+include ../generic/Makefile
+
#define SYMV_P 16
#endif
+/* Tuning parameters selected when ARMV8 is defined. */
+#if defined(ARMV8)
+#define SNUMOPT 2
+#define DNUMOPT 2
+
+/* No A/B buffer offsets; alignment mask 0x3fff (low 14 bits). */
+#define GEMM_DEFAULT_OFFSET_A 0
+#define GEMM_DEFAULT_OFFSET_B 0
+#define GEMM_DEFAULT_ALIGN 0x03fffUL
+
+/* Unroll factors are 2x2 for all four precisions.
+   NOTE(review): presumably these must agree with the 2x2 GEMM/TRMM kernel
+   and width-2 copy sources chosen for this core -- confirm. */
+#define SGEMM_DEFAULT_UNROLL_M 2
+#define SGEMM_DEFAULT_UNROLL_N 2
+
+#define DGEMM_DEFAULT_UNROLL_M 2
+#define DGEMM_DEFAULT_UNROLL_N 2
+
+#define CGEMM_DEFAULT_UNROLL_M 2
+#define CGEMM_DEFAULT_UNROLL_N 2
+
+#define ZGEMM_DEFAULT_UNROLL_M 2
+#define ZGEMM_DEFAULT_UNROLL_N 2
+
+/* Default P, Q and R values per precision. */
+#define SGEMM_DEFAULT_P 128
+#define DGEMM_DEFAULT_P 128
+#define CGEMM_DEFAULT_P 96
+#define ZGEMM_DEFAULT_P 64
+
+#define SGEMM_DEFAULT_Q 240
+#define DGEMM_DEFAULT_Q 120
+#define CGEMM_DEFAULT_Q 120
+#define ZGEMM_DEFAULT_Q 120
+
+#define SGEMM_DEFAULT_R 12288
+#define DGEMM_DEFAULT_R 8192
+#define CGEMM_DEFAULT_R 4096
+#define ZGEMM_DEFAULT_R 4096
+
+
+#define SYMV_P 16
+#endif
+
+
#ifdef GENERIC