added experimental support for ARMV8
author wernsaar <wernsaar@googlemail.com>
Sun, 24 Nov 2013 14:47:00 +0000 (15:47 +0100)
committer wernsaar <wernsaar@googlemail.com>
Sun, 24 Nov 2013 14:47:00 +0000 (15:47 +0100)
13 files changed:
Makefile.arm64 [new file with mode: 0644]
Makefile.system
c_check
common.h
common_arm64.h [new file with mode: 0644]
ctest.c
getarch.c
kernel/Makefile.L3
kernel/arm64/KERNEL [new file with mode: 0644]
kernel/arm64/KERNEL.ARMV8 [new file with mode: 0644]
kernel/arm64/Makefile [new file with mode: 0644]
lapack/laswp/arm64/Makefile [new file with mode: 0644]
param.h

diff --git a/Makefile.arm64 b/Makefile.arm64
new file mode 100644 (file)
index 0000000..a4f8bab
--- /dev/null
@@ -0,0 +1,7 @@
+# ARMV8 cores: pass -march=armv8-a through both CCOMMON_OPT and FCOMMON_OPT.
+ifeq ($(CORE), ARMV8)
+CCOMMON_OPT += -march=armv8-a
+FCOMMON_OPT += -march=armv8-a
+endif
+
+
diff --git a/Makefile.system b/Makefile.system
index 0f5e9c6..aceadf2 100644 (file)
--- a/Makefile.system
+++ b/Makefile.system
@@ -367,6 +367,14 @@ ifeq ($(ARCH), arm)
 NO_BINARY_MODE = 1
 BINARY_DEFINED = 1
 endif
+# arm64 gets the same two settings as the arm block directly above.
+ifeq ($(ARCH), arm64)
+NO_BINARY_MODE = 1
+BINARY_DEFINED = 1
+endif
+
+
+
 #
 #  C Compiler dependent settings
 #
diff --git a/c_check b/c_check
index c1cdd59..0828a5b 100644 (file)
--- a/c_check
+++ b/c_check
@@ -64,6 +64,7 @@ $architecture = alpha  if ($data =~ /ARCH_ALPHA/);
 $architecture = sparc  if ($data =~ /ARCH_SPARC/);
 $architecture = ia64   if ($data =~ /ARCH_IA64/);
 $architecture = arm    if ($data =~ /ARCH_ARM/);
+$architecture = arm64  if ($data =~ /ARCH_ARM64/); # must stay after the arm test: /ARCH_ARM/ also matches "ARCH_ARM64"
 
 $defined = 0;
 
@@ -151,6 +152,7 @@ $architecture = alpha  if ($data =~ /ARCH_ALPHA/);
 $architecture = sparc  if ($data =~ /ARCH_SPARC/);
 $architecture = ia64   if ($data =~ /ARCH_IA64/);
 $architecture = arm    if ($data =~ /ARCH_ARM/);
+$architecture = arm64  if ($data =~ /ARCH_ARM64/); # must stay after the arm test: /ARCH_ARM/ also matches "ARCH_ARM64"
 
 $binformat    = bin32;
 $binformat    = bin64  if ($data =~ /BINARY_64/);
index a277552..310fcad 100644 (file)
--- a/common.h
+++ b/common.h
@@ -311,7 +311,7 @@ typedef int blasint;
 #endif
 
 
-#ifdef ARMV7
+#if defined(ARMV7) || defined(ARMV6) || defined(ARMV8) /* ARMV6 and ARMV8 reuse the nop-based YIELDING of ARMV7 */
 #define YIELDING       asm volatile ("nop;nop;nop;nop;nop;nop;nop;nop; \n");
 #endif
 
@@ -375,6 +375,10 @@ please https://github.com/xianyi/OpenBLAS/issues/246
 #include "common_arm.h"
 #endif
 
+#ifdef ARCH_ARM64
+#include "common_arm64.h" /* included the same way as common_arm.h just above */
+#endif /* ARCH_ARM64 */
+
 #ifdef OS_LINUX
 #include "common_linux.h"
 #endif
diff --git a/common_arm64.h b/common_arm64.h
new file mode 100644 (file)
index 0000000..2da0d89
--- /dev/null
@@ -0,0 +1,169 @@
+/*****************************************************************************
+Copyright (c) 2011, Lab of Parallel Software and Computational Science,ICSAS
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+   1. Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+
+   2. Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in
+      the documentation and/or other materials provided with the
+      distribution.
+   3. Neither the name of the ISCAS nor the names of its contributors may 
+      be used to endorse or promote products derived from this software 
+      without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE 
+USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+**********************************************************************************/
+
+/*********************************************************************/
+/* Copyright 2009, 2010 The University of Texas at Austin.           */
+/* All rights reserved.                                              */
+/*                                                                   */
+/* Redistribution and use in source and binary forms, with or        */
+/* without modification, are permitted provided that the following   */
+/* conditions are met:                                               */
+/*                                                                   */
+/*   1. Redistributions of source code must retain the above         */
+/*      copyright notice, this list of conditions and the following  */
+/*      disclaimer.                                                  */
+/*                                                                   */
+/*   2. Redistributions in binary form must reproduce the above      */
+/*      copyright notice, this list of conditions and the following  */
+/*      disclaimer in the documentation and/or other materials       */
+/*      provided with the distribution.                              */
+/*                                                                   */
+/*    THIS  SOFTWARE IS PROVIDED  BY THE  UNIVERSITY OF  TEXAS AT    */
+/*    AUSTIN  ``AS IS''  AND ANY  EXPRESS OR  IMPLIED WARRANTIES,    */
+/*    INCLUDING, BUT  NOT LIMITED  TO, THE IMPLIED  WARRANTIES OF    */
+/*    MERCHANTABILITY  AND FITNESS FOR  A PARTICULAR  PURPOSE ARE    */
+/*    DISCLAIMED.  IN  NO EVENT SHALL THE UNIVERSITY  OF TEXAS AT    */
+/*    AUSTIN OR CONTRIBUTORS BE  LIABLE FOR ANY DIRECT, INDIRECT,    */
+/*    INCIDENTAL,  SPECIAL, EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES    */
+/*    (INCLUDING, BUT  NOT LIMITED TO,  PROCUREMENT OF SUBSTITUTE    */
+/*    GOODS  OR  SERVICES; LOSS  OF  USE,  DATA,  OR PROFITS;  OR    */
+/*    BUSINESS INTERRUPTION) HOWEVER CAUSED  AND ON ANY THEORY OF    */
+/*    LIABILITY, WHETHER  IN CONTRACT, STRICT  LIABILITY, OR TORT    */
+/*    (INCLUDING NEGLIGENCE OR OTHERWISE)  ARISING IN ANY WAY OUT    */
+/*    OF  THE  USE OF  THIS  SOFTWARE,  EVEN  IF ADVISED  OF  THE    */
+/*    POSSIBILITY OF SUCH DAMAGE.                                    */
+/*                                                                   */
+/* The views and conclusions contained in the software and           */
+/* documentation are those of the authors and should not be          */
+/* interpreted as representing official policies, either expressed   */
+/* or implied, of The University of Texas at Austin.                 */
+/*********************************************************************/
+
+#ifndef COMMON_ARM64
+#define COMMON_ARM64
+/* AArch64 memory is weakly ordered, so the barriers must emit real instructions. */
+#define MB   __asm__ __volatile__ ("dmb ish"   : : : "memory") /* full barrier */
+#define WMB  __asm__ __volatile__ ("dmb ishst" : : : "memory") /* store-store barrier */
+
+#define INLINE inline
+
+#define RETURN_BY_COMPLEX
+
+#ifndef ASSEMBLER
+
+static void __inline blas_lock(volatile BLASULONG *address){
+/*
+ * Acquire the spin lock at *address (0 = free, non-zero = held).
+ *
+ * An ARMv7 ldrex/strex sequence cannot be assembled for AArch64, so the
+ * lock is taken with the GCC __sync builtin instead: an atomic exchange
+ * with acquire semantics that needs no target-specific assembly.
+ * Returns only once this caller owns the lock.
+ */
+  BLASULONG previous;
+
+  do {
+    /* Wait with plain reads first so waiters do not issue atomic writes
+       while the lock is held; YIELDING is supplied by common.h. */
+    while (*address) {YIELDING;};
+
+    /* Try to take the lock; a non-zero old value means another thread
+       won the race, so go back to waiting. */
+    previous = __sync_lock_test_and_set(address, 1);
+
+  } while (previous);
+}
+
+/* rpcc: current gettimeofday() time converted to milliseconds. */
+static inline unsigned long long rpcc(void){
+  unsigned long long ret=0;
+  double v;
+  struct timeval tv;
+  gettimeofday(&tv,NULL);
+  v=(double) tv.tv_sec + (double) tv.tv_usec * 1e-6; /* seconds, with microsecond fraction */
+  ret = (unsigned long long) ( v * 1000.0 ); /* standard double constant; a "d" suffix is a GCC-only extension */
+  return ret;
+}
+/* Plain integer division of x by y; the caller must not pass y == 0. */
+static inline int blas_quickdivide(blasint x, blasint y){
+  return x / y;
+}
+/* GET_IMAGE copies FP register d1 (DOUBLE) or s1 to res with an AArch64 str. */
+#if defined(DOUBLE)
+#define GET_IMAGE(res)  __asm__ __volatile__("str d1, %0" : "=m"(res) : : "memory")
+#else
+#define GET_IMAGE(res)  __asm__ __volatile__("str s1, %0" : "=m"(res) : : "memory")
+#endif
+/* NOTE(review): presumably fetches the imaginary half of a complex return value; confirm with callers. */
+#define GET_IMAGE_CANCEL
+
+#endif /* !ASSEMBLER */
+
+/* REALNAME, the symbol PROLOGUE exports: ASMFNAME with F_INTERFACE, else ASMNAME. */
+#ifndef F_INTERFACE
+#define REALNAME ASMNAME
+#else
+#define REALNAME ASMFNAME
+#endif
+/* Entry/exit macros for hand-written assembly kernels. */
+#if defined(ASSEMBLER) && !defined(NEEDPARAM)
+/* PROLOGUE: switch to .text, export REALNAME as a function symbol, open its label. */
+#define PROLOGUE \
+       .text            ;\
+       .global REALNAME ;\
+       .type   REALNAME, %function ;\
+REALNAME:
+
+#define EPILOGUE 
+
+#define PROFCODE
+
+#endif
+
+/* Page-size and buffer-size constants. */
+#define SEEK_ADDRESS
+
+#ifndef PAGESIZE
+#define PAGESIZE        ( 4 << 10) /* 4096 unless already defined */
+#endif
+#define HUGE_PAGESIZE   ( 4 << 20) /* 4194304 */
+
+#define BUFFER_SIZE     (16 << 20) /* 16777216 */
+
+/* START_ADDRESS and MAX_CPU_NUMBER are not defined in this header. */
+#define BASE_ADDRESS (START_ADDRESS - BUFFER_SIZE * MAX_CPU_NUMBER)
+
+#ifndef MAP_ANONYMOUS
+#define MAP_ANONYMOUS MAP_ANON /* use MAP_ANON when MAP_ANONYMOUS is missing */
+#endif
+
+#endif /* COMMON_ARM64 */
diff --git a/ctest.c b/ctest.c
index 1844163..86dc226 100644 (file)
--- a/ctest.c
+++ b/ctest.c
@@ -129,4 +129,7 @@ BINARY_64
 ARCH_ARM
 #endif
 
+#if defined(__aarch64__) /* c_check tests for the token below (its arm64 pattern match) */
+ARCH_ARM64
+#endif /* __aarch64__ */
 
index 4407e3d..7975c94 100644 (file)
--- a/getarch.c
+++ b/getarch.c
@@ -709,6 +709,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #else
 #endif
 
+#ifdef FORCE_ARMV8 /* forced target: fixed description of an ARMV8 core */
+#define FORCE
+#define ARCHITECTURE    "ARM64"
+#define SUBARCHITECTURE "ARMV8"
+#define SUBDIRNAME      "arm64"
+#define ARCHCONFIG   "-DARMV8 " \
+       "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=32 " \
+       "-DL2_SIZE=524288 -DL2_LINESIZE=32 " /* 524288 = 512 * 1024 */ \
+       "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 " \
+       "-DHAVE_VFP -DHAVE_VFPV3 -DHAVE_VFPV4"
+#define LIBNAME   "armv8"
+#define CORENAME  "ARMV8"
+#else
+#endif
+
+
 
 
 #ifndef FORCE
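diff --git a/kernel/Makefile.L3 b/kernel/Makefile.L3
index f543cd0..b9b4bef 100644 (file)
--- a/kernel/Makefile.L3
+++ b/kernel/Makefile.L3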
@@ -18,6 +18,10 @@ ifeq ($(ARCH), arm)
 USE_TRMM = 1
 endif
 
+ifeq ($(ARCH), arm64) # same setting as the arm block above
+USE_TRMM = 1
+endif
+
 ifeq ($(TARGET), LOONGSON3B)                                                                                            
 USE_TRMM = 1
 endif
diff --git a/kernel/arm64/KERNEL b/kernel/arm64/KERNEL
new file mode 100644 (file)
index 0000000..aeccfbf
--- /dev/null
@@ -0,0 +1,46 @@
+# Fallbacks for variables still undefined here. NRM2 uses ../arm (as KERNEL.ARMV8 does): kernel/arm64 has no nrm2.c.
+ifndef SNRM2KERNEL
+SNRM2KERNEL = ../arm/nrm2.c
+endif
+ifndef DNRM2KERNEL
+DNRM2KERNEL = ../arm/nrm2.c
+endif
+
+ifndef CNRM2KERNEL
+CNRM2KERNEL = ../arm/znrm2.c
+endif
+
+ifndef ZNRM2KERNEL
+ZNRM2KERNEL = ../arm/znrm2.c
+endif
+
+ifndef SCABS_KERNEL
+SCABS_KERNEL   = ../generic/cabs.c
+endif
+
+ifndef DCABS_KERNEL
+DCABS_KERNEL   = ../generic/cabs.c
+endif
+
+ifndef QCABS_KERNEL
+QCABS_KERNEL   = ../generic/cabs.c
+endif
+
+ifndef LSAME_KERNEL
+LSAME_KERNEL   = ../generic/lsame.c
+endif
+# GEMM_BETA defaults: S and D share gemm_beta.c, C and Z share zgemm_beta.c.
+ifndef SGEMM_BETA
+SGEMM_BETA = ../generic/gemm_beta.c
+endif
+ifndef DGEMM_BETA
+DGEMM_BETA = ../generic/gemm_beta.c
+endif
+ifndef CGEMM_BETA
+CGEMM_BETA = ../generic/zgemm_beta.c
+endif
+ifndef ZGEMM_BETA
+ZGEMM_BETA = ../generic/zgemm_beta.c
+endif
+
+
diff --git a/kernel/arm64/KERNEL.ARMV8 b/kernel/arm64/KERNEL.ARMV8
new file mode 100644 (file)
index 0000000..ecf278c
--- /dev/null
@@ -0,0 +1,134 @@
+SAMAXKERNEL  = ../arm/amax.c
+DAMAXKERNEL  = ../arm/amax.c
+CAMAXKERNEL  = ../arm/zamax.c
+ZAMAXKERNEL  = ../arm/zamax.c
+# Every kernel in this first part (through GEMV) is a C source taken from ../arm.
+SAMINKERNEL  = ../arm/amin.c
+DAMINKERNEL  = ../arm/amin.c
+CAMINKERNEL  = ../arm/zamin.c
+ZAMINKERNEL  = ../arm/zamin.c
+
+SMAXKERNEL   = ../arm/max.c
+DMAXKERNEL   = ../arm/max.c
+
+SMINKERNEL   = ../arm/min.c
+DMINKERNEL   = ../arm/min.c
+
+ISAMAXKERNEL = ../arm/iamax.c
+IDAMAXKERNEL = ../arm/iamax.c
+ICAMAXKERNEL = ../arm/izamax.c
+IZAMAXKERNEL = ../arm/izamax.c
+
+ISAMINKERNEL = ../arm/iamin.c
+IDAMINKERNEL = ../arm/iamin.c
+ICAMINKERNEL = ../arm/izamin.c
+IZAMINKERNEL = ../arm/izamin.c
+
+ISMAXKERNEL  = ../arm/imax.c
+IDMAXKERNEL  = ../arm/imax.c
+
+ISMINKERNEL  = ../arm/imin.c
+IDMINKERNEL  = ../arm/imin.c
+
+SASUMKERNEL  = ../arm/asum.c
+DASUMKERNEL  = ../arm/asum.c
+CASUMKERNEL  = ../arm/zasum.c
+ZASUMKERNEL  = ../arm/zasum.c
+
+SAXPYKERNEL  = ../arm/axpy.c
+DAXPYKERNEL  = ../arm/axpy.c
+CAXPYKERNEL  = ../arm/zaxpy.c
+ZAXPYKERNEL  = ../arm/zaxpy.c
+
+SCOPYKERNEL  = ../arm/copy.c
+DCOPYKERNEL  = ../arm/copy.c
+CCOPYKERNEL  = ../arm/zcopy.c
+ZCOPYKERNEL  = ../arm/zcopy.c
+
+SDOTKERNEL   = ../arm/dot.c
+DDOTKERNEL   = ../arm/dot.c
+CDOTKERNEL   = ../arm/zdot.c
+ZDOTKERNEL   = ../arm/zdot.c
+
+SNRM2KERNEL  = ../arm/nrm2.c
+DNRM2KERNEL  = ../arm/nrm2.c
+CNRM2KERNEL  = ../arm/znrm2.c
+ZNRM2KERNEL  = ../arm/znrm2.c
+
+SROTKERNEL   = ../arm/rot.c
+DROTKERNEL   = ../arm/rot.c
+CROTKERNEL   = ../arm/zrot.c
+ZROTKERNEL   = ../arm/zrot.c
+
+SSCALKERNEL  = ../arm/scal.c
+DSCALKERNEL  = ../arm/scal.c
+CSCALKERNEL  = ../arm/zscal.c
+ZSCALKERNEL  = ../arm/zscal.c
+
+SSWAPKERNEL  = ../arm/swap.c
+DSWAPKERNEL  = ../arm/swap.c
+CSWAPKERNEL  = ../arm/zswap.c
+ZSWAPKERNEL  = ../arm/zswap.c
+# GEMV kernels, N and T variants.
+SGEMVNKERNEL = ../arm/gemv_n.c
+DGEMVNKERNEL = ../arm/gemv_n.c
+CGEMVNKERNEL = ../arm/zgemv_n.c
+ZGEMVNKERNEL = ../arm/zgemv_n.c
+
+SGEMVTKERNEL = ../arm/gemv_t.c
+DGEMVTKERNEL = ../arm/gemv_t.c
+CGEMVTKERNEL = ../arm/zgemv_t.c
+ZGEMVTKERNEL = ../arm/zgemv_t.c
+# TRMM and GEMM use the 2x2 sources from ../generic (param.h sets the ARMV8 UNROLL_M/N values to 2).
+STRMMKERNEL    = ../generic/trmmkernel_2x2.c
+DTRMMKERNEL    = ../generic/trmmkernel_2x2.c
+CTRMMKERNEL    = ../generic/ztrmmkernel_2x2.c
+ZTRMMKERNEL    = ../generic/ztrmmkernel_2x2.c
+
+SGEMMKERNEL    =  ../generic/gemmkernel_2x2.c          
+SGEMMONCOPY    =  ../generic/gemm_ncopy_2.c
+SGEMMOTCOPY    =  ../generic/gemm_tcopy_2.c
+SGEMMONCOPYOBJ =  sgemm_oncopy.o
+SGEMMOTCOPYOBJ =  sgemm_otcopy.o
+
+DGEMMKERNEL    =  ../generic/gemmkernel_2x2.c          
+DGEMMONCOPY    = ../generic/gemm_ncopy_2.c
+DGEMMOTCOPY    = ../generic/gemm_tcopy_2.c
+DGEMMONCOPYOBJ = dgemm_oncopy.o
+DGEMMOTCOPYOBJ = dgemm_otcopy.o
+
+CGEMMKERNEL    = ../generic/zgemmkernel_2x2.c
+CGEMMONCOPY    = ../generic/zgemm_ncopy_2.c
+CGEMMOTCOPY    = ../generic/zgemm_tcopy_2.c
+CGEMMONCOPYOBJ =  cgemm_oncopy.o
+CGEMMOTCOPYOBJ =  cgemm_otcopy.o
+
+ZGEMMKERNEL    = ../generic/zgemmkernel_2x2.c
+ZGEMMONCOPY    = ../generic/zgemm_ncopy_2.c
+ZGEMMOTCOPY    = ../generic/zgemm_tcopy_2.c
+ZGEMMONCOPYOBJ =  zgemm_oncopy.o
+ZGEMMOTCOPYOBJ =  zgemm_otcopy.o
+# TRSM: all four precisions name the same four ../generic sources.
+STRSMKERNEL_LN =  ../generic/trsm_kernel_LN.c
+STRSMKERNEL_LT =  ../generic/trsm_kernel_LT.c
+STRSMKERNEL_RN =  ../generic/trsm_kernel_RN.c
+STRSMKERNEL_RT =  ../generic/trsm_kernel_RT.c
+
+DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
+DTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
+DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
+DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
+
+CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
+CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
+CTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
+CTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
+
+ZTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
+ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
+ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
+ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
+
+
+
+
diff --git a/kernel/arm64/Makefile b/kernel/arm64/Makefile
new file mode 100644 (file)
index 0000000..efae70d
--- /dev/null
@@ -0,0 +1,2 @@
+clean ::
+# No recipe: this double-colon rule adds nothing to clean.
diff --git a/lapack/laswp/arm64/Makefile b/lapack/laswp/arm64/Makefile
new file mode 100644 (file)
index 0000000..434c82a
--- /dev/null
@@ -0,0 +1,33 @@
+TOPDIR = ../../..
+include ../../../Makefile.system
+# NOTE(review): CORE2, OPTERON and PRESCOTT are not arm64 cores (the only arm64 CORE this commit adds is ARMV8).
+ifeq ($(CORE), CORE2)
+LASWP  = ../generic/laswp_k_2.c
+ZLASWP = ../generic/zlaswp_k_2.c
+endif
+
+ifeq ($(CORE), OPTERON)
+LASWP  = ../generic/laswp_k_1.c
+ZLASWP = ../generic/zlaswp_k_1.c
+endif
+
+ifeq ($(CORE), PRESCOTT)
+LASWP  = ../generic/laswp_k_1.c
+ZLASWP = ../generic/zlaswp_k_1.c
+endif
+
+ifeq ($(DYNAMIC_ARCH), 1)
+LASWP  = ../generic/laswp_k_4.c
+ZLASWP = ../generic/zlaswp_k_4.c
+endif
+# Defaults when none of the cases above selected a kernel.
+ifndef LASWP
+LASWP  = ../generic/laswp_k.c
+endif
+
+ifndef ZLASWP
+ZLASWP = ../generic/zlaswp_k.c
+endif
+
+include ../generic/Makefile
+
diff --git a/param.h b/param.h
index ec1767d..0628a19 100644 (file)
--- a/param.h
+++ b/param.h
@@ -1874,6 +1874,46 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define SYMV_P 16
 #endif
 
+#if defined(ARMV8)
+#define SNUMOPT                2
+#define DNUMOPT                2
+/* GEMM_DEFAULT_* offsets and alignment. */
+#define GEMM_DEFAULT_OFFSET_A 0
+#define GEMM_DEFAULT_OFFSET_B 0
+#define GEMM_DEFAULT_ALIGN 0x03fffUL
+/* Unroll factors: 2 in both M and N for every precision, in line with the *_2x2 kernels named in kernel/arm64/KERNEL.ARMV8. */
+#define SGEMM_DEFAULT_UNROLL_M  2
+#define SGEMM_DEFAULT_UNROLL_N  2
+
+#define DGEMM_DEFAULT_UNROLL_M  2
+#define DGEMM_DEFAULT_UNROLL_N  2
+
+#define CGEMM_DEFAULT_UNROLL_M  2
+#define CGEMM_DEFAULT_UNROLL_N  2
+
+#define ZGEMM_DEFAULT_UNROLL_M  2
+#define ZGEMM_DEFAULT_UNROLL_N  2
+/* P, Q and R defaults for each precision (S, D, C, Z). */
+#define SGEMM_DEFAULT_P        128
+#define DGEMM_DEFAULT_P        128
+#define CGEMM_DEFAULT_P 96
+#define ZGEMM_DEFAULT_P 64
+
+#define SGEMM_DEFAULT_Q 240
+#define DGEMM_DEFAULT_Q 120
+#define CGEMM_DEFAULT_Q 120
+#define ZGEMM_DEFAULT_Q 120
+
+#define SGEMM_DEFAULT_R 12288
+#define DGEMM_DEFAULT_R 8192
+#define CGEMM_DEFAULT_R 4096
+#define ZGEMM_DEFAULT_R 4096
+
+
+#define SYMV_P 16
+#endif /* ARMV8 */
+
+
 
 
 #ifdef GENERIC