From d13788d1b4027d9e545694b7cad71a877bcab3dd Mon Sep 17 00:00:00 2001 From: wernsaar Date: Sat, 28 Sep 2013 19:10:32 +0200 Subject: [PATCH] common files modified for ARM --- Makefile.rule | 30 +++++------ Makefile.system | 37 ++++--------- common.h | 9 ++-- common_arm.h | 163 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ ctest.c | 6 +++ getarch.c | 15 ++++++ param.h | 43 +++++++++++++++ 7 files changed, 255 insertions(+), 48 deletions(-) create mode 100644 common_arm.h diff --git a/Makefile.rule b/Makefile.rule index e357d5c..a7aa087 100644 --- a/Makefile.rule +++ b/Makefile.rule @@ -12,7 +12,7 @@ VERSION = 0.2.8 # You can specify the target architecture, otherwise it's # automatically detected. -# TARGET = PENRYN +TARGET = ARMV7 # If you want to support multiple architecture in one binary # DYNAMIC_ARCH = 1 @@ -25,20 +25,20 @@ VERSION = 0.2.8 # FC = gfortran # Even you can specify cross compiler. Meanwhile, please set HOSTCC. -# CC = x86_64-w64-mingw32-gcc -# FC = x86_64-w64-mingw32-gfortran +CC = arm-linux-gnueabihf-gcc +FC = arm-linux-gnueabihf-gfortran # If you use the cross compiler, please set this host compiler. -# HOSTCC = gcc +HOSTCC = gcc # If you need 32bit binary, define BINARY=32, otherwise define BINARY=64 -# BINARY=64 +#BINARY=32 # About threaded BLAS. It will be automatically detected if you don't # specify it. # For force setting for single threaded, specify USE_THREAD = 0 # For force setting for multi threaded, specify USE_THREAD = 1 -# USE_THREAD = 0 +USE_THREAD = 0 # If you're going to use this library with OpenMP, please comment it in. # USE_OPENMP = 1 @@ -46,7 +46,7 @@ VERSION = 0.2.8 # You can define maximum number of threads. Basically it should be # less than actual number of cores. If you don't specify one, it's # automatically detected by the the script. -# NUM_THREADS = 24 +NUM_THREADS = 4 # if you don't need generate the shared library, please comment it in. 
# NO_SHARED = 1 @@ -54,16 +54,12 @@ VERSION = 0.2.8 # If you don't need CBLAS interface, please comment it in. # NO_CBLAS = 1 -# If you only want CBLAS interface without installing Fortran compiler, -# please comment it in. -# ONLY_CBLAS = 1 - # If you don't need LAPACK, please comment it in. # If you set NO_LAPACK=1, the library automatically sets NO_LAPACKE=1. -# NO_LAPACK = 1 +#NO_LAPACK = 1 # If you don't need LAPACKE (C Interface to LAPACK), please comment it in. -# NO_LAPACKE = 1 +#NO_LAPACKE = 1 # If you want to use legacy threaded Level 3 implementation. # USE_SIMPLE_THREADED_LEVEL3 = 1 @@ -76,10 +72,10 @@ VERSION = 0.2.8 # Unfortunately most of kernel won't give us high quality buffer. # BLAS tries to find the best region before entering main function, # but it will consume time. If you don't like it, you can disable one. -# NO_WARMUP = 1 +NO_WARMUP = 1 # If you want to disable CPU/Memory affinity on Linux. -# NO_AFFINITY = 1 +NO_AFFINITY = 1 # Don't use AVX kernel on Sandy Bridge. It is compatible with old compilers # and OS. However, the performance is low. @@ -127,13 +123,13 @@ VERSION = 0.2.8 # Common Optimization Flag; # The default -O2 is enough. -# COMMON_OPT = -O2 +COMMON_OPT = -O0 -marm -mfpu=vfpv3 -fno-omit-frame-pointer # Profiling flags COMMON_PROF = -pg # Build Debug version -# DEBUG = 1 +DEBUG = 1 # # End of user configuration diff --git a/Makefile.system b/Makefile.system index 858160f..e5358f6 100644 --- a/Makefile.system +++ b/Makefile.system @@ -82,19 +82,12 @@ ifeq ($(HOSTCC), loongcc) GETARCH_FLAGS += -static endif -#if don't use Fortran, it will only compile CBLAS. -ifeq ($(ONLY_CBLAS), 1) -NO_LAPACK = 1 -else -ONLY_CBLAS = 0 -endif - # This operation is expensive, so execution should be once. 
ifndef GOTOBLAS_MAKEFILE export GOTOBLAS_MAKEFILE = 1 # Generating Makefile.conf and config.h -DUMMY := $(shell $(MAKE) -C $(TOPDIR) -f Makefile.prebuild CC="$(CC)" FC="$(FC)" HOSTCC="$(HOSTCC)" CFLAGS="$(GETARCH_FLAGS)" BINARY=$(BINARY) USE_OPENMP=$(USE_OPENMP) TARGET_CORE=$(TARGET_CORE) ONLY_CBLAS=$(ONLY_CBLAS) all) +DUMMY := $(shell $(MAKE) -C $(TOPDIR) -f Makefile.prebuild CC="$(CC)" FC="$(FC)" HOSTCC="$(HOSTCC)" CFLAGS="$(GETARCH_FLAGS)" BINARY=$(BINARY) USE_OPENMP=$(USE_OPENMP) TARGET_CORE=$(TARGET_CORE) all) ifndef TARGET_CORE include $(TOPDIR)/Makefile.conf @@ -331,14 +324,16 @@ ifeq ($(ARCH), x86) DYNAMIC_CORE = KATMAI COPPERMINE NORTHWOOD PRESCOTT BANIAS \ CORE2 PENRYN DUNNINGTON NEHALEM ATHLON OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO ifneq ($(NO_AVX), 1) -DYNAMIC_CORE += SANDYBRIDGE BULLDOZER PILEDRIVER +DYNAMIC_CORE += SANDYBRIDGE +#BULLDOZER PILEDRIVER endif endif ifeq ($(ARCH), x86_64) DYNAMIC_CORE = PRESCOTT CORE2 PENRYN DUNNINGTON NEHALEM OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO ifneq ($(NO_AVX), 1) -DYNAMIC_CORE += SANDYBRIDGE BULLDOZER PILEDRIVER +DYNAMIC_CORE += SANDYBRIDGE +#BULLDOZER PILEDRIVER endif endif @@ -368,6 +363,10 @@ NO_BINARY_MODE = 1 BINARY_DEFINED = 1 endif +ifeq ($(ARCH), arm) +NO_BINARY_MODE = 1 +BINARY_DEFINED = 1 +endif # # C Compiler dependent settings # @@ -892,23 +891,6 @@ LIBZIPNAME = $(LIBNAME:.$(LIBSUFFIX)=.zip) LIBS = $(TOPDIR)/$(LIBNAME) LIBS_P = $(TOPDIR)/$(LIBNAME_P) - -LIB_COMPONENTS = BLAS -ifneq ($(NO_CBLAS), 1) -LIB_COMPONENTS += CBLAS -endif - -ifneq ($(NO_LAPACK), 1) -LIB_COMPONENTS += LAPACK -ifneq ($(NO_LAPACKE), 1) -LIB_COMPONENTS += LAPACKE -endif -endif - -ifeq ($(ONLY_CBLAS), 1) -LIB_COMPONENTS = CBLAS -endif - export OSNAME export ARCH export CORE @@ -935,7 +917,6 @@ export USE_OPENMP export CROSS export CROSS_SUFFIX export NOFORTRAN -export NO_FBLAS export EXTRALIB export CEXTRALIB export FEXTRALIB diff --git a/common.h b/common.h index 309f246..418ed25 100644 --- a/common.h +++ b/common.h 
@@ -363,6 +363,10 @@ please https://github.com/xianyi/OpenBLAS/issues/246 #include "common_mips64.h" #endif +#ifdef ARCH_ARM +#include "common_arm.h" +#endif + #ifdef OS_LINUX #include "common_linux.h" #endif @@ -574,10 +578,9 @@ typedef struct { #include "common_level2.h" #include "common_level3.h" #include "common_lapack.h" - #ifdef CBLAS -# define OPENBLAS_CONST /* see comment in cblas.h */ -# include "cblas.h" +/* This header file is generated from "cblas.h" (see Makefile.prebuild). */ +#include "cblas_noconst.h" #endif #ifndef ASSEMBLER diff --git a/common_arm.h b/common_arm.h new file mode 100644 index 0000000..b61efd7 --- /dev/null +++ b/common_arm.h @@ -0,0 +1,163 @@ +/***************************************************************************** +Copyright (c) 2011, Lab of Parallel Software and Computational Science,ICSAS +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + 3. Neither the name of the ISCAS nor the names of its contributors may + be used to endorse or promote products derived from this software + without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +**********************************************************************************/ + +/*********************************************************************/ +/* Copyright 2009, 2010 The University of Texas at Austin. */ +/* All rights reserved. */ +/* */ +/* Redistribution and use in source and binary forms, with or */ +/* without modification, are permitted provided that the following */ +/* conditions are met: */ +/* */ +/* 1. Redistributions of source code must retain the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer. */ +/* */ +/* 2. Redistributions in binary form must reproduce the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer in the documentation and/or other materials */ +/* provided with the distribution. */ +/* */ +/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ +/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ +/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ +/* DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ +/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ +/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ +/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ +/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ +/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ +/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ +/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ +/* POSSIBILITY OF SUCH DAMAGE. */ +/* */ +/* The views and conclusions contained in the software and */ +/* documentation are those of the authors and should not be */ +/* interpreted as representing official policies, either expressed */ +/* or implied, of The University of Texas at Austin. */ +/*********************************************************************/ + +#ifndef COMMON_ARM +#define COMMON_ARM +/* NOTE(review): MB and WMB expand to nothing here; confirm no memory barrier is required before enabling threaded builds. */ +#define MB +#define WMB + +#define INLINE inline + +#define RETURN_BY_COMPLEX + +#ifndef ASSEMBLER + +static void INLINE blas_lock(volatile unsigned long *address){ +/* NOTE(review): the whole body below is commented out, so blas_lock() currently acquires nothing; the disabled template uses ll/sc/sync mnemonics, presumably carried over from another architecture - TODO confirm and implement. */ +// long int ret, val = 1; +/* + do { + while (*address) {YIELDING;}; + + __asm__ __volatile__( + "1: ll %0, %3\n" + " ori %2, %0, 1\n" + " sc %2, %1\n" + " beqz %2, 1b\n" + " andi %2, %0, 1\n" + " sync\n" + : "=&r" (val), "=m" (address), "=&r" (ret) + : "m" (address) + : "memory"); + + } while (ret); +*/ +} + +static inline unsigned int rpcc(void){ + unsigned long ret=0; + /* Stub: ret is never updated after initialisation, so this always returns 0. */ + return ret; +} + +static inline int blas_quickdivide(blasint x, blasint y){ + return x / y; +} + +#if defined(DOUBLE) +#define GET_IMAGE(res) __asm__ __volatile__("vstr.f64 d1, %0" : "=m"(res) : : "memory") +#else +#define GET_IMAGE(res) __asm__ __volatile__("vstr.f32 s1, %0" : "=m"(res) : : "memory") +#endif + +#define GET_IMAGE_CANCEL + +#endif + + +#ifndef F_INTERFACE +#define REALNAME ASMNAME +#else +#define REALNAME ASMFNAME +#endif + +#if defined(ASSEMBLER) && 
!defined(NEEDPARAM) + +#define PROLOGUE \ + .arm ;\ + .global REALNAME ;\ + .func REALNAME ;\ +REALNAME: + +#define EPILOGUE + +#define PROFCODE + +#endif + + +#define SEEK_ADDRESS + +#ifndef PAGESIZE +#define PAGESIZE ( 4 << 10) +#endif +#define HUGE_PAGESIZE ( 4 << 20) + +#define BUFFER_SIZE (16 << 20) + + +#define BASE_ADDRESS (START_ADDRESS - BUFFER_SIZE * MAX_CPU_NUMBER) + +#ifndef MAP_ANONYMOUS +#define MAP_ANONYMOUS MAP_ANON +#endif + +#endif diff --git a/ctest.c b/ctest.c index 4135192..1844163 100644 --- a/ctest.c +++ b/ctest.c @@ -124,3 +124,9 @@ ARCH_IA64 #if defined(__LP64) || defined(__LP64__) || defined(__ptr64) || defined(__x86_64__) || defined(__amd64__) || defined(__64BIT__) BINARY_64 #endif + +#if defined(__ARM_ARCH) || defined(__ARM_ARCH_7A__) +ARCH_ARM +#endif + + diff --git a/getarch.c b/getarch.c index 3ffda62..3264a76 100644 --- a/getarch.c +++ b/getarch.c @@ -679,6 +679,21 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define CORENAME "generic" #endif +#ifdef FORCE_ARMV7 +#define FORCE +#define ARCHITECTURE "ARM" +#define SUBARCHITECTURE "ARMV7" +#define SUBDIRNAME "arm" +#define ARCHCONFIG "-DARMV7 " \ + "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=32 " \ + "-DL2_SIZE=524288 -DL2_LINESIZE=32 " /* 512 KiB = 512 * 1024; was mistyped as 512488 */ \ + "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 " +#define LIBNAME "armv7" +#define CORENAME "ARMV7" +#else +#endif + + #ifndef FORCE #if defined(__powerpc__) || defined(__powerpc) || defined(powerpc) || \ diff --git a/param.h b/param.h index 0c3df69..79c18f7 100644 --- a/param.h +++ b/param.h @@ -1793,6 +1793,49 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#define SYMV_P 16 #endif + +#ifdef ARMV7 +#define SNUMOPT 2 +#define DNUMOPT 2 + +#define GEMM_DEFAULT_OFFSET_A 0 +#define GEMM_DEFAULT_OFFSET_B 0 +#define GEMM_DEFAULT_ALIGN 0x03fffUL + +#define SGEMM_DEFAULT_UNROLL_M 2 +#define SGEMM_DEFAULT_UNROLL_N 2 + +#define DGEMM_DEFAULT_UNROLL_M 8 +#define DGEMM_DEFAULT_UNROLL_N 2 + +#define CGEMM_DEFAULT_UNROLL_M 2 +#define CGEMM_DEFAULT_UNROLL_N 2 + +#define ZGEMM_DEFAULT_UNROLL_M 2 +#define ZGEMM_DEFAULT_UNROLL_N 2 + +#define SGEMM_DEFAULT_P 64 +#define DGEMM_DEFAULT_P 128 +#define CGEMM_DEFAULT_P 24 +#define ZGEMM_DEFAULT_P 20 + +#define SGEMM_DEFAULT_Q 192 +#define DGEMM_DEFAULT_Q 128 +#define CGEMM_DEFAULT_Q 128 +#define ZGEMM_DEFAULT_Q 64 + +#define SGEMM_DEFAULT_R 512 +#define DGEMM_DEFAULT_R 2048 +#define CGEMM_DEFAULT_R 512 +#define ZGEMM_DEFAULT_R 512 + + + +#define SYMV_P 16 +#endif + + + #ifdef GENERIC #define SNUMOPT 2 -- 2.7.4