# You can specify the target architecture, otherwise it's
# automatically detected.
-# TARGET = PENRYN
+TARGET = ARMV7
# If you want to support multiple architecture in one binary
# DYNAMIC_ARCH = 1
# FC = gfortran
# Even you can specify cross compiler. Meanwhile, please set HOSTCC.
-# CC = x86_64-w64-mingw32-gcc
-# FC = x86_64-w64-mingw32-gfortran
+CC = arm-linux-gnueabihf-gcc
+FC = arm-linux-gnueabihf-gfortran
# If you use the cross compiler, please set this host compiler.
-# HOSTCC = gcc
+HOSTCC = gcc
# If you need 32bit binary, define BINARY=32, otherwise define BINARY=64
-# BINARY=64
+#BINARY=32
# About threaded BLAS. It will be automatically detected if you don't
# specify it.
# For force setting for single threaded, specify USE_THREAD = 0
# For force setting for multi threaded, specify USE_THREAD = 1
-# USE_THREAD = 0
+USE_THREAD = 0
# If you're going to use this library with OpenMP, please comment it in.
# USE_OPENMP = 1
# You can define maximum number of threads. Basically it should be
# less than actual number of cores. If you don't specify one, it's
# automatically detected by the the script.
-# NUM_THREADS = 24
+NUM_THREADS = 4
# if you don't need generate the shared library, please comment it in.
# NO_SHARED = 1
# If you don't need CBLAS interface, please comment it in.
# NO_CBLAS = 1
-# If you only want CBLAS interface without installing Fortran compiler,
-# please comment it in.
-# ONLY_CBLAS = 1
-
# If you don't need LAPACK, please comment it in.
# If you set NO_LAPACK=1, the library automatically sets NO_LAPACKE=1.
-# NO_LAPACK = 1
+#NO_LAPACK = 1
# If you don't need LAPACKE (C Interface to LAPACK), please comment it in.
-# NO_LAPACKE = 1
+#NO_LAPACKE = 1
# If you want to use legacy threaded Level 3 implementation.
# USE_SIMPLE_THREADED_LEVEL3 = 1
# Unfortunately most of kernel won't give us high quality buffer.
# BLAS tries to find the best region before entering main function,
# but it will consume time. If you don't like it, you can disable one.
-# NO_WARMUP = 1
+NO_WARMUP = 1
# If you want to disable CPU/Memory affinity on Linux.
-# NO_AFFINITY = 1
+NO_AFFINITY = 1
# Don't use AVX kernel on Sandy Bridge. It is compatible with old compilers
# and OS. However, the performance is low.
# Common Optimization Flag;
# The default -O2 is enough.
-# COMMON_OPT = -O2
+COMMON_OPT = -O0 -marm -mfpu=vfpv3 -fno-omit-frame-pointer
# Profiling flags
COMMON_PROF = -pg
# Build Debug version
-# DEBUG = 1
+DEBUG = 1
#
# End of user configuration
GETARCH_FLAGS += -static
endif
-#if don't use Fortran, it will only compile CBLAS.
-ifeq ($(ONLY_CBLAS), 1)
-NO_LAPACK = 1
-else
-ONLY_CBLAS = 0
-endif
-
# This operation is expensive, so execution should be once.
ifndef GOTOBLAS_MAKEFILE
export GOTOBLAS_MAKEFILE = 1
# Generating Makefile.conf and config.h
-DUMMY := $(shell $(MAKE) -C $(TOPDIR) -f Makefile.prebuild CC="$(CC)" FC="$(FC)" HOSTCC="$(HOSTCC)" CFLAGS="$(GETARCH_FLAGS)" BINARY=$(BINARY) USE_OPENMP=$(USE_OPENMP) TARGET_CORE=$(TARGET_CORE) ONLY_CBLAS=$(ONLY_CBLAS) all)
+DUMMY := $(shell $(MAKE) -C $(TOPDIR) -f Makefile.prebuild CC="$(CC)" FC="$(FC)" HOSTCC="$(HOSTCC)" CFLAGS="$(GETARCH_FLAGS)" BINARY=$(BINARY) USE_OPENMP=$(USE_OPENMP) TARGET_CORE=$(TARGET_CORE) all)
ifndef TARGET_CORE
include $(TOPDIR)/Makefile.conf
DYNAMIC_CORE = KATMAI COPPERMINE NORTHWOOD PRESCOTT BANIAS \
CORE2 PENRYN DUNNINGTON NEHALEM ATHLON OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO
ifneq ($(NO_AVX), 1)
-DYNAMIC_CORE += SANDYBRIDGE BULLDOZER PILEDRIVER
+DYNAMIC_CORE += SANDYBRIDGE
+#BULLDOZER PILEDRIVER
endif
endif
ifeq ($(ARCH), x86_64)
DYNAMIC_CORE = PRESCOTT CORE2 PENRYN DUNNINGTON NEHALEM OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO
ifneq ($(NO_AVX), 1)
-DYNAMIC_CORE += SANDYBRIDGE BULLDOZER PILEDRIVER
+DYNAMIC_CORE += SANDYBRIDGE
+#BULLDOZER PILEDRIVER
endif
endif
BINARY_DEFINED = 1
endif
+# ARM builds: set NO_BINARY_MODE and mark the binary mode as already decided.
+# NOTE(review): presumably this keeps the 32/64-bit mode selection from being
+# applied to the ARM toolchain -- confirm where NO_BINARY_MODE is read.
+ifeq ($(ARCH), arm)
+NO_BINARY_MODE = 1
+BINARY_DEFINED = 1
+endif
#
# C Compiler dependent settings
#
LIBS = $(TOPDIR)/$(LIBNAME)
LIBS_P = $(TOPDIR)/$(LIBNAME_P)
-
-LIB_COMPONENTS = BLAS
-ifneq ($(NO_CBLAS), 1)
-LIB_COMPONENTS += CBLAS
-endif
-
-ifneq ($(NO_LAPACK), 1)
-LIB_COMPONENTS += LAPACK
-ifneq ($(NO_LAPACKE), 1)
-LIB_COMPONENTS += LAPACKE
-endif
-endif
-
-ifeq ($(ONLY_CBLAS), 1)
-LIB_COMPONENTS = CBLAS
-endif
-
export OSNAME
export ARCH
export CORE
export CROSS
export CROSS_SUFFIX
export NOFORTRAN
-export NO_FBLAS
export EXTRALIB
export CEXTRALIB
export FEXTRALIB
#include "common_mips64.h"
#endif
+#ifdef ARCH_ARM
+#include "common_arm.h"
+#endif
+
#ifdef OS_LINUX
#include "common_linux.h"
#endif
#include "common_level2.h"
#include "common_level3.h"
#include "common_lapack.h"
-
#ifdef CBLAS
-# define OPENBLAS_CONST /* see comment in cblas.h */
-# include "cblas.h"
+/* This header file is generated from "cblas.h" (see Makefile.prebuild). */
+#include "cblas_noconst.h"
#endif
#ifndef ASSEMBLER
--- /dev/null
+/*****************************************************************************
+Copyright (c) 2011, Lab of Parallel Software and Computational Science, ISCAS
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+ 1. Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ 3. Neither the name of the ISCAS nor the names of its contributors may
+ be used to endorse or promote products derived from this software
+ without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
+USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+**********************************************************************************/
+
+/*********************************************************************/
+/* Copyright 2009, 2010 The University of Texas at Austin. */
+/* All rights reserved. */
+/* */
+/* Redistribution and use in source and binary forms, with or */
+/* without modification, are permitted provided that the following */
+/* conditions are met: */
+/* */
+/* 1. Redistributions of source code must retain the above */
+/* copyright notice, this list of conditions and the following */
+/* disclaimer. */
+/* */
+/* 2. Redistributions in binary form must reproduce the above */
+/* copyright notice, this list of conditions and the following */
+/* disclaimer in the documentation and/or other materials */
+/* provided with the distribution. */
+/* */
+/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
+/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
+/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
+/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
+/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
+/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
+/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
+/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
+/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
+/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
+/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
+/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
+/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
+/* POSSIBILITY OF SUCH DAMAGE. */
+/* */
+/* The views and conclusions contained in the software and */
+/* documentation are those of the authors and should not be */
+/* interpreted as representing official policies, either expressed */
+/* or implied, of The University of Texas at Austin. */
+/*********************************************************************/
+
+#ifndef COMMON_ARM
+#define COMMON_ARM
+
+/* Memory barriers. ARM has a weakly ordered memory model, so these must
+ * emit a real hardware barrier rather than expand to nothing;
+ * __sync_synchronize() is the GCC full-barrier builtin. Both macros are
+ * meant to be used as statements ("MB;" / "WMB;"). */
+#define MB __sync_synchronize()
+#define WMB __sync_synchronize()
+
+/* INLINE expands to the C "inline" keyword on this target. */
+#define INLINE inline
+
+/* NOTE(review): presumably selects returning complex results by value --
+ * confirm where RETURN_BY_COMPLEX is tested. */
+#define RETURN_BY_COMPLEX
+
+#ifndef ASSEMBLER
+
+/* Acquire the spin lock stored at *address (0 = free, non-zero = held).
+ * Returns only once this caller owns the lock.
+ *
+ * The previous body was entirely commented out (and was MIPS ll/sc
+ * assembly), so the function returned without ever taking the lock. */
+static void INLINE blas_lock(volatile unsigned long *address){
+
+ do {
+  /* Wait on plain reads first so that contended waiters do not keep
+     issuing exclusive accesses while the lock is held. */
+  while (*address) {
+#ifdef YIELDING
+   YIELDING;
+#endif
+  }
+
+  /* Atomically store 1 and fetch the previous value (acquire barrier).
+     A non-zero previous value means another thread won the race, so
+     go back to waiting. */
+ } while (__sync_lock_test_and_set(address, 1UL));
+
+}
+
+/* Cycle-counter stub: no counter is read on this target, so the result is
+ * always 0. */
+static inline unsigned int rpcc(void){
+ return 0U;
+}
+
+/* Integer quotient of x by y using the plain C division operator; the
+ * result is converted to int on return. */
+static inline int blas_quickdivide(blasint x, blasint y){
+ return (int)(x / y);
+}
+
+/* GET_IMAGE(res): store VFP register d1 (when DOUBLE is defined) or s1
+ * (otherwise) into the lvalue res.
+ * NOTE(review): presumably this fetches the imaginary part of a complex
+ * result returned in registers -- confirm against the callers and the
+ * floating-point ABI in use. */
+#if defined(DOUBLE)
+#define GET_IMAGE(res) __asm__ __volatile__("vstr.f64 d1, %0" : "=m"(res) : : "memory")
+#else
+#define GET_IMAGE(res) __asm__ __volatile__("vstr.f32 s1, %0" : "=m"(res) : : "memory")
+#endif
+
+/* Expands to nothing on this target. */
+#define GET_IMAGE_CANCEL
+
+#endif
+
+
+/* REALNAME is the symbol name used for the routine being built: ASMFNAME
+ * when F_INTERFACE is defined, ASMNAME otherwise. */
+#ifndef F_INTERFACE
+#define REALNAME ASMNAME
+#else
+#define REALNAME ASMFNAME
+#endif
+
+/* Assembler-only helpers; skipped when NEEDPARAM is defined. */
+#if defined(ASSEMBLER) && !defined(NEEDPARAM)
+
+/* PROLOGUE: select the ARM instruction set, export REALNAME, open a .func
+ * record for it and place the entry label. */
+#define PROLOGUE \
+ .arm ;\
+ .global REALNAME ;\
+ .func REALNAME ;\
+REALNAME:
+
+/* EPILOGUE expands to nothing on this target. */
+#define EPILOGUE
+
+/* PROFCODE expands to nothing on this target. */
+#define PROFCODE
+
+#endif
+
+
+#define SEEK_ADDRESS
+
+/* Default page size of 4 KiB unless PAGESIZE was already defined. */
+#ifndef PAGESIZE
+#define PAGESIZE ( 4 << 10)
+#endif
+/* Huge page size: 4 MiB. */
+#define HUGE_PAGESIZE ( 4 << 20)
+
+/* Buffer size: 16 MiB. */
+#define BUFFER_SIZE (16 << 20)
+
+
+/* Address lying BUFFER_SIZE * MAX_CPU_NUMBER bytes below START_ADDRESS. */
+#define BASE_ADDRESS (START_ADDRESS - BUFFER_SIZE * MAX_CPU_NUMBER)
+
+/* Use MAP_ANON where the MAP_ANONYMOUS spelling is not provided. */
+#ifndef MAP_ANONYMOUS
+#define MAP_ANONYMOUS MAP_ANON
+#endif
+
+#endif
#if defined(__LP64) || defined(__LP64__) || defined(__ptr64) || defined(__x86_64__) || defined(__amd64__) || defined(__64BIT__)
BINARY_64
#endif
+
+/* Emit the ARCH_ARM token when the compiler predefines __ARM_ARCH or
+ * __ARM_ARCH_7A__. */
+#if defined(__ARM_ARCH) || defined(__ARM_ARCH_7A__)
+ARCH_ARM
+#endif
+
+
#define CORENAME "generic"
#endif
+/* Forced configuration for the ARMV7 target: fixes the architecture names,
+ * kernel sub-directory, library/core names and the cache/TLB parameters
+ * that are passed on as -D flags. */
+#ifdef FORCE_ARMV7
+#define FORCE
+#define ARCHITECTURE "ARM"
+#define SUBARCHITECTURE "ARMV7"
+#define SUBDIRNAME "arm"
+/* L2_SIZE is 512 KiB = 524288 bytes. The earlier value 512488 was a digit
+ * typo: it is not a multiple of the 32-byte L2 line size. */
+#define ARCHCONFIG "-DARMV7 " \
+ "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=32 " \
+ "-DL2_SIZE=524288 -DL2_LINESIZE=32 " \
+ "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 "
+#define LIBNAME "armv7"
+#define CORENAME "ARMV7"
+#endif
+
+
#ifndef FORCE
#if defined(__powerpc__) || defined(__powerpc) || defined(powerpc) || \
#define SYMV_P 16
#endif
+
+/* Tuning parameters used when ARMV7 is defined. */
+#ifdef ARMV7
+#define SNUMOPT 2
+#define DNUMOPT 2
+
+#define GEMM_DEFAULT_OFFSET_A 0
+#define GEMM_DEFAULT_OFFSET_B 0
+/* 0x3fff = 16 KiB - 1. NOTE(review): presumably an alignment mask -- confirm. */
+#define GEMM_DEFAULT_ALIGN 0x03fffUL
+
+/* Per-precision UNROLL_M / UNROLL_N defaults. Only DGEMM uses M = 8; the
+ * other precisions use 2 x 2.
+ * NOTE(review): these presumably have to match the tile shapes of the
+ * kernels selected for ARMV7 -- confirm against the kernel configuration. */
+#define SGEMM_DEFAULT_UNROLL_M 2
+#define SGEMM_DEFAULT_UNROLL_N 2
+
+#define DGEMM_DEFAULT_UNROLL_M 8
+#define DGEMM_DEFAULT_UNROLL_N 2
+
+#define CGEMM_DEFAULT_UNROLL_M 2
+#define CGEMM_DEFAULT_UNROLL_N 2
+
+#define ZGEMM_DEFAULT_UNROLL_M 2
+#define ZGEMM_DEFAULT_UNROLL_N 2
+
+/* Per-precision P, Q and R defaults.
+ * NOTE(review): presumably the GEMM blocking sizes -- confirm against the
+ * level-3 driver that consumes them. */
+#define SGEMM_DEFAULT_P 64
+#define DGEMM_DEFAULT_P 128
+#define CGEMM_DEFAULT_P 24
+#define ZGEMM_DEFAULT_P 20
+
+#define SGEMM_DEFAULT_Q 192
+#define DGEMM_DEFAULT_Q 128
+#define CGEMM_DEFAULT_Q 128
+#define ZGEMM_DEFAULT_Q 64
+
+#define SGEMM_DEFAULT_R 512
+#define DGEMM_DEFAULT_R 2048
+#define CGEMM_DEFAULT_R 512
+#define ZGEMM_DEFAULT_R 512
+
+
+
+#define SYMV_P 16
+#endif
+
+
+
#ifdef GENERIC
#define SNUMOPT 2