#ifndef ASSEMBLER
+#ifdef C_MSVC
+#include <intrin.h>
+#endif
+
#ifdef C_SUN
#define __asm__ __asm
#define __volatile__
#endif
*/
-#define MB
-#define WMB
+#ifdef __GNUC__ /* MB/WMB: asm statement with an empty template and a "memory" clobber */
+#define MB do { __asm__ __volatile__("": : :"memory"); } while (0)
+#define WMB do { __asm__ __volatile__("": : :"memory"); } while (0)
+#else /* without __GNUC__, MB and WMB expand to an empty statement */
+#define MB do {} while (0)
+#define WMB do {} while (0)
+#endif
/*
 * Busy-wait until 1 has been swapped into the word at `address` in place of 0.
 *
 * The inner loop runs YIELDING for as long as *address reads non-zero. The
 * word is then exchanged with 1 (xchgl, or InterlockedExchange64 when C_MSVC
 * is defined), and the whole sequence repeats while the value that was
 * swapped out is non-zero.
 */
static void __inline blas_lock(volatile BLASULONG *address){
+#ifndef C_MSVC
int ret;
+#else
+ BLASULONG ret;
+#endif
do {
while (*address) {YIELDING;};  /* wait while the word reads non-zero */
+#ifndef C_MSVC
__asm__ __volatile__(
"xchgl %0, %1\n"
: "=r"(ret), "=m"(*address)
: "0"(1), "m"(*address)
: "memory");
-
+#else
+ ret=InterlockedExchange64((volatile LONG64 *)(address), 1);
+#endif
} while (ret);  /* ret holds the value that was in *address before the swap */
+
}
+#define BLAS_LOCK_DEFINED
/*
 * Read the processor time-stamp counter and return it as a single BLASULONG.
 *
 * When C_MSVC is defined the __rdtsc() intrinsic is used; otherwise the rdtsc
 * instruction is issued inline and its two register results are combined.
 */
static __inline BLASULONG rpcc(void){
+#ifdef C_MSVC
+ return __rdtsc();
+#else
BLASULONG a, d;
__asm__ __volatile__ ("rdtsc" : "=a" (a), "=d" (d));  /* a <- a-register, d <- d-register */
return ((BLASULONG)a + ((BLASULONG)d << 32));  /* d supplies bits 32 and above */
+#endif
}
+#define RPCC_DEFINED
#define RPCC64BIT
+#ifndef C_MSVC
/*
 * Return the value of the stack pointer register (%rsp) at the point of the
 * call.
 *
 * x86-64 only: the register is read with a single movq. The "memory" clobber
 * keeps the compiler from moving memory accesses across the read.
 */
static __inline BLASULONG getstackaddr(void){
  BLASULONG addr;

  /* addr must be written before it is returned; reading an unassigned
     automatic variable yields an indeterminate value. */
  __asm__ __volatile__ ("movq %%rsp, %0"
                        : "=r"(addr) : : "memory");

  return addr;
}
+#endif
/*
 * Execute the CPUID instruction with `op` as the EAX input and store the
 * resulting EAX, EBX, ECX and EDX values through the four output pointers.
 *
 * When C_MSVC is defined, the __cpuid() intrinsic fills a four-element array
 * that is copied out element by element. Otherwise the instruction is issued
 * inline, with %ecx set to 0 immediately before it.
 */
static __inline void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx){
- __asm__ __volatile__("cpuid"
+#ifdef C_MSVC
+ int cpuinfo[4];
+ __cpuid(cpuinfo, op);
+ *eax=cpuinfo[0];
+ *ebx=cpuinfo[1];
+ *ecx=cpuinfo[2];
+ *edx=cpuinfo[3];
+#else
+ __asm__ __volatile__("mov $0, %%ecx;"  /* zero %ecx before cpuid runs */
+ "cpuid"
: "=a" (*eax),
"=b" (*ebx),
"=c" (*ecx),
"=d" (*edx)
: "0" (op));  /* op shares the register of output operand 0 (*eax) */
+#endif
}
/*
#define WHEREAMI
*/
-static inline int WhereAmI(void){
+static __inline int WhereAmI(void){
int eax, ebx, ecx, edx;
int apicid;
#define GET_IMAGE_CANCEL
#ifdef SMP
-#ifdef USE64BITINT
+#if defined(USE64BITINT)
/* USE64BITINT variant: the quotient is computed with the plain division
   operator; no lookup table is involved. */
static __inline blasint blas_quickdivide(blasint x, blasint y){
  const blasint quotient = x / y;

  return quotient;
}
+#elif defined (C_MSVC)
+static __inline BLASLONG blas_quickdivide(BLASLONG x, BLASLONG y){ /* C_MSVC variant: plain integer division */
+ return x / y;
+}
#else
extern unsigned int blas_quick_divide_table[];
if (y <= 1) return x;
+#if (MAX_CPU_NUMBER > 64)
+ if (y > 64) {
+ result = x / y;
+ return result;
+ }
+#endif
+
y = blas_quick_divide_table[y];
- __asm__ __volatile__ ("mull %0" :"=d" (result) :"a"(x), "0" (y));
+ __asm__ __volatile__ ("mull %0" :"=d" (result), "+a"(x) : "0" (y));
return result;
}
#define RETURN_BY_STACK
#endif
+#ifdef F_INTERFACE_FLANG
+#define RETURN_BY_STACK
+#endif
+
#ifdef F_INTERFACE_PGI
#define RETURN_BY_STACK
#endif
#ifdef ASSEMBLER
-#if defined(PILEDRIVER) || defined(BULLDOZER) || defined(STEAMROLLER)
-//Enable some optimazation for barcelona.
+#if defined(PILEDRIVER) || defined(BULLDOZER) || defined(STEAMROLLER) || defined(EXCAVATOR)
+//Enable some optimization for barcelona.
#define BARCELONA_OPTIMIZATION
#endif
#define PROFCODE
-#define EPILOGUE .end REALNAME
+#define EPILOGUE .end
#endif
-#if defined(OS_LINUX) || defined(OS_FREEBSD) || defined(OS_NETBSD) || defined(__ELF__) || defined(C_PGI)
+#if defined(OS_LINUX) || defined(OS_FREEBSD) || defined(OS_NETBSD) || defined(OS_OPENBSD) || defined(OS_DRAGONFLY) || defined(__ELF__) || defined(C_PGI)
#define PROLOGUE \
.text; \
.align 512; \