#ifndef ASSEMBLER
+#ifdef C_MSVC
+#include <intrin.h>
+#endif
+
#define MB
#define WMB
do {
while (*address) {YIELDING;};
-
+
+#if defined(_MSC_VER) && !defined(__clang__)
+ // use intrinsic instead of inline assembly
+ ret = _InterlockedExchange((volatile LONG *)address, 1);
+ // inline assembly
+ /*__asm {
+ mov eax, address
+ mov ebx, 1
+ xchg [eax], ebx
+ mov ret, ebx
+ }*/
+#else
__asm__ __volatile__(
"xchgl %0, %1\n"
: "=r"(ret), "=m"(*address)
: "0"(1), "m"(*address)
: "memory");
+#endif
} while (ret);
}
+#define BLAS_LOCK_DEFINED
static __inline unsigned long long rpcc(void){
+#if defined(_MSC_VER) && !defined(__clang__) /* MSVC proper (not clang-cl): take the intrinsic path instead of GNU-style inline asm */
+ return __rdtsc(); // MSVC intrinsic: returns the time-stamp counter as one 64-bit value
+#else /* GNU-style inline asm path */
unsigned int a, d;
__asm__ __volatile__ ("rdtsc" : "=a" (a), "=d" (d));
-
- return ((unsigned long long)a + ((unsigned long long)d << 32));
+
+ return ((unsigned long long)a + ((unsigned long long)d << 32)); // a = EAX (low 32 bits), d = EDX (high 32 bits)
+#endif /* _MSC_VER && !__clang__ */
};
+#define RPCC_DEFINED
static __inline unsigned long getstackaddr(void){
+#if defined(_MSC_VER) && !defined(__clang__) /* MSVC proper (not clang-cl): no GNU-style inline asm */
+ // Return an address that lies on the current stack, mirroring the %esp read
+ // in the #else branch. _ReturnAddress() must NOT be used here: it yields the
+ // code address the caller resumes at, not a stack location.
+ // _AddressOfReturnAddress() points at the stack slot holding that return
+ // address, which is the closest intrinsic equivalent of the stack pointer.
+ // NOTE(review): the cast assumes pointers fit in unsigned long (32-bit x86,
+ // as the %esp path implies) - confirm before reusing on 64-bit targets.
+ return (unsigned long)_AddressOfReturnAddress(); // MSVC intrinsic declared in <intrin.h>
+#else
unsigned long addr;
__asm__ __volatile__ ("mov %%esp, %0"
: "=r"(addr) : : "memory");
- return addr;
+ return addr;
+#endif /* _MSC_VER && !__clang__ */
};
static __inline long double sqrt_long(long double val) {
+#if defined(_MSC_VER) && !defined(__clang__) /* MSVC proper (not clang-cl): the x87 inline-asm sequence below is not used */
+ return sqrt(val); // NOTE(review): sqrt() takes a double, so val is converted before the call; unverified whether MSVC emits fsqrt here - confirm precision is acceptable
+#else
long double result;
__asm__ __volatile__ ("fldt %1\n"
"fsqrt\n"
"fstpt %0\n" : "=m" (result) : "m"(val));
return result;
+#endif /* _MSC_VER && !__clang__ */
}
#define SQRT(a) sqrt_long(a)
#define WHEREAMI
-static inline int WhereAmI(void){
+static __inline int WhereAmI(void){
int eax, ebx, ecx, edx;
int apicid;
if (y <= 1) return x;
+#if defined(_MSC_VER) && !defined(__clang__)
+ result = x/y;
+ return result;
+#else
+
y = blas_quick_divide_table[y];
__asm__ __volatile__ ("mull %0" :"=d" (result) :"a"(x), "0" (y));
return result;
+#endif
}
#endif
#define MMXSTORE movd
#endif
-#if defined(PILEDRIVER) || defined(BULLDOZER)
+#if defined(PILEDRIVER) || defined(BULLDOZER) || defined(STEAMROLLER) || defined(EXCAVATOR)
//Enable some optimazation for barcelona.
#define BARCELONA_OPTIMIZATION
#endif
#define PROFCODE
+#ifdef __clang__
+#define EPILOGUE .end
+#else
#define EPILOGUE .end REALNAME
#endif
+#endif
#if defined(OS_LINUX) || defined(OS_FREEBSD) || defined(OS_NETBSD) || defined(__ELF__)
#define PROLOGUE \
#define EPILOGUE \
.size REALNAME, .-REALNAME; \
- .section .note.GNU-stack,"",%progbits
+ .section .note.GNU-stack,"",@progbits
#endif
#ifndef ALIGN_6
#define ALIGN_6 .align 64
#endif
-// ffreep %st(0).
+// ffreep %st(0).
// Because Clang didn't support ffreep, we directly use the opcode.
-// Please check out http://www.sandpile.org/x86/opc_fpu.htm
+// Please check out http://www.sandpile.org/x86/opc_fpu.htm
#ifndef ffreep
#define ffreep .byte 0xdf, 0xc0 #
#endif