do {
while (*address) {YIELDING;};
+#if defined(_MSC_VER) && !defined(__clang__)
+ // use intrinsic instead of inline assembly
+ ret = _InterlockedExchange(address, 1);
+ // inline assembly
+ /*__asm {
+ mov eax, address
+ mov ebx, 1
+ xchg [eax], ebx
+ mov ret, ebx
+ }*/
+#else
__asm__ __volatile__(
"xchgl %0, %1\n"
: "=r"(ret), "=m"(*address)
: "0"(1), "m"(*address)
: "memory");
+#endif
} while (ret);
}
/*
 * Read the processor time-stamp counter and return it as one 64-bit value.
 * Takes no arguments; the result is whatever the counter holds at the call.
 */
static __inline unsigned long long rpcc(void){
+#if defined(_MSC_VER) && !defined(__clang__)
+ return __rdtsc(); // MSVC (non-clang) build: no GNU inline asm, so read the counter via the __rdtsc intrinsic
+#else
unsigned int a, d;
/* The "=a"/"=d" constraints bind a to EAX and d to EDX, where rdtsc leaves its two 32-bit halves. */
__asm__ __volatile__ ("rdtsc" : "=a" (a), "=d" (d));
/* Recombine: d supplies bits 63..32, a supplies bits 31..0. */
return ((unsigned long long)a + ((unsigned long long)d << 32));
+#endif
};
/*
 * Return an address that lies in the current thread's stack.
 *
 * GNU-style compilers copy the stack pointer (ESP) into the result.
 * MSVC (non-clang) has no inline-asm path here, so an intrinsic that yields
 * a stack location is used instead.
 *
 * NOTE(review): the result is narrowed to unsigned long; this assumes a
 * 32-bit x86 target (the asm path reads the 32-bit ESP register) - confirm
 * before reusing on a 64-bit build.
 */
static __inline unsigned long getstackaddr(void){
+#if defined(_MSC_VER) && !defined(__clang__)
+ /* _ReturnAddress() would give the caller's *code* address, which is not a
+  * stack address. _AddressOfReturnAddress() points at the stack slot that
+  * holds the return address, matching what the ESP read below provides. */
+ return (unsigned long)_AddressOfReturnAddress();
+#else
unsigned long addr;
/* Copy ESP into a general register chosen by the compiler. */
__asm__ __volatile__ ("mov %%esp, %0"
: "=r"(addr) : : "memory");
return addr;
+#endif
};
/*
 * Square root of a long double value.
 *
 * val: the operand.
 * Returns the square root computed either by the x87 fsqrt instruction
 * (GNU-style compilers) or by the C library sqrt() call (MSVC, non-clang).
 */
static __inline long double sqrt_long(long double val) {
+#if defined(_MSC_VER) && !defined(__clang__)
+ return sqrt(val); // MSVC path: defers to sqrt(); unverified whether this compiles to fsqrt. NOTE(review): sqrt() is the double routine, so val is presumably converted to double here - confirm
+#else
long double result;
/* fldt pushes val (memory operand %1) onto the x87 stack, fsqrt replaces the
   top of stack with its square root, fstpt pops it into result (%0). */
__asm__ __volatile__ ("fldt %1\n"
"fsqrt\n"
"fstpt %0\n" : "=m" (result) : "m"(val));
return result;
+#endif
}
/* SQRT(a) forwards its argument to sqrt_long(). */
#define SQRT(a) sqrt_long(a)
y = blas_quick_divide_table[y];
+#if defined(_MSC_VER) && !defined(__clang__)
+ (void*)result;
+ return x*y;
+#else
__asm__ __volatile__ ("mull %0" :"=d" (result) :"a"(x), "0" (y));
return result;
+#endif
}
#endif
/* BITMASK(a, b, c): shift a right by b bits, then keep only the bits set in mask c. */
#define BITMASK(a, b, c) ((((a) >> (b)) & (c)))
+#if defined(_MSC_VER) && !defined(__clang__)
+#define CONSTRUCTOR __cdecl /* MSVC (non-clang): expands to a calling convention only */
+#define DESTRUCTOR __cdecl /* NOTE(review): nothing in this expansion requests automatic invocation, so functions tagged this way presumably must be called explicitly on MSVC - confirm */
+#else
/* GNU-style compilers: tag a function with the constructor / destructor attribute. */
#define CONSTRUCTOR __attribute__ ((constructor))
#define DESTRUCTOR __attribute__ ((destructor))
+#endif
#ifdef DYNAMIC_ARCH
gotoblas_t *gotoblas = NULL;
blas_shutdown();
}
+#if defined(_MSC_VER) && !defined(__clang__)
+/*
+ * DLL entry point, compiled only for MSVC (non-clang) builds.
+ *
+ * ul_reason_for_call selects the action:
+ *   DLL_PROCESS_ATTACH -> gotoblas_init()
+ *   DLL_PROCESS_DETACH -> gotoblas_quit()
+ * Thread attach/detach notifications and any other reason code do nothing.
+ * hModule and lpReserved are not used. Always returns TRUE.
+ */
+BOOL APIENTRY DllMain(HMODULE hModule, DWORD ul_reason_for_call, LPVOID lpReserved)
+{
+  if (ul_reason_for_call == DLL_PROCESS_ATTACH) {
+    gotoblas_init();
+  } else if (ul_reason_for_call == DLL_PROCESS_DETACH) {
+    gotoblas_quit();
+  }
+  /* Every remaining reason code is a deliberate no-op. */
+  return TRUE;
+}
+#endif
+
#if (defined(C_PGI) || (!defined(C_SUN) && defined(F_INTERFACE_SUN))) && (defined(ARCH_X86) || defined(ARCH_X86_64))
/* Don't call me; this is just work around for PGI / Sun bug */
void gotoblas_dummy_for_PGI(void) {