return eax & 0xffff;
}
+static inline void xgetbv(int op, int * eax, int * edx){ /* Execute the XGETBV instruction.
+  * op  : loaded into ECX (constraint "c"); selects the extended control register to read.
+  * eax : receives the value left in EAX (constraint "=a").
+  * edx : receives the value left in EDX (constraint "=d").
+  * NOTE(review): XGETBV is only valid when CPUID reports OSXSAVE; support_avx()
+  * tests CPUID.1:ECX bit 27 before calling -- any other caller must do the same.
+  */
+ __asm__ __volatile__
+ ("xgetbv": "=a" (*eax), "=d" (*edx) : "c" (op) : "cc"); // "cc": condition flags are declared clobbered
+}
+
+int support_avx(){
+ int eax, ebx, ecx, edx;
+ int ret=0;
+
+ cpuid(1, &eax, &ebx, &ecx, &edx);
+ if ((ecx & (1 << 28)) != 0 && (ecx & (1 << 27)) != 0){
+ xgetbv(0, &eax, &edx);
+ if((eax & 6) == 6){
+ ret=1; //OS support AVX
+ }
+ }
+ return ret;
+}
+
int get_vendor(void){
int eax, ebx, ecx, edx;
char vendor[13];
if ((ecx & (1 << 19)) != 0) feature |= HAVE_SSE4_1;
if ((ecx & (1 << 20)) != 0) feature |= HAVE_SSE4_2;
#ifndef NO_AVX
- if ((ecx & (1 << 28)) != 0) feature |= HAVE_AVX;
+ if (support_avx()) feature |= HAVE_AVX;
#endif
if (have_excpuid() >= 0x01) {
return CPUTYPE_NEHALEM;
case 10:
//Intel Core i5-2000 /i7-2000 (Sandy Bridge)
- return CPUTYPE_SANDYBRIDGE;
+ if(support_avx())
+ return CPUTYPE_SANDYBRIDGE;
+ else
+ return CPUTYPE_NEHALEM; //OS doesn't support AVX
case 12:
//Xeon Processor 5600 (Westmere-EP)
return CPUTYPE_NEHALEM;
case 13:
//Intel Core i7-3000 / Xeon E5 (Sandy Bridge)
- return CPUTYPE_SANDYBRIDGE;
+ if(support_avx())
+ return CPUTYPE_SANDYBRIDGE;
+ else
+ return CPUTYPE_NEHALEM;
case 15:
//Xeon Processor E7 (Westmere-EX)
return CPUTYPE_NEHALEM;
case 3:
switch (model) {
case 10:
- return CPUTYPE_SANDYBRIDGE;
+ if(support_avx())
+ return CPUTYPE_SANDYBRIDGE;
+ else
+ return CPUTYPE_NEHALEM;
}
break;
}
return CORE_NEHALEM;
case 10:
//Intel Core i5-2000 /i7-2000 (Sandy Bridge)
- return CORE_SANDYBRIDGE;
+ if(support_avx())
+ return CORE_SANDYBRIDGE;
+ else
+ return CORE_NEHALEM; //OS doesn't support AVX
case 12:
//Xeon Processor 5600 (Westmere-EP)
return CORE_NEHALEM;
case 13:
//Intel Core i7-3000 / Xeon E5 (Sandy Bridge)
- return CORE_SANDYBRIDGE;
+ if(support_avx())
+ return CORE_SANDYBRIDGE;
+ else
+ return CORE_NEHALEM; //OS doesn't support AVX
case 15:
//Xeon Processor E7 (Westmere-EX)
return CORE_NEHALEM;
case 3:
switch (model) {
case 10:
- return CORE_SANDYBRIDGE;
+ if(support_avx())
+ return CORE_SANDYBRIDGE;
+ else
+ return CORE_NEHALEM; //OS doesn't support AVX
}
break;
}
#define BITMASK(a, b, c) ((((a) >> (b)) & (c)))
+static inline void xgetbv(int op, int * eax, int * edx){ /* Execute the XGETBV instruction.
+  * op  : loaded into ECX (constraint "c"); selects the extended control register to read.
+  * eax : receives the value left in EAX (constraint "=a").
+  * edx : receives the value left in EDX (constraint "=d").
+  * NOTE(review): XGETBV is only valid when CPUID reports OSXSAVE; support_avx()
+  * tests CPUID.1:ECX bit 27 before calling -- any other caller must do the same.
+  */
+ __asm__ __volatile__
+ ("xgetbv": "=a" (*eax), "=d" (*edx) : "c" (op) : "cc"); // "cc": condition flags are declared clobbered
+}
+
+int support_avx(){
+ int eax, ebx, ecx, edx;
+ int ret=0;
+
+ cpuid(1, &eax, &ebx, &ecx, &edx);
+ if ((ecx & (1 << 28)) != 0 && (ecx & (1 << 27)) != 0){
+ xgetbv(0, &eax, &edx);
+ if((eax & 6) == 6){
+ ret=1; //OS support AVX
+ }
+ }
+ return ret;
+}
+
static int get_vendor(void){
int eax, ebx, ecx, edx;
char vendor[13];
//Intel Core i5-2000 /i7-2000 (Sandy Bridge)
//Intel Core i7-3000 / Xeon E5
- if (model == 10 || model == 13) return &gotoblas_SANDYBRIDGE;
+ if (model == 10 || model == 13) {
+ if(support_avx())
+ return &gotoblas_SANDYBRIDGE;
+ else{
+ fprintf(stderr, "OpenBLAS : Your OS doesn't support AVX. Use Nehalem kernels.\n");
+ return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels.
+ }
+ }
return NULL;
case 3:
//Intel Sandy Bridge 22nm (Ivy Bridge?)
- if (model == 10) return &gotoblas_SANDYBRIDGE;
+ if (model == 10) {
+ if(support_avx())
+ return &gotoblas_SANDYBRIDGE;
+ else{
+ fprintf(stderr, "OpenBLAS : Your OS doesn't support AVX. Use Nehalem kernels.\n");
+ return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels.
+ }
+ }
return NULL;
}
case 0xf:
if (gotoblas && gotoblas -> init) {
gotoblas -> init();
} else {
- fprintf(stderr, "GotoBLAS : Architecture Initialization failed. No initialization function found.\n");
+ fprintf(stderr, "OpenBLAS : Architecture Initialization failed. No initialization function found.\n");
exit(1);
}