common:
* Fixed blasint undefined bug in <cblas.h> file. Other software
could include this header successfully(Refs issue #13 on github)
+ * Fixed the SEGFAULT bug on 64 cores. On an SMP server, the number
+ of CPUs or cores should be less than or equal to 64. (Refs issue #14
+ on github)
+ * Support "void goto_set_num_threads(int num_threads)" and "void
+ openblas_set_num_threads(int num_threads)" when USE_OPENMP=1
+
x86/x86_64:
*
MIPS64:
or
export OMP_NUM_THREADS=4
-The priorities are OPENBLAS_NUM_THREAD > GOTO_NUM_THREADS > OMP_NUM_THREADS.
+The priorities are OPENBLAS_NUM_THREADS > GOTO_NUM_THREADS > OMP_NUM_THREADS.
+
+If you compile this lib with USE_OPENMP=1, you should only set the OMP_NUM_THREADS environment variable.
4.2 Set the number of threads by calling functions. For example,
void goto_set_num_threads(int num_threads);
or
void openblas_set_num_threads(int num_threads);
+If you compile this lib with USE_OPENMP=1, you should use the above functions, too.
+
5.Report Bugs
Please add an issue at https://github.com/xianyi/OpenBLAS/issues
OpenBLAS users mailing list: http://list.rdcps.ac.cn/mailman/listinfo/openblas
8.ChangeLog
-Please see Changelog.txt to obtain the differences between GotoBLAS2 1.13 BSD version.
\ No newline at end of file
+Please see Changelog.txt for the differences from the GotoBLAS2 1.13 BSD version.
+
+9.Known Issues
+* The number of CPUs/Cores should be less than or equal to 8*sizeof(unsigned long). On 64 bits, the limit
+ is 64. On 32 bits, it is 32.
int blas_server_avail = 0;
+void goto_set_num_threads(int num_threads) {
+ /* Set the number of threads to use and pass it on to OpenMP.
+  * num_threads: requested thread count. A value below 1 is replaced by
+  * the current blas_num_threads; a value above MAX_CPU_NUMBER is capped.
+  */
+ if (num_threads < 1) num_threads = blas_num_threads;
+ /* Cap the request at MAX_CPU_NUMBER. */
+ if (num_threads > MAX_CPU_NUMBER) num_threads = MAX_CPU_NUMBER;
+ /* blas_num_threads is only ever raised here, never lowered. */
+ if (num_threads > blas_num_threads) {
+ blas_num_threads = num_threads;
+ }
+ /* Record the effective thread count. */
+ blas_cpu_number = num_threads;
+ /* Hand the same count to the OpenMP runtime. */
+ omp_set_num_threads(blas_cpu_number);
+
+}
+void openblas_set_num_threads(int num_threads) {
+ /* Alias: forwards num_threads unchanged to goto_set_num_threads(). */
+ goto_set_num_threads(num_threads);
+}
+
int blas_thread_init(void){
blas_get_cpu_number();
return count;
}
+/***
+ Known issue: The number of CPUs/cores should be less than
+ or equal to 8*sizeof(unsigned long). On 64 bits, the limit
+ is 64. On 32 bits, it is 32.
+***/
static inline unsigned long get_cpumap(int node) {
int infile;
unsigned long affinity;
char name[160];
+ char cpumap[160];
char *p, *dummy;
-
+ int i=0;
+
sprintf(name, CPUMAP_NAME, node);
infile = open(name, O_RDONLY);
if (infile != -1) {
- read(infile, name, sizeof(name));
-
+ read(infile, cpumap, sizeof(cpumap));
+ p = cpumap;
+ while (*p != '\n' && i<160){
+ if(*p != ',') {
+ name[i++]=*p;
+ }
+ p++;
+ }
p = name;
- while ((*p == '0') || (*p == ',')) p++;
+ // while ((*p == '0') || (*p == ',')) p++;
- affinity = strtol(p, &dummy, 16);
+ affinity = strtoul(p, &dummy, 16);
close(infile);
}
unsigned long share;
int cpu;
- common -> avail = (1UL << common -> num_procs) - 1;
+ if(common->num_procs > 64){
+ fprintf(stderr, "\nOpenBLAS Warining : The number of CPU/Cores(%d) is beyond the limit(64). Terminated.\n", common->num_procs);
+ exit(1);
+ }else if(common->num_procs == 64){
+ common -> avail = 0xFFFFFFFFFFFFFFFFUL;
+ }else
+ common -> avail = (1UL << common -> num_procs) - 1;
#ifdef DEBUG
fprintf(stderr, "\nAvail CPUs : %04lx.\n", common -> avail);
fprintf(stderr, "CPU mask : %04lx.\n\n", *(unsigned long *)&cpu_orig_mask[0]);
#endif
- lprocmask = (1UL << common -> final_num_procs) - 1;
+ if(common->final_num_procs > 64){
+ fprintf(stderr, "\nOpenBLAS Warining : The number of CPU/Cores(%d) is beyond the limit(64). Terminated.\n", common->final_num_procs);
+ exit(1);
+ }else if(common->final_num_procs == 64){
+ lprocmask = 0xFFFFFFFFFFFFFFFFUL;
+ }else
+ lprocmask = (1UL << common -> final_num_procs) - 1;
#ifndef USE_OPENMP
lprocmask &= *(unsigned long *)&cpu_orig_mask[0];