/*****************************************************************************
-Copyright (c) 2011, Lab of Parallel Software and Computational Science,ICSAS
+Copyright (c) 2011,2012 Lab of Parallel Software and Computational Science,ISCAS
All rights reserved.
Redistribution and use in source and binary forms, with or without
#define MAX_NODES 16
#define MAX_CPUS 256
+#define NCPUBITS (8*sizeof(unsigned long)) /* bits in one unsigned long mask word, counting 8 bits per byte */
+#define MAX_BITMASK_LEN (MAX_CPUS/NCPUBITS) /* mask words needed for MAX_CPUS bits; integer division, so it rounds down if MAX_CPUS is not a multiple of NCPUBITS */
+#define CPUELT(cpu) ((cpu) / NCPUBITS) /* index of the mask word that holds the bit for `cpu` */
+#define CPUMASK(cpu) ((unsigned long) 1UL << ((cpu) % NCPUBITS)) /* single-bit mask for `cpu` inside its mask word */
+
#define SH_MAGIC 0x510510
int num_nodes;
int num_procs;
int final_num_procs;
- unsigned long avail;
-
+ unsigned long avail [MAX_BITMASK_LEN];
+ int avail_count;
unsigned long cpu_info [MAX_CPUS];
- unsigned long node_info [MAX_NODES];
+ unsigned long node_info [MAX_NODES][MAX_BITMASK_LEN];
int cpu_use[MAX_CPUS];
} shm_t;
static int shmid, pshmid;
static void *paddr;
-static unsigned long lprocmask, lnodemask;
+static unsigned long lprocmask[MAX_BITMASK_LEN], lnodemask; /* lprocmask: multi-word bit mask, tested with CPUELT()/CPUMASK() in local_cpu_map(); lnodemask stays a single word */
+static int lprocmask_count = 0; /* number of lprocmask words filled in by disable_affinity() */
static int numprocs = 1;
static int numnodes = 1;
than sizeof(unsigned long). On 64 bits, the limit
is 64. On 32 bits, it is 32.
***/
-static inline unsigned long get_cpumap(int node) {
+/*
+ * Read the CPU bitmap of NUMA node `node` from the file named by CPUMAP_NAME
+ * and store it in node_info[], least significant word first (CPU 0 is bit 0
+ * of node_info[0]).
+ *
+ * The text is treated as one hexadecimal number, most significant digit
+ * first, with ',' separators ignored and the first '\n' ending the value.
+ * At most MAX_BITMASK_LEN words are written.  Words for which the file
+ * supplies no digits are left untouched, and nothing is written at all when
+ * the file cannot be opened or read, so the caller must pre-clear node_info.
+ */
+static inline void get_cpumap(int node, unsigned long * node_info) {
int infile;
- unsigned long affinity;
char name[160];
char cpumap[160];
- char *p, *dummy;
int i=0;
+  int count=0;
+  int k=0;
+  int len;
sprintf(name, CPUMAP_NAME, node);
infile = open(name, O_RDONLY);
- affinity = 0;
-
if (infile != -1) {
- read(infile, cpumap, sizeof(cpumap));
- p = cpumap;
- while (*p != '\n' && i<160){
- if(*p != ',') {
- name[i++]=*p;
- }
- p++;
- }
- p = name;
- // while ((*p == '0') || (*p == ',')) p++;
+    /* Read one byte less than the buffer so the digit string built below
+       always has room for its terminator; scan only what was really read. */
+    len = (int)read(infile, cpumap, sizeof(cpumap) - 1);
+
+    /* Reuse name[] (the path is no longer needed) for the bare hex digits. */
+    for(i=0; i<len; i++){
+      if(cpumap[i] == '\n')
+        break;
+      if(cpumap[i] != ',')
+        name[k++]=cpumap[i];
+    }
+    name[k]='\0';
- affinity = strtoul(p, &dummy, 16);
-
+    /* Peel words off the RIGHT end of the digit string so that word
+       boundaries line up with bit 0 even when the number of digits is not
+       a multiple of NCPUBITS/4.  Cutting the string at k after each
+       conversion makes the next strtoul() stop at the word boundary. */
+    while(k > 0 && count < (int)MAX_BITMASK_LEN){
+      k = (k > (int)(NCPUBITS/4)) ? k - (int)(NCPUBITS/4) : 0;
+      node_info[count++] = strtoul(name + k, NULL, 16);
+      name[k]='\0';
+    }
close(infile);
}
- return affinity;
}
-static inline unsigned long get_share(int cpu, int level) {
+/*
+ * Fill share[] (MAX_BITMASK_LEN words, least significant word first) with
+ * the CPU bitmap read from the file named by SHARE_NAME for `cpu` and
+ * `level`.
+ *
+ * share[] is first reset to contain only the bit of `cpu` itself; that is
+ * the result when the file cannot be opened or read.  Otherwise the text is
+ * treated as one hexadecimal number, most significant digit first, with ','
+ * separators ignored and the first '\n' ending the value, and every word the
+ * file provides digits for replaces the corresponding word of share[].
+ */
+static inline void get_share(int cpu, int level, unsigned long * share) {
int infile;
- unsigned long affinity;
+  char cpumap[160];
char name[160];
- char *p;
-
+  int count=0;
+  int i=0,k=0;
+  int len;
+
sprintf(name, SHARE_NAME, cpu, level);
infile = open(name, O_RDONLY);
- affinity = (1UL << cpu);
-
+  /* Default result: the CPU shares only with itself. */
+  for(i=0; i<(int)MAX_BITMASK_LEN; i++){
+    share[i]=0;
+  }
+  share[CPUELT(cpu)] = CPUMASK(cpu);
+
if (infile != -1) {
- read(infile, name, sizeof(name));
-
- p = name;
- while ((*p == '0') || (*p == ',')) p++;
+    /* Read one byte less than the buffer so the digit string built below
+       always has room for its terminator; scan only what was really read. */
+    len = (int)read(infile, cpumap, sizeof(cpumap) - 1);
+
+    /* Reuse name[] (the path is no longer needed) for the bare hex digits. */
+    for(i=0; i<len; i++){
+      if(cpumap[i] == '\n')
+        break;
+      if(cpumap[i] != ',')
+        name[k++]=cpumap[i];
+    }
+    name[k]='\0';
- affinity = strtol(p, &p, 16);
+    /* Peel words off the RIGHT end of the digit string so that word
+       boundaries line up with bit 0 even when the number of digits is not
+       a multiple of NCPUBITS/4.  Cutting the string at k after each
+       conversion makes the next strtoul() stop at the word boundary. */
+    while(k > 0 && count < (int)MAX_BITMASK_LEN){
+      k = (k > (int)(NCPUBITS/4)) ? k - (int)(NCPUBITS/4) : 0;
+      share[count++] = strtoul(name + k, NULL, 16);
+      name[k]='\0';
+    }
+
close(infile);
}
- return affinity;
}
static int numa_check(void) {
DIR *dp;
struct dirent *dir;
int node;
+ int j;
common -> num_nodes = 0;
return 0;
}
- for (node = 0; node < MAX_NODES; node ++) common -> node_info[node] = 0;
+ for (node = 0; node < MAX_NODES; node ++) {
+ for (j = 0; j<MAX_BITMASK_LEN; j++) common -> node_info[node][j] = 0;
+ }
while ((dir = readdir(dp)) != NULL) {
if (*(unsigned int *) dir -> d_name == 0x065646f6eU) {
node = atoi(&dir -> d_name[4]);
if (node > MAX_NODES) {
- fprintf(stderr, "\nGotoBLAS Warining : MAX_NODES (NUMA) is too small. Terminated.\n");
+ fprintf(stderr, "\nOpenBLAS Warning : MAX_NODES (NUMA) is too small. Terminated.\n");
exit(1);
}
common -> num_nodes ++;
- common -> node_info[node] = get_cpumap(node);
+ get_cpumap(node, common->node_info[node]);
}
}
fprintf(stderr, "Numa found : number of Nodes = %2d\n", common -> num_nodes);
for (node = 0; node < common -> num_nodes; node ++)
- fprintf(stderr, "MASK (%2d) : %08lx\n", node, common -> node_info[node]);
+ fprintf(stderr, "MASK (%2d) : %08lx\n", node, common -> node_info[node][0]);
#endif
return common -> num_nodes;
int i, j, h;
unsigned long work, bit;
int count = 0;
+ int bitmask_idx = 0;
for (node = 0; node < common -> num_nodes; node ++) {
core = 0;
for (cpu = 0; cpu < common -> num_procs; cpu ++) {
- if (common -> node_info[node] & common -> avail & (1UL << cpu)) {
+ bitmask_idx = CPUELT(cpu);
+ if (common -> node_info[node][bitmask_idx] & common -> avail[bitmask_idx] & CPUMASK(cpu)) {
common -> cpu_info[count] = WRITE_CORE(core) | WRITE_NODE(node) | WRITE_CPU(cpu);
count ++;
core ++;
static void disable_hyperthread(void) {
- unsigned long share;
+  /*
+   * Build common->avail as a multi-word mask with one bit set for each of
+   * the first common->num_procs CPUs, record the number of words used in
+   * common->avail_count, and then, for every CPU that still has enabled
+   * siblings according to get_share(cpu, 1, ...), clear those siblings from
+   * the mask so that only one CPU per sharing group stays available.
+   */
+  unsigned long share[MAX_BITMASK_LEN];
int cpu;
+  int bitmask_idx = 0;
+  int i=0, count=0;
+  int siblings;
+
+  bitmask_idx = CPUELT(common -> num_procs);
- if(common->num_procs > 64){
- fprintf(stderr, "\nOpenBLAS Warining : The number of CPU/Cores(%d) is beyond the limit(64). Terminated.\n", common->num_procs);
- exit(1);
- }else if(common->num_procs == 64){
- common -> avail = 0xFFFFFFFFFFFFFFFFUL;
- }else
- common -> avail = (1UL << common -> num_procs) - 1;
+  /* Completely used words first, then the partially used last word.
+     ~0UL is all-ones whatever the width of unsigned long. */
+  for(i=0; i< bitmask_idx; i++){
+    common -> avail[count++] = ~0UL;
+  }
+  if(CPUMASK(common -> num_procs) != 1){
+    common -> avail[count++] = CPUMASK(common -> num_procs) - 1;
+  }
+  common -> avail_count = count;
+
#ifdef DEBUG
- fprintf(stderr, "\nAvail CPUs : %04lx.\n", common -> avail);
+  fprintf(stderr, "\nAvail CPUs : ");
+  for(i=0; i<count; i++)
+    fprintf(stderr, "%04lx ", common -> avail[i]);
+  fprintf(stderr, ".\n");
#endif
for (cpu = 0; cpu < common -> num_procs; cpu ++) {
-
- share = (get_share(cpu, 1) & common -> avail);
-
- if (popcount(share) > 1) {
+
+    get_share(cpu, 1, share);
+
+    /* Only siblings that are still enabled count.  Without this filter the
+       second thread of a core would in turn disable the first one and the
+       whole core would be lost.  The total is taken over all words so that
+       siblings stored in different words are detected as well. */
+    siblings = 0;
+    for (i = 0; i < count; i++){
+      share[i] &= common -> avail[i];
+      siblings += popcount(share[i]);
+    }
+    if (siblings <= 1) continue;
+
+    /* Keep this CPU: its own bit lives in exactly one word. */
+    share[CPUELT(cpu)] &= ~CPUMASK(cpu);
+
+    for (i = 0; i < count ; i++){
+      if (share[i] != 0) {
#ifdef DEBUG
- fprintf(stderr, "Detected Hyper Threading on CPU %4x; disabled CPU %04lx.\n",
- cpu, share & ~(1UL << cpu));
+        fprintf(stderr, "Detected Hyper Threading on CPU %4x; disabled CPU %04lx.\n",
+                cpu, share[i]);
#endif
- common -> avail &= ~((share & ~(1UL << cpu)));
+        common -> avail[i] &= ~share[i];
+      }
}
}
}
static void disable_affinity(void) {
-
+  /*
+   * Build lprocmask as a multi-word mask with the low
+   * common->final_num_procs bits set and store the number of words used in
+   * lprocmask_count.  Unless USE_OPENMP is defined, each word is then
+   * ANDed with the corresponding word of cpu_orig_mask.
+   */
+  int i=0;
+  int bitmask_idx=0;
+  int count=0;
#ifdef DEBUG
- fprintf(stderr, "Final all available CPUs : %04lx.\n\n", common -> avail);
+  /* Print every word of the mask, not just the first one. */
+  fprintf(stderr, "Final all available CPUs : ");
+  for(i=0; i<common -> avail_count; i++)
+    fprintf(stderr, "%04lx ", common -> avail[i]);
+  fprintf(stderr, ".\n\n");
fprintf(stderr, "CPU mask : %04lx.\n\n", *(unsigned long *)&cpu_orig_mask[0]);
#endif
- if(common->final_num_procs > 64){
- fprintf(stderr, "\nOpenBLAS Warining : The number of CPU/Cores(%d) is beyond the limit(64). Terminated.\n", common->final_num_procs);
- exit(1);
- }else if(common->final_num_procs == 64){
- lprocmask = 0xFFFFFFFFFFFFFFFFUL;
- }else
- lprocmask = (1UL << common -> final_num_procs) - 1;
+
+  bitmask_idx = CPUELT(common -> final_num_procs);
+
+  /* Completely used words first, then the partially used last word.
+     ~0UL is all-ones whatever the width of unsigned long. */
+  for(i=0; i< bitmask_idx; i++){
+    lprocmask[count++] = ~0UL;
+  }
+  if(CPUMASK(common -> final_num_procs) != 1){
+    lprocmask[count++] = CPUMASK(common -> final_num_procs) - 1;
+  }
+  lprocmask_count = count;
#ifndef USE_OPENMP
- lprocmask &= *(unsigned long *)&cpu_orig_mask[0];
+  for(i=0; i< count; i++){
+    lprocmask[i] &= ((unsigned long *)&cpu_orig_mask[0])[i];
+  }
#endif
#ifdef DEBUG
- fprintf(stderr, "I choose these CPUs : %04lx.\n\n", lprocmask);
+  fprintf(stderr, "I choose these CPUs : ");
+  for(i=0; i< count; i++)
+    fprintf(stderr, "%04lx ", lprocmask[i]);
+  fprintf(stderr, ".\n\n");
#endif
}
static void local_cpu_map(void) {
int cpu, id, mapping;
-
+ int bitmask_idx = 0;
cpu = 0;
mapping = 0;
if (id > 0) {
if (is_dead(id)) common -> cpu_use[cpu] = 0;
}
-
- if ((common -> cpu_use[cpu] == 0) && (lprocmask & (1UL << cpu))) {
+
+ bitmask_idx = CPUELT(cpu);
+ if ((common -> cpu_use[cpu] == 0) && (lprocmask[bitmask_idx] & CPUMASK(cpu))) {
common -> cpu_use[cpu] = pshmid;
cpu_mapping[mapping] = READ_CPU(common -> cpu_info[cpu]);
#ifndef USE_OPENMP
cpu_set_t cpu_mask;
#endif
+ int i;
if (initialized) return;
common -> num_procs = get_nprocs();
+ if(common -> num_procs > MAX_CPUS) {
+ fprintf(stderr, "\nOpenBLAS Warining : The number of CPU/Cores(%d) is beyond the limit(%d). Terminated.\n", common->num_procs, MAX_CPUS);
+ exit(1);
+ }
+
for (cpu = 0; cpu < common -> num_procs; cpu++) common -> cpu_info[cpu] = cpu;
numa_check();
if (common -> num_nodes > 1) numa_mapping();
- common -> final_num_procs = popcount(common -> avail);
+ common -> final_num_procs = 0;
+ for(i = 0; i < common -> avail_count; i++) common -> final_num_procs += popcount(common -> avail[i]);
for (cpu = 0; cpu < common -> final_num_procs; cpu ++) common -> cpu_use[cpu] = 0;
disable_affinity();
- num_avail = popcount(lprocmask);
+ num_avail = 0;
+ for(i=0; i<lprocmask_count; i++) num_avail += popcount(lprocmask[i]);
if ((numprocs <= 0) || (numprocs > num_avail)) numprocs = num_avail;