From 0fb380c966c071e2af71031b7d82b8bf2e3246a1 Mon Sep 17 00:00:00 2001 From: Ashwin Sekhar T K Date: Fri, 29 Apr 2016 11:58:15 +0530 Subject: [PATCH] Update NUMA CPU binding When all the processes can be accommodated within the current node, then use cores from the current node only. --- driver/others/init.c | 109 +++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 83 insertions(+), 26 deletions(-) diff --git a/driver/others/init.c b/driver/others/init.c index f134f85..801f939 100644 --- a/driver/others/init.c +++ b/driver/others/init.c @@ -361,6 +361,9 @@ static void numa_mapping(void) { unsigned long work, bit; int count = 0; int bitmask_idx = 0; + int current_cpu; + int current_node = 0; + int cpu_count = 0; for (node = 0; node < common -> num_nodes; node ++) { core = 0; @@ -382,33 +385,84 @@ static void numa_mapping(void) { fprintf(stderr, "CPU (%2d) : %08lx\n", cpu, common -> cpu_info[cpu]); #endif - h = 1; - - while (h < count) h = 2 * h + 1; - - while (h > 1) { - h /= 2; - for (i = h; i < count; i++) { - work = common -> cpu_info[i]; - bit = CPU_ISSET(i, &cpu_orig_mask[0]); - j = i - h; - while (work < common -> cpu_info[j]) { - common -> cpu_info[j + h] = common -> cpu_info[j]; - if (CPU_ISSET(j, &cpu_orig_mask[0])) { - CPU_SET(j + h, &cpu_orig_mask[0]); - } else { - CPU_CLR(j + h, &cpu_orig_mask[0]); - } - j -= h; - if (j < 0) break; - } - common -> cpu_info[j + h] = work; - if (bit) { - CPU_SET(j + h, &cpu_orig_mask[0]); - } else { - CPU_CLR(j + h, &cpu_orig_mask[0]); + current_cpu = sched_getcpu(); + for (cpu = 0; cpu < count; cpu++) { + if (READ_CPU(common -> cpu_info[cpu]) == current_cpu) { + current_node = READ_NODE(common -> cpu_info[cpu]); + break; + } + } + for (i = 0; i < MAX_BITMASK_LEN; i++) + cpu_count += popcount(common -> node_info[current_node][i] & common -> avail[i]); + + /* + * If all the processes can be accommodated in the + * current node itself, then bind to cores + * from the current node only + */ + if 
(numprocs <= cpu_count) { + /* + * First sort all the cores in order from the current node. + * Then take remaining nodes one by one in order, + * and sort their cores in order. + */ + for (i = 0; i < count; i++) { + for (j = 0; j < count - 1; j++) { + int node_1, node_2; + int core_1, core_2; + int swap = 0; + + node_1 = READ_NODE(common -> cpu_info[j]); + node_2 = READ_NODE(common -> cpu_info[j + 1]); + core_1 = READ_CORE(common -> cpu_info[j]); + core_2 = READ_CORE(common -> cpu_info[j + 1]); + + if (node_1 == node_2) { + if (core_1 > core_2) + swap = 1; + } else { + if ((node_2 == current_node) || + ((node_1 != current_node) && (node_1 > node_2))) + swap = 1; + } + if (swap) { + unsigned long temp; + + temp = common->cpu_info[j]; + common->cpu_info[j] = common->cpu_info[j + 1]; + common->cpu_info[j + 1] = temp; + } } + } + } else { + h = 1; + + while (h < count) h = 2 * h + 1; + + while (h > 1) { + h /= 2; + for (i = h; i < count; i++) { + work = common -> cpu_info[i]; + bit = CPU_ISSET(i, &cpu_orig_mask[0]); + j = i - h; + while (work < common -> cpu_info[j]) { + common -> cpu_info[j + h] = common -> cpu_info[j]; + if (CPU_ISSET(j, &cpu_orig_mask[0])) { + CPU_SET(j + h, &cpu_orig_mask[0]); + } else { + CPU_CLR(j + h, &cpu_orig_mask[0]); + } + j -= h; + if (j < 0) break; + } + common -> cpu_info[j + h] = work; + if (bit) { + CPU_SET(j + h, &cpu_orig_mask[0]); + } else { + CPU_CLR(j + h, &cpu_orig_mask[0]); + } + } } } @@ -416,7 +470,10 @@ static void numa_mapping(void) { fprintf(stderr, "\nSorting ...\n\n"); for (cpu = 0; cpu < count; cpu++) - fprintf(stderr, "CPU (%2d) : %08lx\n", cpu, common -> cpu_info[cpu]); + fprintf(stderr, "CPUINFO (%2d) : %08lx (CPU=%3lu CORE=%3lu NODE=%3lu)\n", cpu, common -> cpu_info[cpu], + READ_CPU(common -> cpu_info[cpu]), + READ_CORE(common -> cpu_info[cpu]), + READ_NODE(common -> cpu_info[cpu])); #endif } -- 2.7.4