/*
 * arch/ia64/kernel/domain.c
 * Architecture specific sched-domains builder.
 *
 * Copyright (C) 2004 Jesse Barnes
 * Copyright (C) 2004 Silicon Graphics, Inc.
 */

#include <linux/sched.h>
#include <linux/percpu.h>
#include <linux/slab.h>
#include <linux/cpumask.h>
#include <linux/init.h>
#include <linux/topology.h>
#include <linux/nodemask.h>

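/* Maximum number of nodes spanned by a single node-level sched_domain */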
#define SD_NODES_PER_DOMAIN 16

#ifdef CONFIG_NUMA
/**
 * find_next_best_node - find the next node to include in a sched_domain
 * @node: node whose sched_domain we're building
 * @used_nodes: nodes already in the sched_domain
 *
 * Find the next node to include in a given scheduling domain.  Simply
 * finds the closest node not already in the @used_nodes map.
 *
 * Should use nodemask_t.
 */
static int find_next_best_node(int node, unsigned long *used_nodes)
{
	int i, n, val, min_val, best_node = 0;

	min_val = INT_MAX;

	for (i = 0; i < MAX_NUMNODES; i++) {
		/* Start at @node */
		n = (node + i) % MAX_NUMNODES;

		if (!nr_cpus_node(n))
			continue;

		/* Skip already used nodes */
		if (test_bit(n, used_nodes))
			continue;

		/* Simple min distance search */
		val = node_distance(node, n);

		if (val < min_val) {
			min_val = val;
			best_node = n;
		}
	}

	set_bit(best_node, used_nodes);
	return best_node;
}

/**
 * sched_domain_node_span - get a cpumask for a node's sched_domain
 * @node: node whose cpumask we're constructing
 *
 * Given a node, construct a good cpumask for its sched_domain to span.  It
 * should be one that prevents unnecessary balancing, but also spreads tasks
 * out optimally.
 */
static cpumask_t sched_domain_node_span(int node)
{
	int i;
	cpumask_t span, nodemask;
	DECLARE_BITMAP(used_nodes, MAX_NUMNODES);

	cpus_clear(span);
	bitmap_zero(used_nodes, MAX_NUMNODES);

	nodemask = node_to_cpumask(node);
	cpus_or(span, span, nodemask);
	set_bit(node, used_nodes);

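	/*
	 * Greedily pull in the closest not-yet-included nodes until the
	 * span covers SD_NODES_PER_DOMAIN nodes.
	 */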
	for (i = 1; i < SD_NODES_PER_DOMAIN; i++) {
		int next_node = find_next_best_node(node, used_nodes);

		nodemask = node_to_cpumask(next_node);
		cpus_or(span, span, nodemask);
	}

	return span;
}
#endif

/*
 * At the moment, CONFIG_SCHED_SMT is never defined, but leave it in so we
 * can switch it on easily if needed.
 */
#ifdef CONFIG_SCHED_SMT
static DEFINE_PER_CPU(struct sched_domain, cpu_domains);
static struct sched_group sched_group_cpus[NR_CPUS];
static int cpu_to_cpu_group(int cpu)
{
	return cpu;
}
#endif

static DEFINE_PER_CPU(struct sched_domain, phys_domains);
static struct sched_group sched_group_phys[NR_CPUS];
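
/*
 * With SMT, all hardware siblings of a CPU share one physical group,
 * keyed by the first CPU in the sibling map; otherwise each CPU is its
 * own physical group.
 */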
static int cpu_to_phys_group(int cpu)
{
#ifdef CONFIG_SCHED_SMT
	return first_cpu(cpu_sibling_map[cpu]);
#else
	return cpu;
#endif
}

#ifdef CONFIG_NUMA
/*
 * init_sched_build_groups() can't handle what we want to do with node
 * groups, so roll our own.  Each node gets its own dynamically allocated
 * list of groups.
 */
static DEFINE_PER_CPU(struct sched_domain, node_domains);
static struct sched_group **sched_group_nodes_bycpu[NR_CPUS];

static DEFINE_PER_CPU(struct sched_domain, allnodes_domains);
static struct sched_group *sched_group_allnodes_bycpu[NR_CPUS];

static int cpu_to_allnodes_group(int cpu)
{
	return cpu_to_node(cpu);
}
#endif

/*
 * Build sched domains for a given set of cpus and attach the sched domains
 * to the individual cpus.
 */
void build_sched_domains(const cpumask_t *cpu_map)
{
	int i;
#ifdef CONFIG_NUMA
	struct sched_group **sched_group_nodes = NULL;
	struct sched_group *sched_group_allnodes = NULL;

	/*
	 * Allocate the per-node list of sched groups
	 */
	sched_group_nodes = kmalloc(sizeof(struct sched_group *) * MAX_NUMNODES,
				    GFP_ATOMIC);
	if (!sched_group_nodes) {
		printk(KERN_WARNING "Can not alloc sched group node list\n");
		return;
	}
	sched_group_nodes_bycpu[first_cpu(*cpu_map)] = sched_group_nodes;
#endif

	/*
	 * Set up domains for cpus specified by the cpu_map.
	 */
	for_each_cpu_mask(i, *cpu_map) {
		int group;
		struct sched_domain *sd = NULL, *p;
		cpumask_t nodemask = node_to_cpumask(cpu_to_node(i));

		cpus_and(nodemask, nodemask, *cpu_map);

#ifdef CONFIG_NUMA
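		/*
		 * On systems with many more CPUs than fit in one node-level
		 * domain, add a top-level "allnodes" domain spanning the
		 * whole cpu_map above the node domains.
		 */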
		if (cpus_weight(*cpu_map)
				> SD_NODES_PER_DOMAIN * cpus_weight(nodemask)) {
			if (!sched_group_allnodes) {
				sched_group_allnodes
					= kmalloc(sizeof(struct sched_group)
							* MAX_NUMNODES,
						  GFP_KERNEL);
				if (!sched_group_allnodes) {
					printk(KERN_WARNING
					"Can not alloc allnodes sched group\n");
					break;
				}
				sched_group_allnodes_bycpu[i]
						= sched_group_allnodes;
			}
			sd = &per_cpu(allnodes_domains, i);
			*sd = SD_ALLNODES_INIT;
			sd->span = *cpu_map;
			group = cpu_to_allnodes_group(i);
			sd->groups = &sched_group_allnodes[group];
			p = sd;
		} else
			p = NULL;

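		/* Node-level domain: this CPU's node plus its nearest neighbors */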
		sd = &per_cpu(node_domains, i);
		*sd = SD_NODE_INIT;
		sd->span = sched_domain_node_span(cpu_to_node(i));
		sd->parent = p;
		cpus_and(sd->span, sd->span, *cpu_map);
#endif

		p = sd;
		sd = &per_cpu(phys_domains, i);
		group = cpu_to_phys_group(i);
		*sd = SD_CPU_INIT;
		sd->span = nodemask;
		sd->parent = p;
		sd->groups = &sched_group_phys[group];

#ifdef CONFIG_SCHED_SMT
		p = sd;
		sd = &per_cpu(cpu_domains, i);
		group = cpu_to_cpu_group(i);
		*sd = SD_SIBLING_INIT;
		sd->span = cpu_sibling_map[i];
		cpus_and(sd->span, sd->span, *cpu_map);
		sd->parent = p;
		sd->groups = &sched_group_cpus[group];
#endif
	}

#ifdef CONFIG_SCHED_SMT
	/* Set up CPU (sibling) groups */
	for_each_cpu_mask(i, *cpu_map) {
		cpumask_t this_sibling_map = cpu_sibling_map[i];
		cpus_and(this_sibling_map, this_sibling_map, *cpu_map);
		if (i != first_cpu(this_sibling_map))
			continue;

		init_sched_build_groups(sched_group_cpus, this_sibling_map,
						&cpu_to_cpu_group);
	}
#endif

	/* Set up physical groups */
	for (i = 0; i < MAX_NUMNODES; i++) {
		cpumask_t nodemask = node_to_cpumask(i);

		cpus_and(nodemask, nodemask, *cpu_map);
		if (cpus_empty(nodemask))
			continue;

		init_sched_build_groups(sched_group_phys, nodemask,
						&cpu_to_phys_group);
	}

#ifdef CONFIG_NUMA
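	/* Build the allnodes groups: one group per node, covering every CPU in the map */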
	if (sched_group_allnodes)
		init_sched_build_groups(sched_group_allnodes, *cpu_map,
					&cpu_to_allnodes_group);

	for (i = 0; i < MAX_NUMNODES; i++) {
		/* Set up node groups */
		struct sched_group *sg, *prev;
		cpumask_t nodemask = node_to_cpumask(i);
		cpumask_t domainspan;
		cpumask_t covered = CPU_MASK_NONE;
		int j;

		cpus_and(nodemask, nodemask, *cpu_map);
		if (cpus_empty(nodemask)) {
			sched_group_nodes[i] = NULL;
			continue;
		}

		domainspan = sched_domain_node_span(i);
		cpus_and(domainspan, domainspan, *cpu_map);

		sg = kmalloc(sizeof(struct sched_group), GFP_KERNEL);
		sched_group_nodes[i] = sg;
		for_each_cpu_mask(j, nodemask) {
			struct sched_domain *sd;
			sd = &per_cpu(node_domains, j);
			sd->groups = sg;
			if (sd->groups == NULL) {
				/* Turn off balancing if we have no groups */
				sd->flags = 0;
			}
		}
		if (!sg) {
			printk(KERN_WARNING
			"Can not alloc domain group for node %d\n", i);
			continue;
		}
		sg->cpu_power = 0;
		sg->cpumask = nodemask;
		cpus_or(covered, covered, nodemask);
		prev = sg;

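		/*
		 * Add one group for each remaining node in the domain span,
		 * covering its not-yet-covered CPUs, and chain the groups
		 * into a circular list headed by sched_group_nodes[i].
		 */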
		for (j = 0; j < MAX_NUMNODES; j++) {
			cpumask_t tmp, notcovered;
			int n = (i + j) % MAX_NUMNODES;

			cpus_complement(notcovered, covered);
			cpus_and(tmp, notcovered, *cpu_map);
			cpus_and(tmp, tmp, domainspan);
			if (cpus_empty(tmp))
				break;

			nodemask = node_to_cpumask(n);
			cpus_and(tmp, tmp, nodemask);
			if (cpus_empty(tmp))
				continue;

			sg = kmalloc(sizeof(struct sched_group), GFP_KERNEL);
			if (!sg) {
				printk(KERN_WARNING
				"Can not alloc domain group for node %d\n", n);
				break;
			}
			sg->cpu_power = 0;
			sg->cpumask = tmp;
			cpus_or(covered, covered, tmp);
			prev->next = sg;
			prev = sg;
		}
		prev->next = sched_group_nodes[i];
	}
#endif

	/* Calculate CPU power for physical packages and nodes */
	for_each_cpu_mask(i, *cpu_map) {
		int power;
		struct sched_domain *sd;
#ifdef CONFIG_SCHED_SMT
		sd = &per_cpu(cpu_domains, i);
		power = SCHED_LOAD_SCALE;
		sd->groups->cpu_power = power;
#endif

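		/*
		 * Physical package power: SCHED_LOAD_SCALE plus a 10%
		 * bonus for each additional CPU in the package.
		 */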
		sd = &per_cpu(phys_domains, i);
		power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE *
				(cpus_weight(sd->groups->cpumask)-1) / 10;
		sd->groups->cpu_power = power;

#ifdef CONFIG_NUMA
		sd = &per_cpu(allnodes_domains, i);
		if (sd->groups) {
			power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE *
				(cpus_weight(sd->groups->cpumask)-1) / 10;
			sd->groups->cpu_power = power;
		}
#endif
	}

#ifdef CONFIG_NUMA
	for (i = 0; i < MAX_NUMNODES; i++) {
		struct sched_group *sg = sched_group_nodes[i];
		int j;

		if (sg == NULL)
			continue;
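		/* Accumulate power over every group in this node's circular list */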
next_sg:
		for_each_cpu_mask(j, sg->cpumask) {
			struct sched_domain *sd;
			int power;

			sd = &per_cpu(phys_domains, j);
			if (j != first_cpu(sd->groups->cpumask)) {
				/*
				 * Only add "power" once for each
				 * physical package.
				 */
				continue;
			}
			power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE *
				(cpus_weight(sd->groups->cpumask)-1) / 10;

			sg->cpu_power += power;
		}
		sg = sg->next;
		if (sg != sched_group_nodes[i])
			goto next_sg;
	}
#endif

	/* Attach the domains */
	for_each_cpu_mask(i, *cpu_map) {
		struct sched_domain *sd;
#ifdef CONFIG_SCHED_SMT
		sd = &per_cpu(cpu_domains, i);
#else
		sd = &per_cpu(phys_domains, i);
#endif
		cpu_attach_domain(sd, i);
	}
}

/*
 * Set up scheduler domains and groups.  Callers must hold the hotplug lock.
 */
void arch_init_sched_domains(const cpumask_t *cpu_map)
{
	cpumask_t cpu_default_map;

	/*
	 * Set up the mask of cpus without special case scheduling requirements.
	 * For now this just excludes isolated cpus, but could be used to
	 * exclude other special cases in the future.
	 */
	cpus_andnot(cpu_default_map, *cpu_map, cpu_isolated_map);

	build_sched_domains(&cpu_default_map);
}

void arch_destroy_sched_domains(const cpumask_t *cpu_map)
{
#ifdef CONFIG_NUMA
	int i;
	int cpu;

	for_each_cpu_mask(cpu, *cpu_map) {
		struct sched_group *sched_group_allnodes
			= sched_group_allnodes_bycpu[cpu];
		struct sched_group **sched_group_nodes
			= sched_group_nodes_bycpu[cpu];

		if (sched_group_allnodes) {
			kfree(sched_group_allnodes);
			sched_group_allnodes_bycpu[cpu] = NULL;
		}

		if (!sched_group_nodes)
			continue;

		for (i = 0; i < MAX_NUMNODES; i++) {
			cpumask_t nodemask = node_to_cpumask(i);
			struct sched_group *oldsg, *sg = sched_group_nodes[i];

			cpus_and(nodemask, nodemask, *cpu_map);
			if (cpus_empty(nodemask))
				continue;

			if (sg == NULL)
				continue;
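			/*
			 * Free the circular group list; the head is freed
			 * last, when the walk wraps back around to it.
			 */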
			sg = sg->next;
next_sg:
			oldsg = sg;
			sg = sg->next;
			kfree(oldsg);
			if (oldsg != sched_group_nodes[i])
				goto next_sg;
		}
		kfree(sched_group_nodes);
		sched_group_nodes_bycpu[cpu] = NULL;
	}
#endif
}