/*
 * arch/ia64/kernel/domain.c
 * Architecture specific sched-domains builder.
 *
 * Copyright (C) 2004 Jesse Barnes
 * Copyright (C) 2004 Silicon Graphics, Inc.
 */

#include <linux/sched.h>
#include <linux/percpu.h>
#include <linux/slab.h>
#include <linux/cpumask.h>
#include <linux/init.h>
#include <linux/topology.h>
#include <linux/nodemask.h>

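/* Maximum number of nodes spanned by a single node-level sched_domain */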
#define SD_NODES_PER_DOMAIN 16

#ifdef CONFIG_NUMA
/**
 * find_next_best_node - find the next node to include in a sched_domain
 * @node: node whose sched_domain we're building
 * @used_nodes: nodes already in the sched_domain
 *
 * Find the next node to include in a given scheduling domain.  Simply
 * finds the closest node not already in the @used_nodes map.
 *
 * Should use nodemask_t.
 */
static int find_next_best_node(int node, unsigned long *used_nodes)
{
	int i, n, val, min_val, best_node = 0;

	min_val = INT_MAX;

	for (i = 0; i < MAX_NUMNODES; i++) {
		/* Start at @node */
		n = (node + i) % MAX_NUMNODES;

		if (!nr_cpus_node(n))
			continue;

		/* Skip already used nodes */
		if (test_bit(n, used_nodes))
			continue;

		/* Simple min distance search */
		val = node_distance(node, n);

		if (val < min_val) {
			min_val = val;
			best_node = n;
		}
	}

	set_bit(best_node, used_nodes);
	return best_node;
}

/**
 * sched_domain_node_span - get a cpumask for a node's sched_domain
 * @node: node whose cpumask we're constructing
 *
 * Given a node, construct a good cpumask for its sched_domain to span.  It
 * should be one that prevents unnecessary balancing, but also spreads tasks
 * out optimally.
 */
static cpumask_t sched_domain_node_span(int node)
{
	int i;
	cpumask_t span, nodemask;
	DECLARE_BITMAP(used_nodes, MAX_NUMNODES);

	cpus_clear(span);
	bitmap_zero(used_nodes, MAX_NUMNODES);

	nodemask = node_to_cpumask(node);
	cpus_or(span, span, nodemask);
	set_bit(node, used_nodes);

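	/*
	 * Greedily pull in the closest not-yet-included nodes until the
	 * span covers SD_NODES_PER_DOMAIN nodes.
	 */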
	for (i = 1; i < SD_NODES_PER_DOMAIN; i++) {
		int next_node = find_next_best_node(node, used_nodes);

		nodemask = node_to_cpumask(next_node);
		cpus_or(span, span, nodemask);
	}

	return span;
}
#endif

/*
 * At the moment, CONFIG_SCHED_SMT is never defined, but leave it in so we
 * can switch it on easily if needed.
 */
#ifdef CONFIG_SCHED_SMT
static DEFINE_PER_CPU(struct sched_domain, cpu_domains);
static struct sched_group sched_group_cpus[NR_CPUS];
static int cpu_to_cpu_group(int cpu)
{
	return cpu;
}
#endif

static DEFINE_PER_CPU(struct sched_domain, phys_domains);
static struct sched_group sched_group_phys[NR_CPUS];
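
/*
 * With SMT, all hardware siblings of a CPU share one physical group,
 * keyed by the first CPU in the sibling map; otherwise each CPU is its
 * own physical group.
 */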
static int cpu_to_phys_group(int cpu)
{
#ifdef CONFIG_SCHED_SMT
	return first_cpu(cpu_sibling_map[cpu]);
#else
	return cpu;
#endif
}

#ifdef CONFIG_NUMA
/*
 * init_sched_build_groups() can't handle what we want to do with node
 * groups, so roll our own.  Each node gets its own dynamically allocated
 * list of groups.
 */
static DEFINE_PER_CPU(struct sched_domain, node_domains);
static struct sched_group **sched_group_nodes_bycpu[NR_CPUS];

static DEFINE_PER_CPU(struct sched_domain, allnodes_domains);
static struct sched_group *sched_group_allnodes_bycpu[NR_CPUS];

static int cpu_to_allnodes_group(int cpu)
{
	return cpu_to_node(cpu);
}
#endif

/*
 * Build sched domains for a given set of cpus and attach the sched domains
 * to the individual cpus.
 */
void build_sched_domains(const cpumask_t *cpu_map)
{
	int i;
#ifdef CONFIG_NUMA
	struct sched_group **sched_group_nodes = NULL;
	struct sched_group *sched_group_allnodes = NULL;

	/*
	 * Allocate the per-node list of sched groups
	 */
	sched_group_nodes = kmalloc(sizeof(struct sched_group *) * MAX_NUMNODES,
				    GFP_ATOMIC);
	if (!sched_group_nodes) {
		printk(KERN_WARNING "Can not alloc sched group node list\n");
		return;
	}
	sched_group_nodes_bycpu[first_cpu(*cpu_map)] = sched_group_nodes;
#endif

	/*
	 * Set up domains for cpus specified by the cpu_map.
	 */
	for_each_cpu_mask(i, *cpu_map) {
		int group;
		struct sched_domain *sd = NULL, *p;
		cpumask_t nodemask = node_to_cpumask(cpu_to_node(i));

		cpus_and(nodemask, nodemask, *cpu_map);

#ifdef CONFIG_NUMA
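		/*
		 * On systems with many more CPUs than fit in one node-level
		 * domain, add a top-level "allnodes" domain spanning the
		 * whole cpu_map above the node domains.
		 */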
		if (cpus_weight(*cpu_map)
				> SD_NODES_PER_DOMAIN * cpus_weight(nodemask)) {
			if (!sched_group_allnodes) {
				sched_group_allnodes
					= kmalloc(sizeof(struct sched_group)
							* MAX_NUMNODES,
						  GFP_KERNEL);
				if (!sched_group_allnodes) {
					printk(KERN_WARNING
					"Can not alloc allnodes sched group\n");
					break;
				}
				sched_group_allnodes_bycpu[i]
						= sched_group_allnodes;
			}
			sd = &per_cpu(allnodes_domains, i);
			*sd = SD_ALLNODES_INIT;
			sd->span = *cpu_map;
			group = cpu_to_allnodes_group(i);
			sd->groups = &sched_group_allnodes[group];
			p = sd;
		} else
			p = NULL;

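		/* Node-level domain: this CPU's node plus its nearest neighbors */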
		sd = &per_cpu(node_domains, i);
		*sd = SD_NODE_INIT;
		sd->span = sched_domain_node_span(cpu_to_node(i));
		sd->parent = p;
		cpus_and(sd->span, sd->span, *cpu_map);
#endif

		p = sd;
		sd = &per_cpu(phys_domains, i);
		group = cpu_to_phys_group(i);
		*sd = SD_CPU_INIT;
		sd->span = nodemask;
		sd->parent = p;
		sd->groups = &sched_group_phys[group];

#ifdef CONFIG_SCHED_SMT
		p = sd;
		sd = &per_cpu(cpu_domains, i);
		group = cpu_to_cpu_group(i);
		*sd = SD_SIBLING_INIT;
		sd->span = cpu_sibling_map[i];
		cpus_and(sd->span, sd->span, *cpu_map);
		sd->parent = p;
		sd->groups = &sched_group_cpus[group];
#endif
	}

#ifdef CONFIG_SCHED_SMT
	/* Set up CPU (sibling) groups */
	for_each_cpu_mask(i, *cpu_map) {
		cpumask_t this_sibling_map = cpu_sibling_map[i];
		cpus_and(this_sibling_map, this_sibling_map, *cpu_map);
		if (i != first_cpu(this_sibling_map))
			continue;

		init_sched_build_groups(sched_group_cpus, this_sibling_map,
						&cpu_to_cpu_group);
	}
#endif

	/* Set up physical groups */
	for (i = 0; i < MAX_NUMNODES; i++) {
		cpumask_t nodemask = node_to_cpumask(i);

		cpus_and(nodemask, nodemask, *cpu_map);
		if (cpus_empty(nodemask))
			continue;

		init_sched_build_groups(sched_group_phys, nodemask,
						&cpu_to_phys_group);
	}

#ifdef CONFIG_NUMA
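	/* Build the allnodes groups: one group per node, covering every CPU in the map */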
	if (sched_group_allnodes)
		init_sched_build_groups(sched_group_allnodes, *cpu_map,
					&cpu_to_allnodes_group);

	for (i = 0; i < MAX_NUMNODES; i++) {
		/* Set up node groups */
		struct sched_group *sg, *prev;
		cpumask_t nodemask = node_to_cpumask(i);
		cpumask_t domainspan;
		cpumask_t covered = CPU_MASK_NONE;
		int j;

		cpus_and(nodemask, nodemask, *cpu_map);
		if (cpus_empty(nodemask)) {
			sched_group_nodes[i] = NULL;
			continue;
		}

		domainspan = sched_domain_node_span(i);
		cpus_and(domainspan, domainspan, *cpu_map);

		sg = kmalloc(sizeof(struct sched_group), GFP_KERNEL);
		sched_group_nodes[i] = sg;
		for_each_cpu_mask(j, nodemask) {
			struct sched_domain *sd;
			sd = &per_cpu(node_domains, j);
			sd->groups = sg;
			if (sd->groups == NULL) {
				/* Turn off balancing if we have no groups */
				sd->flags = 0;
			}
		}
		if (!sg) {
			printk(KERN_WARNING
			"Can not alloc domain group for node %d\n", i);
			continue;
		}
		sg->cpu_power = 0;
		sg->cpumask = nodemask;
		cpus_or(covered, covered, nodemask);
		prev = sg;

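		/*
		 * Add one group for each remaining node in the domain span,
		 * covering its not-yet-covered CPUs, and chain the groups
		 * into a circular list headed by sched_group_nodes[i].
		 */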
		for (j = 0; j < MAX_NUMNODES; j++) {
			cpumask_t tmp, notcovered;
			int n = (i + j) % MAX_NUMNODES;

			cpus_complement(notcovered, covered);
			cpus_and(tmp, notcovered, *cpu_map);
			cpus_and(tmp, tmp, domainspan);
			if (cpus_empty(tmp))
				break;

			nodemask = node_to_cpumask(n);
			cpus_and(tmp, tmp, nodemask);
			if (cpus_empty(tmp))
				continue;

			sg = kmalloc(sizeof(struct sched_group), GFP_KERNEL);
			if (!sg) {
				printk(KERN_WARNING
				"Can not alloc domain group for node %d\n", n);
				break;
			}
			sg->cpu_power = 0;
			sg->cpumask = tmp;
			cpus_or(covered, covered, tmp);
			prev->next = sg;
			prev = sg;
		}
		prev->next = sched_group_nodes[i];
	}
#endif

	/* Calculate CPU power for physical packages and nodes */
	for_each_cpu_mask(i, *cpu_map) {
		int power;
		struct sched_domain *sd;
#ifdef CONFIG_SCHED_SMT
		sd = &per_cpu(cpu_domains, i);
		power = SCHED_LOAD_SCALE;
		sd->groups->cpu_power = power;
#endif

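		/*
		 * Physical package power: SCHED_LOAD_SCALE plus a 10%
		 * bonus for each additional CPU in the package.
		 */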
		sd = &per_cpu(phys_domains, i);
		power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE *
				(cpus_weight(sd->groups->cpumask)-1) / 10;
		sd->groups->cpu_power = power;

#ifdef CONFIG_NUMA
		sd = &per_cpu(allnodes_domains, i);
		if (sd->groups) {
			power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE *
				(cpus_weight(sd->groups->cpumask)-1) / 10;
			sd->groups->cpu_power = power;
		}
#endif
	}

#ifdef CONFIG_NUMA
	for (i = 0; i < MAX_NUMNODES; i++) {
		struct sched_group *sg = sched_group_nodes[i];
		int j;

		if (sg == NULL)
			continue;
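		/* Accumulate power over every group in this node's circular list */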
next_sg:
		for_each_cpu_mask(j, sg->cpumask) {
			struct sched_domain *sd;
			int power;

			sd = &per_cpu(phys_domains, j);
			if (j != first_cpu(sd->groups->cpumask)) {
				/*
				 * Only add "power" once for each
				 * physical package.
				 */
				continue;
			}
			power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE *
				(cpus_weight(sd->groups->cpumask)-1) / 10;

			sg->cpu_power += power;
		}
		sg = sg->next;
		if (sg != sched_group_nodes[i])
			goto next_sg;
	}
#endif

	/* Attach the domains */
	for_each_cpu_mask(i, *cpu_map) {
		struct sched_domain *sd;
#ifdef CONFIG_SCHED_SMT
		sd = &per_cpu(cpu_domains, i);
#else
		sd = &per_cpu(phys_domains, i);
#endif
		cpu_attach_domain(sd, i);
	}
}

/*
 * Set up scheduler domains and groups.  Callers must hold the hotplug lock.
 */
void arch_init_sched_domains(const cpumask_t *cpu_map)
{
	cpumask_t cpu_default_map;

	/*
	 * Set up the mask of cpus without special case scheduling requirements.
	 * For now this just excludes isolated cpus, but could be used to
	 * exclude other special cases in the future.
	 */
	cpus_andnot(cpu_default_map, *cpu_map, cpu_isolated_map);

	build_sched_domains(&cpu_default_map);
}

void arch_destroy_sched_domains(const cpumask_t *cpu_map)
{
#ifdef CONFIG_NUMA
	int i;
	int cpu;

	for_each_cpu_mask(cpu, *cpu_map) {
		struct sched_group *sched_group_allnodes
			= sched_group_allnodes_bycpu[cpu];
		struct sched_group **sched_group_nodes
			= sched_group_nodes_bycpu[cpu];

		if (sched_group_allnodes) {
			kfree(sched_group_allnodes);
			sched_group_allnodes_bycpu[cpu] = NULL;
		}

		if (!sched_group_nodes)
			continue;

		for (i = 0; i < MAX_NUMNODES; i++) {
			cpumask_t nodemask = node_to_cpumask(i);
			struct sched_group *oldsg, *sg = sched_group_nodes[i];

			cpus_and(nodemask, nodemask, *cpu_map);
			if (cpus_empty(nodemask))
				continue;

			if (sg == NULL)
				continue;
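			/*
			 * Free the circular group list; the head is freed
			 * last, when the walk wraps back around to it.
			 */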
			sg = sg->next;
next_sg:
			oldsg = sg;
			sg = sg->next;
			kfree(oldsg);
			if (oldsg != sched_group_nodes[i])
				goto next_sg;
		}
		kfree(sched_group_nodes);
		sched_group_nodes_bycpu[cpu] = NULL;
	}
#endif
}