patch-5.15.79-rt54.patch
[platform/kernel/linux-rpi.git]
diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index 55a0a24..3d0157b 100644
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -467,7 +467,7 @@ void rq_attach_root(struct rq *rq, struct root_domain *rd)
        struct root_domain *old_rd = NULL;
        unsigned long flags;
 
-       raw_spin_lock_irqsave(&rq->lock, flags);
+       raw_spin_rq_lock_irqsave(rq, flags);
 
        if (rq->rd) {
                old_rd = rq->rd;
@@ -493,7 +493,7 @@ void rq_attach_root(struct rq *rq, struct root_domain *rd)
        if (cpumask_test_cpu(rq->cpu, cpu_active_mask))
                set_rq_online(rq);
 
-       raw_spin_unlock_irqrestore(&rq->lock, flags);
+       raw_spin_rq_unlock_irqrestore(rq, flags);
 
        if (old_rd)
                call_rcu(&old_rd->rcu, free_rootdomain);
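
The two hunks above stop taking &rq->lock directly and go through the
raw_spin_rq_lock_irqsave()/raw_spin_rq_unlock_irqrestore() wrappers instead, so
every locker resolves the runqueue lock through one accessor and the lock a
given rq maps to can be chosen in a single place (the same indirection is what
lets core scheduling share one lock among SMT siblings). A minimal userspace
sketch of that accessor pattern follows; struct rq, rq_lockp(), rq_lock() and
rq_unlock() here are stand-ins for illustration, not the kernel's definitions.

    #include <pthread.h>
    #include <stdio.h>

    struct rq {
            pthread_mutex_t lock;           /* stand-in for the rq raw spinlock */
    };

    /*
     * Callers never touch rq->lock directly; they resolve it through one
     * helper, so which lock an rq uses can later be changed centrally.
     */
    static pthread_mutex_t *rq_lockp(struct rq *rq)
    {
            return &rq->lock;
    }

    static void rq_lock(struct rq *rq)   { pthread_mutex_lock(rq_lockp(rq)); }
    static void rq_unlock(struct rq *rq) { pthread_mutex_unlock(rq_lockp(rq)); }

    int main(void)
    {
            struct rq rq = { .lock = PTHREAD_MUTEX_INITIALIZER };

            rq_lock(&rq);
            printf("holding the rq lock\n");
            rq_unlock(&rq);
            return 0;
    }
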
@@ -526,7 +526,7 @@ static int init_rootdomain(struct root_domain *rd)
 #ifdef HAVE_RT_PUSH_IPI
        rd->rto_cpu = -1;
        raw_spin_lock_init(&rd->rto_lock);
-       init_irq_work(&rd->rto_push_work, rto_push_irq_work_func);
+       rd->rto_push_work = IRQ_WORK_INIT_HARD(rto_push_irq_work_func);
 #endif
 
        rd->visit_gen = 0;
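
The root domain's RT push IPI work item is now set up with IRQ_WORK_INIT_HARD()
rather than init_irq_work(). The _HARD variant marks the item so that, on
PREEMPT_RT, it still runs from the irq_work interrupt itself rather than being
deferred out of hard interrupt context the way unflagged items can be, which is
what the RT push machinery relies on. A hedged kernel-style sketch of the two
initialization forms; demo_func, demo_hard and demo_soft are made-up names and
the snippet is illustrative rather than buildable on its own.

    #include <linux/irq_work.h>

    static void demo_func(struct irq_work *work)
    {
            /* runs when the queued irq_work fires */
    }

    /* Flagged hard: stays in hard interrupt context even on PREEMPT_RT. */
    static struct irq_work demo_hard = IRQ_WORK_INIT_HARD(demo_func);

    /* Default init: PREEMPT_RT may run this from a per-CPU helper thread. */
    static struct irq_work demo_soft;

    static void demo_queue(void)
    {
            init_irq_work(&demo_soft, demo_func);

            irq_work_queue(&demo_hard);
            irq_work_queue(&demo_soft);
    }
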
@@ -675,7 +675,7 @@ static void update_top_cache_domain(int cpu)
        sd = highest_flag_domain(cpu, SD_ASYM_PACKING);
        rcu_assign_pointer(per_cpu(sd_asym_packing, cpu), sd);
 
-       sd = lowest_flag_domain(cpu, SD_ASYM_CPUCAPACITY);
+       sd = lowest_flag_domain(cpu, SD_ASYM_CPUCAPACITY_FULL);
        rcu_assign_pointer(per_cpu(sd_asym_cpucapacity, cpu), sd);
 }
 
@@ -1267,6 +1267,116 @@ next:
 }
 
 /*
+ * Asymmetric CPU capacity bits
+ */
+struct asym_cap_data {
+       struct list_head link;
+       unsigned long capacity;
+       unsigned long cpus[];
+};
+
+/*
+ * Set of available CPUs grouped by their corresponding capacities.
+ * Each list entry contains a CPU mask reflecting CPUs that share the same
+ * capacity.
+ * The lifespan of this data is unlimited.
+ */
+static LIST_HEAD(asym_cap_list);
+
+#define cpu_capacity_span(asym_data) to_cpumask((asym_data)->cpus)
+
+/*
+ * Verify whether there is any CPU capacity asymmetry in a given sched domain.
+ * Provides sd_flags reflecting the asymmetry scope.
+ */
+static inline int
+asym_cpu_capacity_classify(const struct cpumask *sd_span,
+                          const struct cpumask *cpu_map)
+{
+       struct asym_cap_data *entry;
+       int count = 0, miss = 0;
+
+       /*
+        * Count how many unique CPU capacities this domain spans across
+        * (compare the sched_domain CPU mask with the masks representing the
+        * available CPU capacities). Take into account CPUs that might be
+        * offline: skip those.
+        */
+       list_for_each_entry(entry, &asym_cap_list, link) {
+               if (cpumask_intersects(sd_span, cpu_capacity_span(entry)))
+                       ++count;
+               else if (cpumask_intersects(cpu_map, cpu_capacity_span(entry)))
+                       ++miss;
+       }
+
+       WARN_ON_ONCE(!count && !list_empty(&asym_cap_list));
+
+       /* No asymmetry detected */
+       if (count < 2)
+               return 0;
+       /* Some of the available CPU capacity values have not been detected */
+       if (miss)
+               return SD_ASYM_CPUCAPACITY;
+
+       /* Full asymmetry */
+       return SD_ASYM_CPUCAPACITY | SD_ASYM_CPUCAPACITY_FULL;
+
+}
+
+static inline void asym_cpu_capacity_update_data(int cpu)
+{
+       unsigned long capacity = arch_scale_cpu_capacity(cpu);
+       struct asym_cap_data *entry = NULL;
+
+       list_for_each_entry(entry, &asym_cap_list, link) {
+               if (capacity == entry->capacity)
+                       goto done;
+       }
+
+       entry = kzalloc(sizeof(*entry) + cpumask_size(), GFP_KERNEL);
+       if (WARN_ONCE(!entry, "Failed to allocate memory for asymmetry data\n"))
+               return;
+       entry->capacity = capacity;
+       list_add(&entry->link, &asym_cap_list);
+done:
+       __cpumask_set_cpu(cpu, cpu_capacity_span(entry));
+}
+
+/*
+ * Build up or update the list of CPUs grouped by their capacities.
+ * An update requires an explicit request to rebuild sched domains,
+ * with state indicating CPU topology changes.
+ */
+static void asym_cpu_capacity_scan(void)
+{
+       struct asym_cap_data *entry, *next;
+       int cpu;
+
+       list_for_each_entry(entry, &asym_cap_list, link)
+               cpumask_clear(cpu_capacity_span(entry));
+
+       for_each_cpu_and(cpu, cpu_possible_mask, housekeeping_cpumask(HK_FLAG_DOMAIN))
+               asym_cpu_capacity_update_data(cpu);
+
+       list_for_each_entry_safe(entry, next, &asym_cap_list, link) {
+               if (cpumask_empty(cpu_capacity_span(entry))) {
+                       list_del(&entry->link);
+                       kfree(entry);
+               }
+       }
+
+       /*
+        * Only one capacity value has been detected, i.e. this system is symmetric.
+        * No need to keep this data around.
+        */
+       if (list_is_singular(&asym_cap_list)) {
+               entry = list_first_entry(&asym_cap_list, typeof(*entry), link);
+               list_del(&entry->link);
+               kfree(entry);
+       }
+}
+
+/*
  * Initializers for schedule domains
  * Non-inlined to reduce accumulated stack pressure in build_sched_domains()
  */
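
asym_cpu_capacity_classify() leans entirely on asym_cap_list: one entry per
distinct capacity value, each carrying the mask of CPUs that provide it.
Counting how many entries intersect a domain's span is then enough: fewer than
two means the domain is symmetric, all of them means full asymmetry (which is
also what the SD_ASYM_CPUCAPACITY_FULL lookup in update_top_cache_domain()
above keys off), and an entry visible in cpu_map but not in the span means only
partial asymmetry. Below is a self-contained userspace model of that decision
logic; the flag values, types and bitmask representation are illustrative
stand-ins, not the kernel's.

    #include <stdio.h>

    #define ASYM        0x1         /* stand-in for SD_ASYM_CPUCAPACITY */
    #define ASYM_FULL   0x2         /* stand-in for SD_ASYM_CPUCAPACITY_FULL */

    /* One entry per distinct capacity value: which CPUs (as a bitmask) have it. */
    struct cap_entry {
            unsigned long capacity;
            unsigned int cpus;
    };

    static int classify(unsigned int sd_span, unsigned int cpu_map,
                        const struct cap_entry *list, int n)
    {
            int count = 0, miss = 0;

            for (int i = 0; i < n; i++) {
                    if (list[i].cpus & sd_span)
                            count++;        /* capacity visible in the domain */
                    else if (list[i].cpus & cpu_map)
                            miss++;         /* exists, but only outside the domain */
            }

            if (count < 2)
                    return 0;               /* the domain is symmetric */
            if (miss)
                    return ASYM;            /* some capacity levels are missing */
            return ASYM | ASYM_FULL;        /* every capacity level is visible */
    }

    int main(void)
    {
            /* Hypothetical system: four little, three big and one prime CPU. */
            const struct cap_entry list[] = {
                    {  512, 0x0f },         /* CPUs 0-3 */
                    {  871, 0x70 },         /* CPUs 4-6 */
                    { 1024, 0x80 },         /* CPU 7    */
            };
            const unsigned int cpu_map = 0xff;

            printf("little cluster: %d\n", classify(0x0f, cpu_map, list, 3));
            printf("big + prime:    %d\n", classify(0xf0, cpu_map, list, 3));
            printf("whole system:   %d\n", classify(0xff, cpu_map, list, 3));
            return 0;
    }

This prints 0, 1 (partial) and 3 (full) for the three domains, matching the
three return paths of the kernel helper above.
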
@@ -1372,6 +1482,8 @@ int                               sched_max_numa_distance;
 static int                     *sched_domains_numa_distance;
 static struct cpumask          ***sched_domains_numa_masks;
 int __read_mostly              node_reclaim_distance = RECLAIM_DISTANCE;
+
+static unsigned long __read_mostly *sched_numa_onlined_nodes;
 #endif
 
 /*
@@ -1399,11 +1511,12 @@ int __read_mostly               node_reclaim_distance = RECLAIM_DISTANCE;
 static struct sched_domain *
 sd_init(struct sched_domain_topology_level *tl,
        const struct cpumask *cpu_map,
-       struct sched_domain *child, int dflags, int cpu)
+       struct sched_domain *child, int cpu)
 {
        struct sd_data *sdd = &tl->data;
        struct sched_domain *sd = *per_cpu_ptr(sdd->sd, cpu);
        int sd_id, sd_weight, sd_flags = 0;
+       struct cpumask *sd_span;
 
 #ifdef CONFIG_NUMA
        /*
@@ -1420,9 +1533,6 @@ sd_init(struct sched_domain_topology_level *tl,
                        "wrong sd_flags in topology description\n"))
                sd_flags &= TOPOLOGY_SD_FLAGS;
 
-       /* Apply detected topology flags */
-       sd_flags |= dflags;
-
        *sd = (struct sched_domain){
                .min_interval           = sd_weight,
                .max_interval           = 2*sd_weight,
@@ -1454,13 +1564,19 @@ sd_init(struct sched_domain_topology_level *tl,
 #endif
        };
 
-       cpumask_and(sched_domain_span(sd), cpu_map, tl->mask(cpu));
-       sd_id = cpumask_first(sched_domain_span(sd));
+       sd_span = sched_domain_span(sd);
+       cpumask_and(sd_span, cpu_map, tl->mask(cpu));
+       sd_id = cpumask_first(sd_span);
+
+       sd->flags |= asym_cpu_capacity_classify(sd_span, cpu_map);
+
+       WARN_ONCE((sd->flags & (SD_SHARE_CPUCAPACITY | SD_ASYM_CPUCAPACITY)) ==
+                 (SD_SHARE_CPUCAPACITY | SD_ASYM_CPUCAPACITY),
+                 "CPU capacity asymmetry not supported on SMT\n");
 
        /*
         * Convert topological properties into behaviour.
         */
-
        /* Don't attempt to spread across CPUs of different capacities. */
        if ((sd->flags & SD_ASYM_CPUCAPACITY) && sd->child)
                sd->child->flags &= ~SD_PREFER_SIBLING;
@@ -1719,6 +1835,16 @@ void sched_init_numa(void)
                        sched_domains_numa_masks[i][j] = mask;
 
                        for_each_node(k) {
+                               /*
+                                * Distance information can be unreliable for
+                                * offline nodes, so defer building the node's
+                                * masks to its bringup.
+                                * This relies on all unique distance values
+                                * still being visible at init time.
+                                */
+                               if (!node_online(j))
+                                       continue;
+
                                if (sched_debug() && (node_distance(j, k) != node_distance(k, j)))
                                        sched_numa_warn("Node-distance not symmetric");
 
@@ -1772,6 +1898,53 @@ void sched_init_numa(void)
        sched_max_numa_distance = sched_domains_numa_distance[nr_levels - 1];
 
        init_numa_topology_type();
+
+       sched_numa_onlined_nodes = bitmap_alloc(nr_node_ids, GFP_KERNEL);
+       if (!sched_numa_onlined_nodes)
+               return;
+
+       bitmap_zero(sched_numa_onlined_nodes, nr_node_ids);
+       for_each_online_node(i)
+               bitmap_set(sched_numa_onlined_nodes, i, 1);
+}
+
+static void __sched_domains_numa_masks_set(unsigned int node)
+{
+       int i, j;
+
+       /*
+        * NUMA masks are not built for offline nodes in sched_init_numa().
+        * Thus, when a CPU of a never-onlined-before node gets plugged in,
+        * adding that new CPU to the right NUMA masks is not sufficient: the
+        * masks of that CPU's node must also be updated.
+        */
+       if (test_bit(node, sched_numa_onlined_nodes))
+               return;
+
+       bitmap_set(sched_numa_onlined_nodes, node, 1);
+
+       for (i = 0; i < sched_domains_numa_levels; i++) {
+               for (j = 0; j < nr_node_ids; j++) {
+                       if (!node_online(j) || node == j)
+                               continue;
+
+                       if (node_distance(j, node) > sched_domains_numa_distance[i])
+                               continue;
+
+                       /* Add remote nodes in our masks */
+                       cpumask_or(sched_domains_numa_masks[i][node],
+                                  sched_domains_numa_masks[i][node],
+                                  sched_domains_numa_masks[0][j]);
+               }
+       }
+
+       /*
+        * A new node has been brought up, potentially changing the topology
+        * classification.
+        *
+        * Note that this is racy vs any use of sched_numa_topology_type :/
+        */
+       init_numa_topology_type();
 }
 
 void sched_domains_numa_masks_set(unsigned int cpu)
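
sched_init_numa() above now skips building the masks of offline nodes (their
reported distances may be unreliable until bringup), and the new
__sched_domains_numa_masks_set() completes them when the node comes up by
OR-ing in every online node that falls within each level's distance, before the
per-CPU path below adds the incoming CPU itself. A small self-contained model
of that two-step construction; the 3-node distance table, the bitmask
representation and the helper names are made up for illustration, and the
node's own bit is set explicitly here, which the kernel instead handles through
the per-CPU update.

    #include <stdio.h>

    #define NR_NODES   3
    #define NR_LEVELS  2

    /* Hypothetical symmetric distance table: 10 = local, 20 = one hop away. */
    static const int dist[NR_NODES][NR_NODES] = {
            { 10, 20, 20 },
            { 20, 10, 20 },
            { 20, 20, 10 },
    };
    static const int level_dist[NR_LEVELS] = { 10, 20 };

    /* mask[level][node]: bitmask of nodes covered at that distance level. */
    static unsigned int mask[NR_LEVELS][NR_NODES];
    static int online[NR_NODES] = { 1, 1, 0 };  /* node 2 starts offline */

    /* Init-time pass: masks are built for online nodes only. */
    static void build_initial_masks(void)
    {
            for (int i = 0; i < NR_LEVELS; i++)
                    for (int j = 0; j < NR_NODES; j++)
                            for (int k = 0; k < NR_NODES; k++)
                                    if (online[j] && online[k] &&
                                        dist[j][k] <= level_dist[i])
                                            mask[i][j] |= 1u << k;
    }

    /* Bringup-time pass: fill in the masks of a node that was offline at init. */
    static void node_masks_set(int node)
    {
            online[node] = 1;
            for (int i = 0; i < NR_LEVELS; i++) {
                    mask[i][node] |= 1u << node;        /* cover ourselves */
                    for (int j = 0; j < NR_NODES; j++) {
                            if (!online[j] || j == node)
                                    continue;
                            if (dist[j][node] > level_dist[i])
                                    continue;
                            /* pull the remote node's local-level mask into ours */
                            mask[i][node] |= mask[0][j];
                    }
            }
    }

    int main(void)
    {
            build_initial_masks();
            node_masks_set(2);                          /* node 2 comes up later */
            printf("level 1 mask of node 2: 0x%x\n", mask[1][2]);  /* 0x7 */
            return 0;
    }
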
@@ -1779,8 +1952,14 @@ void sched_domains_numa_masks_set(unsigned int cpu)
        int node = cpu_to_node(cpu);
        int i, j;
 
+       __sched_domains_numa_masks_set(node);
+
        for (i = 0; i < sched_domains_numa_levels; i++) {
                for (j = 0; j < nr_node_ids; j++) {
+                       if (!node_online(j))
+                               continue;
+
+                       /* Set ourselves in the remote node's masks */
                        if (node_distance(j, node) <= sched_domains_numa_distance[i])
                                cpumask_set_cpu(cpu, sched_domains_numa_masks[i][j]);
                }
@@ -1926,9 +2105,9 @@ static void __sdt_free(const struct cpumask *cpu_map)
 
 static struct sched_domain *build_sched_domain(struct sched_domain_topology_level *tl,
                const struct cpumask *cpu_map, struct sched_domain_attr *attr,
-               struct sched_domain *child, int dflags, int cpu)
+               struct sched_domain *child, int cpu)
 {
-       struct sched_domain *sd = sd_init(tl, cpu_map, child, dflags, cpu);
+       struct sched_domain *sd = sd_init(tl, cpu_map, child, cpu);
 
        if (child) {
                sd->level = child->level + 1;
@@ -1991,65 +2170,6 @@ static bool topology_span_sane(struct sched_domain_topology_level *tl,
 }
 
 /*
- * Find the sched_domain_topology_level where all CPU capacities are visible
- * for all CPUs.
- */
-static struct sched_domain_topology_level
-*asym_cpu_capacity_level(const struct cpumask *cpu_map)
-{
-       int i, j, asym_level = 0;
-       bool asym = false;
-       struct sched_domain_topology_level *tl, *asym_tl = NULL;
-       unsigned long cap;
-
-       /* Is there any asymmetry? */
-       cap = arch_scale_cpu_capacity(cpumask_first(cpu_map));
-
-       for_each_cpu(i, cpu_map) {
-               if (arch_scale_cpu_capacity(i) != cap) {
-                       asym = true;
-                       break;
-               }
-       }
-
-       if (!asym)
-               return NULL;
-
-       /*
-        * Examine topology from all CPU's point of views to detect the lowest
-        * sched_domain_topology_level where a highest capacity CPU is visible
-        * to everyone.
-        */
-       for_each_cpu(i, cpu_map) {
-               unsigned long max_capacity = arch_scale_cpu_capacity(i);
-               int tl_id = 0;
-
-               for_each_sd_topology(tl) {
-                       if (tl_id < asym_level)
-                               goto next_level;
-
-                       for_each_cpu_and(j, tl->mask(i), cpu_map) {
-                               unsigned long capacity;
-
-                               capacity = arch_scale_cpu_capacity(j);
-
-                               if (capacity <= max_capacity)
-                                       continue;
-
-                               max_capacity = capacity;
-                               asym_level = tl_id;
-                               asym_tl = tl;
-                       }
-next_level:
-                       tl_id++;
-               }
-       }
-
-       return asym_tl;
-}
-
-
-/*
  * Build sched domains for a given set of CPUs and attach the sched domains
  * to the individual CPUs
  */
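
The removed asym_cpu_capacity_level() walk recomputed capacity asymmetry from
scratch on every rebuild and could only tag a single topology level via dflags.
Its replacement, asym_cpu_capacity_scan() added earlier in this file, groups
CPUs by capacity once and only rescans when the topology actually changes (see
the partition_sched_domains_locked() hunk further down); classification then
happens per sched domain. A minimal self-contained userspace model of the
grouping step; the names, types and bitmask representation are illustrative,
not the kernel's.

    #include <stdio.h>

    #define NR_CPUS 8

    /* One entry per distinct capacity value: which CPUs (as a bitmask) have it. */
    struct cap_entry {
            unsigned long capacity;
            unsigned int cpus;
    };

    static int capacity_scan(const unsigned long *cap, int nr_cpus,
                             struct cap_entry *out)
    {
            int n = 0;

            for (int cpu = 0; cpu < nr_cpus; cpu++) {
                    int i;

                    for (i = 0; i < n; i++)
                            if (out[i].capacity == cap[cpu])
                                    break;
                    if (i == n) {               /* first CPU with this capacity */
                            out[n].capacity = cap[cpu];
                            out[n].cpus = 0;
                            n++;
                    }
                    out[i].cpus |= 1u << cpu;
            }
            return n;                           /* number of distinct capacities */
    }

    int main(void)
    {
            /* Hypothetical system: four little, three big and one prime CPU. */
            const unsigned long cap[NR_CPUS] = {
                    512, 512, 512, 512, 871, 871, 871, 1024
            };
            struct cap_entry list[NR_CPUS];
            int n = capacity_scan(cap, NR_CPUS, list);

            for (int i = 0; i < n; i++)
                    printf("capacity %4lu: cpus 0x%02x\n",
                           list[i].capacity, list[i].cpus);
            return 0;
    }
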
@@ -2061,7 +2181,6 @@ build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *att
        struct s_data d;
        struct rq *rq = NULL;
        int i, ret = -ENOMEM;
-       struct sched_domain_topology_level *tl_asym;
        bool has_asym = false;
 
        if (WARN_ON(cpumask_empty(cpu_map)))
@@ -2071,24 +2190,19 @@ build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *att
        if (alloc_state != sa_rootdomain)
                goto error;
 
-       tl_asym = asym_cpu_capacity_level(cpu_map);
-
        /* Set up domains for CPUs specified by the cpu_map: */
        for_each_cpu(i, cpu_map) {
                struct sched_domain_topology_level *tl;
-               int dflags = 0;
 
                sd = NULL;
                for_each_sd_topology(tl) {
-                       if (tl == tl_asym) {
-                               dflags |= SD_ASYM_CPUCAPACITY;
-                               has_asym = true;
-                       }
 
                        if (WARN_ON(!topology_span_sane(tl, cpu_map, i)))
                                goto error;
 
-                       sd = build_sched_domain(tl, cpu_map, attr, sd, dflags, i);
+                       sd = build_sched_domain(tl, cpu_map, attr, sd, i);
+
+                       has_asym |= sd->flags & SD_ASYM_CPUCAPACITY;
 
                        if (tl == sched_domain_topology)
                                *per_cpu_ptr(d.sd, i) = sd;
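
With asymmetry classified per sched domain in sd_init(), build_sched_domains()
no longer pre-computes an asymmetry level and threads dflags through every
call; it simply ORs the flag left on each domain into has_asym while walking
the topology levels, and uses that at the end to enable the asymmetric-capacity
static key. A condensed, illustrative view of that loop (not a literal excerpt;
the error handling and per-CPU bookkeeping are omitted):

    bool has_asym = false;

    for_each_cpu(i, cpu_map) {
            struct sched_domain_topology_level *tl;

            sd = NULL;
            for_each_sd_topology(tl) {
                    sd = build_sched_domain(tl, cpu_map, attr, sd, i);
                    /* set (or not) per domain by asym_cpu_capacity_classify() */
                    has_asym |= sd->flags & SD_ASYM_CPUCAPACITY;
            }
    }

    /* ... later: if (has_asym), flip the sched_asym_cpucapacity static key */
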
@@ -2217,6 +2331,7 @@ int sched_init_domains(const struct cpumask *cpu_map)
        zalloc_cpumask_var(&fallback_doms, GFP_KERNEL);
 
        arch_update_cpu_topology();
+       asym_cpu_capacity_scan();
        ndoms_cur = 1;
        doms_cur = alloc_sched_domains(ndoms_cur);
        if (!doms_cur)
@@ -2299,6 +2414,9 @@ void partition_sched_domains_locked(int ndoms_new, cpumask_var_t doms_new[],
 
        /* Let the architecture update CPU core mappings: */
        new_topology = arch_update_cpu_topology();
+       /* Trigger rebuilding CPU capacity asymmetry data */
+       if (new_topology)
+               asym_cpu_capacity_scan();
 
        if (!doms_new) {
                WARN_ON_ONCE(dattr_new);