Merge branch 'akpm' (patches from Andrew)
[platform/kernel/linux-rpi.git] / arch / powerpc / mm / numa.c
index 03a81d6..1f61fa2 100644 (file)
@@ -221,7 +221,8 @@ static void initialize_distance_lookup_table(int nid,
        }
 }
 
-/* Returns nid in the range [0..MAX_NUMNODES-1], or -1 if no useful numa
+/*
+ * Returns nid in the range [0..nr_node_ids - 1], or -1 if no useful NUMA
  * info is found.
  */
 static int associativity_to_nid(const __be32 *associativity)
@@ -235,7 +236,7 @@ static int associativity_to_nid(const __be32 *associativity)
                nid = of_read_number(&associativity[min_common_depth], 1);
 
        /* POWER4 LPAR uses 0xffff as invalid node */
-       if (nid == 0xffff || nid >= MAX_NUMNODES)
+       if (nid == 0xffff || nid >= nr_node_ids)
                nid = NUMA_NO_NODE;
 
        if (nid > 0 &&
@@ -448,7 +449,7 @@ static int of_drconf_to_nid_single(struct drmem_lmb *lmb)
                index = lmb->aa_index * aa.array_sz + min_common_depth - 1;
                nid = of_read_number(&aa.arrays[index], 1);
 
-               if (nid == 0xffff || nid >= MAX_NUMNODES)
+               if (nid == 0xffff || nid >= nr_node_ids)
                        nid = default_nid;
 
                if (nid > 0) {
@@ -644,8 +645,9 @@ static inline int __init read_usm_ranges(const __be32 **usm)
  * Extract NUMA information from the ibm,dynamic-reconfiguration-memory
  * node.  This assumes n_mem_{addr,size}_cells have been set.
  */
-static void __init numa_setup_drmem_lmb(struct drmem_lmb *lmb,
-                                       const __be32 **usm)
+static int __init numa_setup_drmem_lmb(struct drmem_lmb *lmb,
+                                       const __be32 **usm,
+                                       void *data)
 {
        unsigned int ranges, is_kexec_kdump = 0;
        unsigned long base, size, sz;
@@ -657,7 +659,7 @@ static void __init numa_setup_drmem_lmb(struct drmem_lmb *lmb,
         */
        if ((lmb->flags & DRCONF_MEM_RESERVED)
            || !(lmb->flags & DRCONF_MEM_ASSIGNED))
-               return;
+               return 0;
 
        if (*usm)
                is_kexec_kdump = 1;
@@ -669,7 +671,7 @@ static void __init numa_setup_drmem_lmb(struct drmem_lmb *lmb,
        if (is_kexec_kdump) {
                ranges = read_usm_ranges(usm);
                if (!ranges) /* there are no (base, size) duple */
-                       return;
+                       return 0;
        }
 
        do {
@@ -686,6 +688,8 @@ static void __init numa_setup_drmem_lmb(struct drmem_lmb *lmb,
                if (sz)
                        memblock_set_node(base, sz, &memblock.memory, nid);
        } while (--ranges);
+
+       return 0;
 }
 
 static int __init parse_numa_properties(void)
@@ -787,7 +791,7 @@ new_range:
         */
        memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
        if (memory) {
-               walk_drmem_lmbs(memory, numa_setup_drmem_lmb);
+               walk_drmem_lmbs(memory, NULL, numa_setup_drmem_lmb);
                of_node_put(memory);
        }
 
@@ -983,28 +987,6 @@ static int __init early_numa(char *p)
 }
 early_param("numa", early_numa);
 
-/*
- * The platform can inform us through one of several mechanisms
- * (post-migration device tree updates, PRRN or VPHN) that the NUMA
- * assignment of a resource has changed. This controls whether we act
- * on that. Disabled by default.
- */
-static bool topology_updates_enabled;
-
-static int __init early_topology_updates(char *p)
-{
-       if (!p)
-               return 0;
-
-       if (!strcmp(p, "on")) {
-               pr_warn("Caution: enabling topology updates\n");
-               topology_updates_enabled = true;
-       }
-
-       return 0;
-}
-early_param("topology_updates", early_topology_updates);
-
 #ifdef CONFIG_MEMORY_HOTPLUG
 /*
  * Find the node associated with a hot added memory section for
@@ -1143,98 +1125,9 @@ u64 memory_hotplug_max(void)
 
 /* Virtual Processor Home Node (VPHN) support */
 #ifdef CONFIG_PPC_SPLPAR
-struct topology_update_data {
-       struct topology_update_data *next;
-       unsigned int cpu;
-       int old_nid;
-       int new_nid;
-};
-
-#define TOPOLOGY_DEF_TIMER_SECS        60
-
-static u8 vphn_cpu_change_counts[NR_CPUS][MAX_DISTANCE_REF_POINTS];
-static cpumask_t cpu_associativity_changes_mask;
-static int vphn_enabled;
-static int prrn_enabled;
-static void reset_topology_timer(void);
-static int topology_timer_secs = 1;
 static int topology_inited;
 
 /*
- * Change polling interval for associativity changes.
- */
-int timed_topology_update(int nsecs)
-{
-       if (vphn_enabled) {
-               if (nsecs > 0)
-                       topology_timer_secs = nsecs;
-               else
-                       topology_timer_secs = TOPOLOGY_DEF_TIMER_SECS;
-
-               reset_topology_timer();
-       }
-
-       return 0;
-}
-
-/*
- * Store the current values of the associativity change counters in the
- * hypervisor.
- */
-static void setup_cpu_associativity_change_counters(void)
-{
-       int cpu;
-
-       /* The VPHN feature supports a maximum of 8 reference points */
-       BUILD_BUG_ON(MAX_DISTANCE_REF_POINTS > 8);
-
-       for_each_possible_cpu(cpu) {
-               int i;
-               u8 *counts = vphn_cpu_change_counts[cpu];
-               volatile u8 *hypervisor_counts = lppaca_of(cpu).vphn_assoc_counts;
-
-               for (i = 0; i < distance_ref_points_depth; i++)
-                       counts[i] = hypervisor_counts[i];
-       }
-}
-
-/*
- * The hypervisor maintains a set of 8 associativity change counters in
- * the VPA of each cpu that correspond to the associativity levels in the
- * ibm,associativity-reference-points property. When an associativity
- * level changes, the corresponding counter is incremented.
- *
- * Set a bit in cpu_associativity_changes_mask for each cpu whose home
- * node associativity levels have changed.
- *
- * Returns the number of cpus with unhandled associativity changes.
- */
-static int update_cpu_associativity_changes_mask(void)
-{
-       int cpu;
-       cpumask_t *changes = &cpu_associativity_changes_mask;
-
-       for_each_possible_cpu(cpu) {
-               int i, changed = 0;
-               u8 *counts = vphn_cpu_change_counts[cpu];
-               volatile u8 *hypervisor_counts = lppaca_of(cpu).vphn_assoc_counts;
-
-               for (i = 0; i < distance_ref_points_depth; i++) {
-                       if (hypervisor_counts[i] != counts[i]) {
-                               counts[i] = hypervisor_counts[i];
-                               changed = 1;
-                       }
-               }
-               if (changed) {
-                       cpumask_or(changes, changes, cpu_sibling_mask(cpu));
-                       cpu = cpu_last_thread_sibling(cpu);
-               }
-       }
-
-       return cpumask_weight(changes);
-}
-
-/*
  * Retrieve the new associativity information for a virtual processor's
  * home node.
  */
@@ -1249,7 +1142,6 @@ static long vphn_get_associativity(unsigned long cpu,
        switch (rc) {
        case H_SUCCESS:
                dbg("VPHN hcall succeeded. Reset polling...\n");
-               timed_topology_update(0);
                goto out;
 
        case H_FUNCTION:
@@ -1268,8 +1160,6 @@ static long vphn_get_associativity(unsigned long cpu,
                        , rc);
                break;
        }
-
-       stop_topology_update();
 out:
        return rc;
 }
@@ -1313,380 +1203,8 @@ int find_and_online_cpu_nid(int cpu)
        return new_nid;
 }
 
-/*
- * Update the CPU maps and sysfs entries for a single CPU when its NUMA
- * characteristics change. This function doesn't perform any locking and is
- * only safe to call from stop_machine().
- */
-static int update_cpu_topology(void *data)
-{
-       struct topology_update_data *update;
-       unsigned long cpu;
-
-       if (!data)
-               return -EINVAL;
-
-       cpu = smp_processor_id();
-
-       for (update = data; update; update = update->next) {
-               int new_nid = update->new_nid;
-               if (cpu != update->cpu)
-                       continue;
-
-               unmap_cpu_from_node(cpu);
-               map_cpu_to_node(cpu, new_nid);
-               set_cpu_numa_node(cpu, new_nid);
-               set_cpu_numa_mem(cpu, local_memory_node(new_nid));
-               vdso_getcpu_init();
-       }
-
-       return 0;
-}
-
-static int update_lookup_table(void *data)
-{
-       struct topology_update_data *update;
-
-       if (!data)
-               return -EINVAL;
-
-       /*
-        * Upon topology update, the numa-cpu lookup table needs to be updated
-        * for all threads in the core, including offline CPUs, to ensure that
-        * future hotplug operations respect the cpu-to-node associativity
-        * properly.
-        */
-       for (update = data; update; update = update->next) {
-               int nid, base, j;
-
-               nid = update->new_nid;
-               base = cpu_first_thread_sibling(update->cpu);
-
-               for (j = 0; j < threads_per_core; j++) {
-                       update_numa_cpu_lookup_table(base + j, nid);
-               }
-       }
-
-       return 0;
-}
-
-/*
- * Update the node maps and sysfs entries for each cpu whose home node
- * has changed. Returns 1 when the topology has changed, and 0 otherwise.
- *
- * cpus_locked says whether we already hold cpu_hotplug_lock.
- */
-int numa_update_cpu_topology(bool cpus_locked)
-{
-       unsigned int cpu, sibling, changed = 0;
-       struct topology_update_data *updates, *ud;
-       cpumask_t updated_cpus;
-       struct device *dev;
-       int weight, new_nid, i = 0;
-
-       if (!prrn_enabled && !vphn_enabled && topology_inited)
-               return 0;
-
-       weight = cpumask_weight(&cpu_associativity_changes_mask);
-       if (!weight)
-               return 0;
-
-       updates = kcalloc(weight, sizeof(*updates), GFP_KERNEL);
-       if (!updates)
-               return 0;
-
-       cpumask_clear(&updated_cpus);
-
-       for_each_cpu(cpu, &cpu_associativity_changes_mask) {
-               /*
-                * If siblings aren't flagged for changes, updates list
-                * will be too short. Skip on this update and set for next
-                * update.
-                */
-               if (!cpumask_subset(cpu_sibling_mask(cpu),
-                                       &cpu_associativity_changes_mask)) {
-                       pr_info("Sibling bits not set for associativity "
-                                       "change, cpu%d\n", cpu);
-                       cpumask_or(&cpu_associativity_changes_mask,
-                                       &cpu_associativity_changes_mask,
-                                       cpu_sibling_mask(cpu));
-                       cpu = cpu_last_thread_sibling(cpu);
-                       continue;
-               }
-
-               new_nid = find_and_online_cpu_nid(cpu);
-
-               if (new_nid == numa_cpu_lookup_table[cpu]) {
-                       cpumask_andnot(&cpu_associativity_changes_mask,
-                                       &cpu_associativity_changes_mask,
-                                       cpu_sibling_mask(cpu));
-                       dbg("Assoc chg gives same node %d for cpu%d\n",
-                                       new_nid, cpu);
-                       cpu = cpu_last_thread_sibling(cpu);
-                       continue;
-               }
-
-               for_each_cpu(sibling, cpu_sibling_mask(cpu)) {
-                       ud = &updates[i++];
-                       ud->next = &updates[i];
-                       ud->cpu = sibling;
-                       ud->new_nid = new_nid;
-                       ud->old_nid = numa_cpu_lookup_table[sibling];
-                       cpumask_set_cpu(sibling, &updated_cpus);
-               }
-               cpu = cpu_last_thread_sibling(cpu);
-       }
-
-       /*
-        * Prevent processing of 'updates' from overflowing array
-        * where last entry filled in a 'next' pointer.
-        */
-       if (i)
-               updates[i-1].next = NULL;
-
-       pr_debug("Topology update for the following CPUs:\n");
-       if (cpumask_weight(&updated_cpus)) {
-               for (ud = &updates[0]; ud; ud = ud->next) {
-                       pr_debug("cpu %d moving from node %d "
-                                         "to %d\n", ud->cpu,
-                                         ud->old_nid, ud->new_nid);
-               }
-       }
-
-       /*
-        * In cases where we have nothing to update (because the updates list
-        * is too short or because the new topology is same as the old one),
-        * skip invoking update_cpu_topology() via stop-machine(). This is
-        * necessary (and not just a fast-path optimization) since stop-machine
-        * can end up electing a random CPU to run update_cpu_topology(), and
-        * thus trick us into setting up incorrect cpu-node mappings (since
-        * 'updates' is kzalloc()'ed).
-        *
-        * And for the similar reason, we will skip all the following updating.
-        */
-       if (!cpumask_weight(&updated_cpus))
-               goto out;
-
-       if (cpus_locked)
-               stop_machine_cpuslocked(update_cpu_topology, &updates[0],
-                                       &updated_cpus);
-       else
-               stop_machine(update_cpu_topology, &updates[0], &updated_cpus);
-
-       /*
-        * Update the numa-cpu lookup table with the new mappings, even for
-        * offline CPUs. It is best to perform this update from the stop-
-        * machine context.
-        */
-       if (cpus_locked)
-               stop_machine_cpuslocked(update_lookup_table, &updates[0],
-                                       cpumask_of(raw_smp_processor_id()));
-       else
-               stop_machine(update_lookup_table, &updates[0],
-                            cpumask_of(raw_smp_processor_id()));
-
-       for (ud = &updates[0]; ud; ud = ud->next) {
-               unregister_cpu_under_node(ud->cpu, ud->old_nid);
-               register_cpu_under_node(ud->cpu, ud->new_nid);
-
-               dev = get_cpu_device(ud->cpu);
-               if (dev)
-                       kobject_uevent(&dev->kobj, KOBJ_CHANGE);
-               cpumask_clear_cpu(ud->cpu, &cpu_associativity_changes_mask);
-               changed = 1;
-       }
-
-out:
-       kfree(updates);
-       return changed;
-}
-
-int arch_update_cpu_topology(void)
-{
-       return numa_update_cpu_topology(true);
-}
-
-static void topology_work_fn(struct work_struct *work)
-{
-       rebuild_sched_domains();
-}
-static DECLARE_WORK(topology_work, topology_work_fn);
-
-static void topology_schedule_update(void)
-{
-       schedule_work(&topology_work);
-}
-
-static void topology_timer_fn(struct timer_list *unused)
-{
-       if (prrn_enabled && cpumask_weight(&cpu_associativity_changes_mask))
-               topology_schedule_update();
-       else if (vphn_enabled) {
-               if (update_cpu_associativity_changes_mask() > 0)
-                       topology_schedule_update();
-               reset_topology_timer();
-       }
-}
-static struct timer_list topology_timer;
-
-static void reset_topology_timer(void)
-{
-       if (vphn_enabled)
-               mod_timer(&topology_timer, jiffies + topology_timer_secs * HZ);
-}
-
-#ifdef CONFIG_SMP
-
-static int dt_update_callback(struct notifier_block *nb,
-                               unsigned long action, void *data)
-{
-       struct of_reconfig_data *update = data;
-       int rc = NOTIFY_DONE;
-
-       switch (action) {
-       case OF_RECONFIG_UPDATE_PROPERTY:
-               if (of_node_is_type(update->dn, "cpu") &&
-                   !of_prop_cmp(update->prop->name, "ibm,associativity")) {
-                       u32 core_id;
-                       of_property_read_u32(update->dn, "reg", &core_id);
-                       rc = dlpar_cpu_readd(core_id);
-                       rc = NOTIFY_OK;
-               }
-               break;
-       }
-
-       return rc;
-}
-
-static struct notifier_block dt_update_nb = {
-       .notifier_call = dt_update_callback,
-};
-
-#endif
-
-/*
- * Start polling for associativity changes.
- */
-int start_topology_update(void)
-{
-       int rc = 0;
-
-       if (!topology_updates_enabled)
-               return 0;
-
-       if (firmware_has_feature(FW_FEATURE_PRRN)) {
-               if (!prrn_enabled) {
-                       prrn_enabled = 1;
-#ifdef CONFIG_SMP
-                       rc = of_reconfig_notifier_register(&dt_update_nb);
-#endif
-               }
-       }
-       if (firmware_has_feature(FW_FEATURE_VPHN) &&
-                  lppaca_shared_proc(get_lppaca())) {
-               if (!vphn_enabled) {
-                       vphn_enabled = 1;
-                       setup_cpu_associativity_change_counters();
-                       timer_setup(&topology_timer, topology_timer_fn,
-                                   TIMER_DEFERRABLE);
-                       reset_topology_timer();
-               }
-       }
-
-       pr_info("Starting topology update%s%s\n",
-               (prrn_enabled ? " prrn_enabled" : ""),
-               (vphn_enabled ? " vphn_enabled" : ""));
-
-       return rc;
-}
-
-/*
- * Disable polling for VPHN associativity changes.
- */
-int stop_topology_update(void)
-{
-       int rc = 0;
-
-       if (!topology_updates_enabled)
-               return 0;
-
-       if (prrn_enabled) {
-               prrn_enabled = 0;
-#ifdef CONFIG_SMP
-               rc = of_reconfig_notifier_unregister(&dt_update_nb);
-#endif
-       }
-       if (vphn_enabled) {
-               vphn_enabled = 0;
-               rc = del_timer_sync(&topology_timer);
-       }
-
-       pr_info("Stopping topology update\n");
-
-       return rc;
-}
-
-int prrn_is_enabled(void)
-{
-       return prrn_enabled;
-}
-
-static int topology_read(struct seq_file *file, void *v)
-{
-       if (vphn_enabled || prrn_enabled)
-               seq_puts(file, "on\n");
-       else
-               seq_puts(file, "off\n");
-
-       return 0;
-}
-
-static int topology_open(struct inode *inode, struct file *file)
-{
-       return single_open(file, topology_read, NULL);
-}
-
-static ssize_t topology_write(struct file *file, const char __user *buf,
-                             size_t count, loff_t *off)
-{
-       char kbuf[4]; /* "on" or "off" plus null. */
-       int read_len;
-
-       read_len = count < 3 ? count : 3;
-       if (copy_from_user(kbuf, buf, read_len))
-               return -EINVAL;
-
-       kbuf[read_len] = '\0';
-
-       if (!strncmp(kbuf, "on", 2)) {
-               topology_updates_enabled = true;
-               start_topology_update();
-       } else if (!strncmp(kbuf, "off", 3)) {
-               stop_topology_update();
-               topology_updates_enabled = false;
-       } else
-               return -EINVAL;
-
-       return count;
-}
-
-static const struct proc_ops topology_proc_ops = {
-       .proc_read      = seq_read,
-       .proc_write     = topology_write,
-       .proc_open      = topology_open,
-       .proc_release   = single_release,
-};
-
 static int topology_update_init(void)
 {
-       start_topology_update();
-
-       if (vphn_enabled)
-               topology_schedule_update();
-
-       if (!proc_create("powerpc/topology_updates", 0644, NULL, &topology_proc_ops))
-               return -ENOMEM;
-
        topology_inited = 1;
        return 0;
 }