powerpc/pseries: Add support for hash table resizing
authorDavid Gibson <david@gibson.dropbear.id.au>
Fri, 9 Dec 2016 00:07:36 +0000 (11:07 +1100)
committerMichael Ellerman <mpe@ellerman.id.au>
Fri, 10 Feb 2017 02:27:55 +0000 (13:27 +1100)
This adds support for using two hypercalls to change the size of the
main hash page table while running as a PAPR guest. For now these
hypercalls are only in experimental qemu versions.

The interface is two part: first H_RESIZE_HPT_PREPARE is used to
allocate and prepare the new hash table. This may be slow, but can be
done asynchronously. Then, H_RESIZE_HPT_COMMIT is used to switch to the
new hash table. This requires that no CPUs be concurrently updating the
HPT, and so must be run under stop_machine().

This also adds a debugfs file which can be used to manually control
HPT resizing for testing purposes.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Paul Mackerras <paulus@samba.org>
[mpe: Rename the debugfs file to "hpt_order"]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
arch/powerpc/include/asm/book3s/64/mmu-hash.h
arch/powerpc/mm/hash_utils_64.c
arch/powerpc/platforms/pseries/lpar.c

index 823015c..52d8d1e 100644 (file)
@@ -157,6 +157,7 @@ struct mmu_hash_ops {
                                               unsigned long addr,
                                               unsigned char *hpte_slot_array,
                                               int psize, int ssize, int local);
+       int             (*resize_hpt)(unsigned long shift);
        /*
         * Special for kexec.
         * To be called in real mode with interrupts disabled. No locks are
index 67e19a0..a3371d4 100644 (file)
@@ -35,7 +35,9 @@
 #include <linux/memblock.h>
 #include <linux/context_tracking.h>
 #include <linux/libfdt.h>
+#include <linux/debugfs.h>
 
+#include <asm/debug.h>
 #include <asm/processor.h>
 #include <asm/pgtable.h>
 #include <asm/mmu.h>
@@ -1795,3 +1797,34 @@ void hash__setup_initial_memory_limit(phys_addr_t first_memblock_base,
        /* Finally limit subsequent allocations */
        memblock_set_current_limit(ppc64_rma_size);
 }
+
+#ifdef CONFIG_DEBUG_FS
+
+/*
+ * debugfs read handler for the "hpt_order" file: reports the current value
+ * of ppc64_pft_size. @data is unused. Always returns 0.
+ */
+static int hpt_order_get(void *data, u64 *val)
+{
+       *val = ppc64_pft_size;
+       return 0;
+}
+
+/*
+ * debugfs write handler for the "hpt_order" file: passes the written value
+ * to the MMU backend's resize_hpt hook as the requested shift. @data is
+ * unused.
+ *
+ * Returns -ENODEV when no resize_hpt hook is installed, otherwise whatever
+ * the hook returns.
+ */
+static int hpt_order_set(void *data, u64 val)
+{
+       if (mmu_hash_ops.resize_hpt)
+               return mmu_hash_ops.resize_hpt(val);
+
+       return -ENODEV;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_hpt_order, hpt_order_get, hpt_order_set, "%llu\n");
+
+/*
+ * Create the "hpt_order" file under powerpc_debugfs_root with mode 0600.
+ *
+ * A failure to create the file is only logged; it is not reported as an
+ * initcall failure, so this always returns 0.
+ */
+static int __init hash64_debugfs(void)
+{
+       if (!debugfs_create_file("hpt_order", 0600, powerpc_debugfs_root,
+                                NULL, &fops_hpt_order)) {
+               pr_err("lpar: unable to create hpt_order debugfs file\n");
+       }
+
+       return 0;
+}
+machine_device_initcall(pseries, hash64_debugfs);
+
+#endif /* CONFIG_DEBUG_FS */
index 5dc1c3c..c2e13a5 100644 (file)
@@ -27,6 +27,8 @@
 #include <linux/console.h>
 #include <linux/export.h>
 #include <linux/jump_label.h>
+#include <linux/delay.h>
+#include <linux/stop_machine.h>
 #include <asm/processor.h>
 #include <asm/mmu.h>
 #include <asm/page.h>
@@ -609,6 +611,112 @@ static int __init disable_bulk_remove(char *str)
 
 __setup("bulk_remove=", disable_bulk_remove);
 
+#define HPT_RESIZE_TIMEOUT     10000 /* ms */
+
+/*
+ * Argument block handed to pseries_lpar_resize_hpt_commit() through
+ * stop_machine(); also carries the hypercall status back to the caller.
+ */
+struct hpt_resize_state {
+       unsigned long shift;    /* requested HPT size: 1UL << shift bytes */
+       int commit_rc;          /* status returned by plpar_resize_hpt_commit() */
+};
+
+/*
+ * stop_machine() callback: ask the hypervisor to switch over to the HPT
+ * that was set up by the prepare step.
+ *
+ * @data points to a struct hpt_resize_state. The raw hypercall status is
+ * stored in its commit_rc field so the caller can decode it.
+ *
+ * Returns 0 when the hypercall reports H_SUCCESS, -EIO otherwise.
+ */
+static int pseries_lpar_resize_hpt_commit(void *data)
+{
+       struct hpt_resize_state *st = data;
+
+       st->commit_rc = plpar_resize_hpt_commit(0, st->shift);
+       if (st->commit_rc == H_SUCCESS) {
+               /* The hypervisor now uses the new HTAB; bring our globals in line */
+               ppc64_pft_size = st->shift;
+               htab_size_bytes = 1UL << ppc64_pft_size;
+               htab_hash_mask = (htab_size_bytes >> 7) - 1;
+               return 0;
+       }
+
+       return -EIO;
+}
+
+/*
+ * Resize the hash page table to the size given by @shift, in two steps:
+ * a prepare hypercall, retried for as long as the hypervisor reports a
+ * long-busy status, followed by a commit run under stop_machine().
+ *
+ * Must be called in user context: it sleeps between prepare retries.
+ *
+ * Returns 0 on success, or:
+ *   -ENODEV     FW_FEATURE_HPT_RESIZE is not available
+ *   -ETIMEDOUT  the requested busy delays added up to more than
+ *               HPT_RESIZE_TIMEOUT ms (a cancel is attempted first)
+ *   -EINVAL     prepare returned H_PARAMETER
+ *   -EPERM      prepare returned H_RESOURCE
+ *   -ENOSPC     commit returned H_PTEG_FULL
+ *   -EIO        any other failure from prepare or commit
+ */
+static int pseries_lpar_resize_hpt(unsigned long shift)
+{
+       struct hpt_resize_state state = {
+               .shift = shift,
+               .commit_rc = H_FUNCTION,
+       };
+       unsigned int wait_ms, waited_ms = 0;
+       ktime_t start, prepared, committed;
+       int rc;
+
+       might_sleep();
+
+       if (!firmware_has_feature(FW_FEATURE_HPT_RESIZE))
+               return -ENODEV;
+
+       printk(KERN_INFO "lpar: Attempting to resize HPT to shift %lu\n",
+              shift);
+
+       start = ktime_get();
+
+       /* Step 1: prepare, re-issuing the call while the hypervisor is busy */
+       rc = plpar_resize_hpt_prepare(0, shift);
+       while (H_IS_LONG_BUSY(rc)) {
+               wait_ms = get_longbusy_msecs(rc);
+               waited_ms += wait_ms;
+               if (waited_ms > HPT_RESIZE_TIMEOUT) {
+                       /* prepare with shift==0 cancels an in-progress resize */
+                       rc = plpar_resize_hpt_prepare(0, 0);
+                       if (rc != H_SUCCESS)
+                               printk(KERN_WARNING
+                                      "lpar: Unexpected error %d cancelling timed out HPT resize\n",
+                                      rc);
+                       return -ETIMEDOUT;
+               }
+               msleep(wait_ms);
+               rc = plpar_resize_hpt_prepare(0, shift);
+       }
+
+       /* Translate a failed prepare into an errno; H_SUCCESS falls through */
+       if (rc == H_PARAMETER)
+               return -EINVAL;
+       if (rc == H_RESOURCE)
+               return -EPERM;
+       if (rc != H_SUCCESS) {
+               printk(KERN_WARNING
+                      "lpar: Unexpected error %d from H_RESIZE_HPT_PREPARE\n",
+                      rc);
+               return -EIO;
+       }
+
+       prepared = ktime_get();
+
+       /* Step 2: commit; the callback leaves the hypercall status in state */
+       rc = stop_machine(pseries_lpar_resize_hpt_commit, &state, NULL);
+
+       committed = ktime_get();
+
+       if (rc == 0) {
+               printk(KERN_INFO
+                      "lpar: HPT resize to shift %lu complete (%lld ms / %lld ms)\n",
+                      shift, (long long) ktime_ms_delta(prepared, start),
+                      (long long) ktime_ms_delta(committed, prepared));
+               return 0;
+       }
+
+       if (state.commit_rc == H_PTEG_FULL) {
+               printk(KERN_WARNING
+                      "lpar: Hash collision while resizing HPT\n");
+               return -ENOSPC;
+       }
+
+       printk(KERN_WARNING
+              "lpar: Unexpected error %d from H_RESIZE_HPT_COMMIT\n",
+              state.commit_rc);
+       return -EIO;
+}
+
 void __init hpte_init_pseries(void)
 {
        mmu_hash_ops.hpte_invalidate     = pSeries_lpar_hpte_invalidate;
@@ -620,6 +728,7 @@ void __init hpte_init_pseries(void)
        mmu_hash_ops.flush_hash_range    = pSeries_lpar_flush_hash_range;
        mmu_hash_ops.hpte_clear_all      = pseries_hpte_clear_all;
        mmu_hash_ops.hugepage_invalidate = pSeries_lpar_hugepage_invalidate;
+       mmu_hash_ops.resize_hpt          = pseries_lpar_resize_hpt;
 }
 
 #ifdef CONFIG_PPC_SMLPAR