powerpc/pseries: Fix xive=off command line
authorGreg Kurz <groug@kaod.org>
Wed, 15 May 2019 10:05:01 +0000 (12:05 +0200)
committerMichael Ellerman <mpe@ellerman.id.au>
Sun, 2 Jun 2019 09:39:36 +0000 (19:39 +1000)
On POWER9, if the hypervisor supports XIVE exploitation mode, the
guest OS will unconditionally requests for the XIVE interrupt mode
even if XIVE was deactivated with the kernel command line xive=off.
Later on, when the spapr XIVE init code handles xive=off, it disables
XIVE and tries to fall back on the legacy mode XICS.

This discrepency causes a kernel panic because the hypervisor is
configured to provide the XIVE interrupt mode to the guest :

  kernel BUG at arch/powerpc/sysdev/xics/xics-common.c:135!
  ...
  NIP xics_smp_probe+0x38/0x98
  LR  xics_smp_probe+0x2c/0x98
  Call Trace:
    xics_smp_probe+0x2c/0x98 (unreliable)
    pSeries_smp_probe+0x40/0xa0
    smp_prepare_cpus+0x62c/0x6ec
    kernel_init_freeable+0x148/0x448
    kernel_init+0x2c/0x148
    ret_from_kernel_thread+0x5c/0x68

Look for xive=off during prom_init and don't ask for XIVE in this
case. One exception though: if the host only supports XIVE, we still
want to boot so we ignore xive=off.

Similarly, have the spapr XIVE init code to looking at the interrupt
mode negotiated during CAS, and ignore xive=off if the hypervisor only
supports XIVE.

Fixes: eac1e731b59e ("powerpc/xive: guest exploitation of the XIVE interrupt controller")
Cc: stable@vger.kernel.org # v4.20
Reported-by: Pavithra R. Prakash <pavrampu@in.ibm.com>
Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Cédric Le Goater <clg@kaod.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
arch/powerpc/kernel/prom_init.c
arch/powerpc/sysdev/xive/spapr.c

index bab79c5..17f1ae7 100644 (file)
@@ -172,6 +172,7 @@ static unsigned long __prombss prom_tce_alloc_end;
 
 #ifdef CONFIG_PPC_PSERIES
 static bool __prombss prom_radix_disable;
+static bool __prombss prom_xive_disable;
 #endif
 
 struct platform_support {
@@ -808,6 +809,12 @@ static void __init early_cmdline_parse(void)
        }
        if (prom_radix_disable)
                prom_debug("Radix disabled from cmdline\n");
+
+       opt = prom_strstr(prom_cmd_line, "xive=off");
+       if (opt) {
+               prom_xive_disable = true;
+               prom_debug("XIVE disabled from cmdline\n");
+       }
 #endif /* CONFIG_PPC_PSERIES */
 }
 
@@ -1216,10 +1223,17 @@ static void __init prom_parse_xive_model(u8 val,
        switch (val) {
        case OV5_FEAT(OV5_XIVE_EITHER): /* Either Available */
                prom_debug("XIVE - either mode supported\n");
-               support->xive = true;
+               support->xive = !prom_xive_disable;
                break;
        case OV5_FEAT(OV5_XIVE_EXPLOIT): /* Only Exploitation mode */
                prom_debug("XIVE - exploitation mode supported\n");
+               if (prom_xive_disable) {
+                       /*
+                        * If we __have__ to do XIVE, we're better off ignoring
+                        * the command line rather than not booting.
+                        */
+                       prom_printf("WARNING: Ignoring cmdline option xive=off\n");
+               }
                support->xive = true;
                break;
        case OV5_FEAT(OV5_XIVE_LEGACY): /* Only Legacy mode */
index 575db3b..2e2d1b8 100644 (file)
@@ -20,6 +20,7 @@
 #include <linux/cpumask.h>
 #include <linux/mm.h>
 #include <linux/delay.h>
+#include <linux/libfdt.h>
 
 #include <asm/prom.h>
 #include <asm/io.h>
@@ -663,6 +664,55 @@ static bool xive_get_max_prio(u8 *max_prio)
        return true;
 }
 
+static const u8 *get_vec5_feature(unsigned int index)
+{
+       unsigned long root, chosen;
+       int size;
+       const u8 *vec5;
+
+       root = of_get_flat_dt_root();
+       chosen = of_get_flat_dt_subnode_by_name(root, "chosen");
+       if (chosen == -FDT_ERR_NOTFOUND)
+               return NULL;
+
+       vec5 = of_get_flat_dt_prop(chosen, "ibm,architecture-vec-5", &size);
+       if (!vec5)
+               return NULL;
+
+       if (size <= index)
+               return NULL;
+
+       return vec5 + index;
+}
+
+static bool xive_spapr_disabled(void)
+{
+       const u8 *vec5_xive;
+
+       vec5_xive = get_vec5_feature(OV5_INDX(OV5_XIVE_SUPPORT));
+       if (vec5_xive) {
+               u8 val;
+
+               val = *vec5_xive & OV5_FEAT(OV5_XIVE_SUPPORT);
+               switch (val) {
+               case OV5_FEAT(OV5_XIVE_EITHER):
+               case OV5_FEAT(OV5_XIVE_LEGACY):
+                       break;
+               case OV5_FEAT(OV5_XIVE_EXPLOIT):
+                       /* Hypervisor only supports XIVE */
+                       if (xive_cmdline_disabled)
+                               pr_warn("WARNING: Ignoring cmdline option xive=off\n");
+                       return false;
+               default:
+                       pr_warn("%s: Unknown xive support option: 0x%x\n",
+                               __func__, val);
+                       break;
+               }
+       }
+
+       return xive_cmdline_disabled;
+}
+
 bool __init xive_spapr_init(void)
 {
        struct device_node *np;
@@ -675,7 +725,7 @@ bool __init xive_spapr_init(void)
        const __be32 *reg;
        int i;
 
-       if (xive_cmdline_disabled)
+       if (xive_spapr_disabled())
                return false;
 
        pr_devel("%s()\n", __func__);