From b664db8e3f976d9233cc9ea5e3f8a8c0bcabeb48 Mon Sep 17 00:00:00 2001 From: Leonardo Bras Date: Mon, 18 May 2020 20:42:45 -0300 Subject: [PATCH] powerpc/rtas: Implement reentrant rtas call Implement rtas_call_reentrant() for reentrant rtas-calls: "ibm,int-on", "ibm,int-off", "ibm,get-xive" and "ibm,set-xive". On LoPAPR Version 1.1 (March 24, 2016), from 7.3.10.1 to 7.3.10.4, items 2 and 3 say: 2 - For the PowerPC External Interrupt option: The * call must be reentrant to the number of processors on the platform. 3 - For the PowerPC External Interrupt option: The * argument call buffer for each simultaneous call must be physically unique. So, these rtas-calls can be called in a lockless way, if using a different buffer for each cpu doing such rtas call. For this, it was suggested to add the buffer (struct rtas_args) in the PACA struct, so each cpu can have its own buffer. The PACA struct received a pointer to the rtas buffer, which is allocated in the memory range available to rtas 32-bit. Reentrant rtas calls are useful to avoid deadlocks when crashing, where rtas-calls are needed, but some other thread crashed holding the rtas.lock. 
This is a backtrace of a deadlock from a kdump testing environment: #0 arch_spin_lock #1 lock_rtas () #2 rtas_call (token=8204, nargs=1, nret=1, outputs=0x0) #3 ics_rtas_mask_real_irq (hw_irq=4100) #4 machine_kexec_mask_interrupts #5 default_machine_crash_shutdown #6 machine_crash_shutdown #7 __crash_kexec #8 crash_kexec #9 oops_end Signed-off-by: Leonardo Bras [mpe: Move under #ifdef PSERIES to avoid build breakage] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200518234245.200672-3-leobras.c@gmail.com --- arch/powerpc/include/asm/paca.h | 2 ++ arch/powerpc/include/asm/rtas.h | 1 + arch/powerpc/kernel/paca.c | 32 +++++++++++++++++++++++ arch/powerpc/kernel/rtas.c | 52 +++++++++++++++++++++++++++++++++++++ arch/powerpc/sysdev/xics/ics-rtas.c | 22 ++++++++-------- 5 files changed, 98 insertions(+), 11 deletions(-) diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h index e3cc9eb..45a839a 100644 --- a/arch/powerpc/include/asm/paca.h +++ b/arch/powerpc/include/asm/paca.h @@ -29,6 +29,7 @@ #include #include #include +#include #include @@ -256,6 +257,7 @@ struct paca_struct { u64 l1d_flush_size; #endif #ifdef CONFIG_PPC_PSERIES + struct rtas_args *rtas_args_reentrant; u8 *mce_data_buf; /* buffer to hold per cpu rtas errlog */ #endif /* CONFIG_PPC_PSERIES */ diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h index 977e326..014968f 100644 --- a/arch/powerpc/include/asm/rtas.h +++ b/arch/powerpc/include/asm/rtas.h @@ -236,6 +236,7 @@ extern struct rtas_t rtas; extern int rtas_token(const char *service); extern int rtas_service_present(const char *service); extern int rtas_call(int token, int, int, int *, ...); +int rtas_call_reentrant(int token, int nargs, int nret, int *outputs, ...); void rtas_call_unlocked(struct rtas_args *args, int token, int nargs, int nret, ...); extern void __noreturn rtas_restart(char *cmd); diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c index 
3f91cca..8d96169 100644 --- a/arch/powerpc/kernel/paca.c +++ b/arch/powerpc/kernel/paca.c @@ -16,6 +16,7 @@ #include #include #include +#include #include "setup.h" @@ -164,6 +165,30 @@ static struct slb_shadow * __init new_slb_shadow(int cpu, unsigned long limit) #endif /* CONFIG_PPC_BOOK3S_64 */ +#ifdef CONFIG_PPC_PSERIES +/** + * new_rtas_args() - Allocates rtas args + * @cpu: CPU number + * @limit: Memory limit for this allocation (capped at RTAS_INSTANTIATE_MAX) + * + * Allocates a struct rtas_args and returns its pointer, + * if not in Hypervisor mode + * + * Return: Pointer to allocated rtas_args + * NULL if CPU in Hypervisor Mode + */ +static struct rtas_args * __init new_rtas_args(int cpu, unsigned long limit) +{ + limit = min_t(unsigned long, limit, RTAS_INSTANTIATE_MAX); + + if (early_cpu_has_feature(CPU_FTR_HVMODE)) + return NULL; + + return alloc_paca_data(sizeof(struct rtas_args), L1_CACHE_BYTES, + limit, cpu); +} +#endif /* CONFIG_PPC_PSERIES */ + /* The Paca is an array with one entry per processor. Each contains an * lppaca, which contains the information shared between the * hypervisor and Linux. 
@@ -202,6 +227,10 @@ void __init __nostackprotector initialise_paca(struct paca_struct *new_paca, int /* For now -- if we have threads this will be adjusted later */ new_paca->tcd_ptr = &new_paca->tcd; #endif + +#ifdef CONFIG_PPC_PSERIES + new_paca->rtas_args_reentrant = NULL; +#endif } /* Put the paca pointer into r13 and SPRG_PACA */ @@ -274,6 +303,9 @@ void __init allocate_paca(int cpu) #ifdef CONFIG_PPC_BOOK3S_64 paca->slb_shadow_ptr = new_slb_shadow(cpu, limit); #endif +#ifdef CONFIG_PPC_PSERIES + paca->rtas_args_reentrant = new_rtas_args(cpu, limit); +#endif paca_struct_size += sizeof(struct paca_struct); } diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index c5fa251..a09eba0 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -41,6 +41,7 @@ #include #include #include +#include /* This is here deliberately so it's only used in this file */ void enter_rtas(unsigned long); @@ -1014,6 +1015,57 @@ out: free_cpumask_var(offline_mask); return atomic_read(&data.error); } + +/** + * rtas_call_reentrant() - Used for reentrant rtas calls + * @token: Token for desired reentrant RTAS call + * @nargs: Number of Input Parameters + * @nret: Number of Output Parameters + * @outputs: Array receiving output values after the first (may be NULL) + * @...: Inputs for desired RTAS call + * + * According to LoPAR documentation, only "ibm,int-on", "ibm,int-off", + * "ibm,get-xive" and "ibm,set-xive" are currently reentrant. + * Reentrant calls need their own rtas_args buffer, so not using rtas.args, but + * the PACA one instead. + * + * Return: -1 if rtas.entry is unset or @token is RTAS_UNKNOWN_SERVICE, + * First output value of RTAS call if (nret > 0), + * 0 otherwise. + */ +int rtas_call_reentrant(int token, int nargs, int nret, int *outputs, ...) 
+{ + va_list list; + struct rtas_args *args; + unsigned long flags; + int i, ret = 0; + + if (!rtas.entry || token == RTAS_UNKNOWN_SERVICE) + return -1; + + local_irq_save(flags); + preempt_disable(); + + /* We use the per-cpu (PACA) rtas args buffer */ + args = local_paca->rtas_args_reentrant; + + va_start(list, outputs); + va_rtas_call_unlocked(args, token, nargs, nret, list); + va_end(list); + + if (nret > 1 && outputs) + for (i = 0; i < nret - 1; ++i) + outputs[i] = be32_to_cpu(args->rets[i + 1]); + + if (nret > 0) + ret = be32_to_cpu(args->rets[0]); + + local_irq_restore(flags); + preempt_enable(); + + return ret; +} + #else /* CONFIG_PPC_PSERIES */ int rtas_ibm_suspend_me(u64 handle) { diff --git a/arch/powerpc/sysdev/xics/ics-rtas.c b/arch/powerpc/sysdev/xics/ics-rtas.c index 6aabc74..4cf1800 100644 --- a/arch/powerpc/sysdev/xics/ics-rtas.c +++ b/arch/powerpc/sysdev/xics/ics-rtas.c @@ -50,8 +50,8 @@ static void ics_rtas_unmask_irq(struct irq_data *d) server = xics_get_irq_server(d->irq, irq_data_get_affinity_mask(d), 0); - call_status = rtas_call(ibm_set_xive, 3, 1, NULL, hw_irq, server, - DEFAULT_PRIORITY); + call_status = rtas_call_reentrant(ibm_set_xive, 3, 1, NULL, hw_irq, + server, DEFAULT_PRIORITY); if (call_status != 0) { printk(KERN_ERR "%s: ibm_set_xive irq %u server %x returned %d\n", @@ -60,7 +60,7 @@ static void ics_rtas_unmask_irq(struct irq_data *d) } /* Now unmask the interrupt (often a no-op) */ - call_status = rtas_call(ibm_int_on, 1, 1, NULL, hw_irq); + call_status = rtas_call_reentrant(ibm_int_on, 1, 1, NULL, hw_irq); if (call_status != 0) { printk(KERN_ERR "%s: ibm_int_on irq=%u returned %d\n", __func__, hw_irq, call_status); @@ -91,7 +91,7 @@ static void ics_rtas_mask_real_irq(unsigned int hw_irq) if (hw_irq == XICS_IPI) return; - call_status = rtas_call(ibm_int_off, 1, 1, NULL, hw_irq); + call_status = rtas_call_reentrant(ibm_int_off, 1, 1, NULL, hw_irq); if (call_status != 0) { printk(KERN_ERR "%s: ibm_int_off irq=%u returned %d\n", 
__func__, hw_irq, call_status); @@ -99,8 +99,8 @@ static void ics_rtas_mask_real_irq(unsigned int hw_irq) } /* Have to set XIVE to 0xff to be able to remove a slot */ - call_status = rtas_call(ibm_set_xive, 3, 1, NULL, hw_irq, - xics_default_server, 0xff); + call_status = rtas_call_reentrant(ibm_set_xive, 3, 1, NULL, hw_irq, + xics_default_server, 0xff); if (call_status != 0) { printk(KERN_ERR "%s: ibm_set_xive(0xff) irq=%u returned %d\n", __func__, hw_irq, call_status); @@ -131,7 +131,7 @@ static int ics_rtas_set_affinity(struct irq_data *d, if (hw_irq == XICS_IPI || hw_irq == XICS_IRQ_SPURIOUS) return -1; - status = rtas_call(ibm_get_xive, 1, 3, xics_status, hw_irq); + status = rtas_call_reentrant(ibm_get_xive, 1, 3, xics_status, hw_irq); if (status) { printk(KERN_ERR "%s: ibm,get-xive irq=%u returns %d\n", @@ -146,8 +146,8 @@ static int ics_rtas_set_affinity(struct irq_data *d, return -1; } - status = rtas_call(ibm_set_xive, 3, 1, NULL, - hw_irq, irq_server, xics_status[1]); + status = rtas_call_reentrant(ibm_set_xive, 3, 1, NULL, + hw_irq, irq_server, xics_status[1]); if (status) { printk(KERN_ERR "%s: ibm,set-xive irq=%u returns %d\n", @@ -179,7 +179,7 @@ static int ics_rtas_map(struct ics *ics, unsigned int virq) return -EINVAL; /* Check if RTAS knows about this interrupt */ - rc = rtas_call(ibm_get_xive, 1, 3, status, hw_irq); + rc = rtas_call_reentrant(ibm_get_xive, 1, 3, status, hw_irq); if (rc) return -ENXIO; @@ -198,7 +198,7 @@ static long ics_rtas_get_server(struct ics *ics, unsigned long vec) { int rc, status[2]; - rc = rtas_call(ibm_get_xive, 1, 3, status, vec); + rc = rtas_call_reentrant(ibm_get_xive, 1, 3, status, vec); if (rc) return -1; return status[0]; -- 2.7.4