From 9c15eeb5362c48dd27d51bd72e8873341fa9383c Mon Sep 17 00:00:00 2001 From: James Gowans Date: Thu, 8 Jun 2023 14:00:20 +0200 Subject: [PATCH] genirq: Allow fasteoi handler to resend interrupts on concurrent handling There is a class of interrupt controllers out there that, once they have signalled a given interrupt number, will still signal incoming instances of the *same* interrupt despite the original interrupt not having been EOIed yet. As long as the new interrupt reaches the *same* CPU, nothing bad happens, as that CPU still has its interrupts globally disabled, and we will only take the new interrupt once the interrupt has been EOIed. However, things become more "interesting" if an affinity change comes in while the interrupt is being handled. More specifically, while the per-irq lock is being dropped. This results in the affinity change taking place immediately. At this point, there is nothing that prevents the interrupt from firing on the new target CPU. We end-up with the interrupt running concurrently on two CPUs, which isn't a good thing. And that's where things become worse: the new CPU notices that the interrupt handling is in progress (irq_may_run() return false), and *drops the interrupt on the floor*. The whole race looks like this: CPU 0 | CPU 1 -----------------------------|----------------------------- interrupt start | handle_fasteoi_irq | set_affinity(CPU 1) handler | ... | interrupt start ... | handle_fasteoi_irq -> early out handle_fasteoi_irq return | interrupt end interrupt end | If the interrupt was an edge, too bad. The interrupt is lost, and the system will eventually die one way or another. Not great. A way to avoid this situation is to detect this problem at the point we handle the interrupt on the new target. Instead of dropping the interrupt, use the resend mechanism to force it to be replayed. Also, in order to limit the impact of this workaround to the pathetic architectures that require it, gate it behind a new irq flag aptly named IRQD_RESEND_WHEN_IN_PROGRESS. Suggested-by: Marc Zyngier Signed-off-by: James Gowans Cc: Thomas Gleixner Cc: Marc Zyngier Cc: KarimAllah Raslan Cc: Yipeng Zou Cc: Zhang Jianhua [maz: reworded commit mesage] Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230608120021.3273400-3-jgowans@amazon.com --- include/linux/irq.h | 13 +++++++++++++ kernel/irq/chip.c | 16 +++++++++++++++- kernel/irq/debugfs.c | 2 ++ 3 files changed, 30 insertions(+), 1 deletion(-) diff --git a/include/linux/irq.h b/include/linux/irq.h index d9c86db..d8a6fdc 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -223,6 +223,8 @@ struct irq_data { * irq_chip::irq_set_affinity() when deactivated. * IRQD_IRQ_ENABLED_ON_SUSPEND - Interrupt is enabled on suspend by irq pm if * irqchip have flag IRQCHIP_ENABLE_WAKEUP_ON_SUSPEND set. + * IRQD_RESEND_WHEN_IN_PROGRESS - Interrupt may fire when already in progress in which + * case it must be resent at the next available opportunity. */ enum { IRQD_TRIGGER_MASK = 0xf, @@ -249,6 +251,7 @@ enum { IRQD_HANDLE_ENFORCE_IRQCTX = BIT(28), IRQD_AFFINITY_ON_ACTIVATE = BIT(29), IRQD_IRQ_ENABLED_ON_SUSPEND = BIT(30), + IRQD_RESEND_WHEN_IN_PROGRESS = BIT(31), }; #define __irqd_to_state(d) ACCESS_PRIVATE((d)->common, state_use_accessors) @@ -448,6 +451,16 @@ static inline bool irqd_affinity_on_activate(struct irq_data *d) return __irqd_to_state(d) & IRQD_AFFINITY_ON_ACTIVATE; } +static inline void irqd_set_resend_when_in_progress(struct irq_data *d) +{ + __irqd_to_state(d) |= IRQD_RESEND_WHEN_IN_PROGRESS; +} + +static inline bool irqd_needs_resend_when_in_progress(struct irq_data *d) +{ + return __irqd_to_state(d) & IRQD_RESEND_WHEN_IN_PROGRESS; +} + #undef __irqd_to_state static inline irq_hw_number_t irqd_to_hwirq(struct irq_data *d) diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index 49e7bc8..57cd8f4 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -692,8 +692,16 @@ void handle_fasteoi_irq(struct irq_desc *desc) raw_spin_lock(&desc->lock); - if (!irq_may_run(desc)) + /* + * When an affinity change races with IRQ handling, the next interrupt + * can arrive on the new CPU before the original CPU has completed + * handling the previous one - it may need to be resent. + */ + if (!irq_may_run(desc)) { + if (irqd_needs_resend_when_in_progress(&desc->irq_data)) + desc->istate |= IRQS_PENDING; goto out; + } desc->istate &= ~(IRQS_REPLAY | IRQS_WAITING); @@ -715,6 +723,12 @@ void handle_fasteoi_irq(struct irq_desc *desc) cond_unmask_eoi_irq(desc, chip); + /* + * When the race described above happens this will resend the interrupt. + */ + if (unlikely(desc->istate & IRQS_PENDING)) + check_irq_resend(desc, false); + raw_spin_unlock(&desc->lock); return; out: diff --git a/kernel/irq/debugfs.c b/kernel/irq/debugfs.c index bbcaac6..5971a66 100644 --- a/kernel/irq/debugfs.c +++ b/kernel/irq/debugfs.c @@ -133,6 +133,8 @@ static const struct irq_bit_descr irqdata_states[] = { BIT_MASK_DESCR(IRQD_HANDLE_ENFORCE_IRQCTX), BIT_MASK_DESCR(IRQD_IRQ_ENABLED_ON_SUSPEND), + + BIT_MASK_DESCR(IRQD_RESEND_WHEN_IN_PROGRESS), }; static const struct irq_bit_descr irqdesc_states[] = { -- 2.7.4