1 // SPDX-License-Identifier: GPL-2.0-or-later
3 * Copyright 2016,2017 IBM Corporation.
6 #define pr_fmt(fmt) "xive: " fmt
8 #include <linux/types.h>
10 #include <linux/debugfs.h>
11 #include <linux/smp.h>
12 #include <linux/interrupt.h>
13 #include <linux/seq_file.h>
14 #include <linux/init.h>
16 #include <linux/of_address.h>
17 #include <linux/slab.h>
18 #include <linux/spinlock.h>
19 #include <linux/delay.h>
20 #include <linux/cpumask.h>
22 #include <linux/kmemleak.h>
24 #include <asm/machdep.h>
28 #include <asm/errno.h>
30 #include <asm/xive-regs.h>
32 #include <asm/kvm_ppc.h>
34 #include "xive-internal.h"
/*
 * File-scope state of the native XIVE backend. The per-variable notes
 * describe how the rest of this file reads or writes each one.
 */
/* Filled from the "ibm,xive-provision-page-size" device-tree property. */
37 static u32 xive_provision_size;
/* Array filled from the "ibm,xive-provision-chips" device-tree property. */
38 static u32 *xive_provision_chips;
/* Number of entries in xive_provision_chips. */
39 static u32 xive_provision_chip_count;
/* Queue size shift picked from "ibm,xive-eq-sizes" in xive_native_init(). */
40 static u32 xive_queue_shift;
/* Base of the pool VP block; stays XIVE_INVALID_VP until a block is allocated. */
41 static u32 xive_pool_vps = XIVE_INVALID_VP;
/* Slab cache that xive_native_provision_pages() allocates pages from. */
42 static struct kmem_cache *xive_provision_cache;
/* Set from the "single-escalation-support" device-tree property. */
43 static bool xive_has_single_esc;
/* Set from the "vp-save-restore" device-tree property; also exposed in debugfs. */
44 bool xive_has_save_restore;
/*
 * Populate *data for hardware interrupt hw_irq from the information OPAL
 * returns through opal_xive_get_irq_info(): the OPAL flags are translated
 * to XIVE_IRQ_FLAG_* bits, the big-endian page addresses, ESB shift and
 * source chip are converted to CPU byte order, and the EOI page (plus the
 * trigger page when it is a distinct page) is mapped with ioremap().
 *
 * NOTE(review): this listing does not show every line of the function
 * (local declarations, the checks after the OPAL call, the return
 * statements and the braces are absent), so the exact return values cannot
 * be confirmed from here.
 */
46 int xive_native_populate_irq_data(u32 hw_irq, struct xive_irq_data *data)
48 __be64 flags, eoi_page, trig_page;
49 __be32 esb_shift, src_chip;
/* Start from a fully zeroed descriptor. */
53 memset(data, 0, sizeof(*data));
55 rc = opal_xive_get_irq_info(hw_irq, &flags, &eoi_page, &trig_page,
56 &esb_shift, &src_chip);
58 pr_err("opal_xive_get_irq_info(0x%x) returned %lld\n",
/* Translate the OPAL flag bits into the flag bits stored in *data. */
63 opal_flags = be64_to_cpu(flags);
64 if (opal_flags & OPAL_XIVE_IRQ_STORE_EOI)
65 data->flags |= XIVE_IRQ_FLAG_STORE_EOI;
/* STORE_EOI2 maps onto the same flag as STORE_EOI. */
66 if (opal_flags & OPAL_XIVE_IRQ_STORE_EOI2)
67 data->flags |= XIVE_IRQ_FLAG_STORE_EOI;
68 if (opal_flags & OPAL_XIVE_IRQ_LSI)
69 data->flags |= XIVE_IRQ_FLAG_LSI;
/* OPAL hands back big-endian values; store them in CPU byte order. */
70 data->eoi_page = be64_to_cpu(eoi_page);
71 data->trig_page = be64_to_cpu(trig_page);
72 data->esb_shift = be32_to_cpu(esb_shift);
73 data->src_chip = be32_to_cpu(src_chip);
/* Map 2^esb_shift bytes starting at the EOI page. */
75 data->eoi_mmio = ioremap(data->eoi_page, 1u << data->esb_shift);
76 if (!data->eoi_mmio) {
77 pr_err("Failed to map EOI page for irq 0x%x\n", hw_irq);
81 data->hw_irq = hw_irq;
/* Same page for EOI and trigger: reuse the existing mapping. */
85 if (data->trig_page == data->eoi_page) {
86 data->trig_mmio = data->eoi_mmio;
/* Otherwise the trigger page gets its own mapping of the same size. */
90 data->trig_mmio = ioremap(data->trig_page, 1u << data->esb_shift);
91 if (!data->trig_mmio) {
92 pr_err("Failed to map trigger page for irq 0x%x\n", hw_irq);
97 EXPORT_SYMBOL_GPL(xive_native_populate_irq_data);
/*
 * Configure hardware interrupt hw_irq (target, priority and software
 * interrupt number) through opal_xive_set_irq_config().
 *
 * Returns 0 when OPAL reports 0 and -ENXIO for any other OPAL result.
 *
 * NOTE(review): the msleep(OPAL_BUSY_DELAY_MS) suggests the OPAL call is
 * retried while OPAL is busy, but the loop and its condition are not
 * visible in this listing.
 */
99 int xive_native_configure_irq(u32 hw_irq, u32 target, u8 prio, u32 sw_irq)
104 rc = opal_xive_set_irq_config(hw_irq, target, prio, sw_irq);
107 msleep(OPAL_BUSY_DELAY_MS);
109 return rc == 0 ? 0 : -ENXIO;
111 EXPORT_SYMBOL_GPL(xive_native_configure_irq);
/*
 * Read back the configuration of hw_irq through opal_xive_get_irq_config().
 * The priority is written straight into *prio by OPAL; the target and the
 * software interrupt number come back big-endian and are converted before
 * being stored in *target and *sw_irq.
 *
 * Returns 0 when OPAL reports 0 and -ENXIO otherwise.
 *
 * NOTE(review): the end of the parameter list and the local declarations
 * are not visible in this listing.
 */
113 static int xive_native_get_irq_config(u32 hw_irq, u32 *target, u8 *prio,
120 rc = opal_xive_get_irq_config(hw_irq, &vp, prio, &lirq);
122 *target = be64_to_cpu(vp);
123 *sw_irq = be32_to_cpu(lirq);
125 return rc == 0 ? 0 : -ENXIO;
/* pr_err() wrapper that prefixes the message with the VP id printed in hex. */
128 #define vp_err(vp, fmt, ...) pr_err("VP[0x%x]: " fmt, vp, ##__VA_ARGS__)
130 /* This can be called multiple times to change a queue configuration */
/*
 * Configure event queue prio of VP vp_id to use the page qpage of size
 * 2^order bytes, and record the queue details in *q.
 *
 * Visible steps: take the physical address of qpage, compute the index
 * mask, ask OPAL for the queue information (EOI page and, when escalation
 * is wanted, the escalation interrupt), then program the queue with
 * opal_xive_set_queue_info() using ALWAYS_NOTIFY | ENABLED and optionally
 * ESCALATE.
 *
 * NOTE(review): declarations, the conditions guarding several statements,
 * the busy-retry loop around the OPAL call, the final stores into *q and
 * the return statements are not visible in this listing.
 */
131 int xive_native_configure_queue(u32 vp_id, struct xive_q *q, u8 prio,
132 __be32 *qpage, u32 order, bool can_escalate)
137 u64 flags, qpage_phys;
139 /* If there's an actual queue page, clean it */
143 qpage_phys = __pa(qpage);
147 /* Initialize the rest of the fields */
/*
 * Mask is (2^(order - 2)) - 1, i.e. the byte size divided by four minus
 * one (qpage is a __be32 pointer), or 0 when order is 0.
 */
148 q->msk = order ? ((1u << (order - 2)) - 1) : 0;
152 rc = opal_xive_get_queue_info(vp_id, prio, NULL, NULL,
157 vp_err(vp_id, "Failed to get queue %d info : %lld\n", prio, rc);
161 q->eoi_phys = be64_to_cpu(qeoi_page_be);
164 flags = OPAL_XIVE_EQ_ALWAYS_NOTIFY | OPAL_XIVE_EQ_ENABLED;
166 /* Escalation needed ? */
168 q->esc_irq = be32_to_cpu(esc_irq_be);
169 flags |= OPAL_XIVE_EQ_ESCALATE;
172 /* Configure and enable the queue in HW */
174 rc = opal_xive_set_queue_info(vp_id, prio, qpage_phys, order, flags);
177 msleep(OPAL_BUSY_DELAY_MS);
180 vp_err(vp_id, "Failed to set queue %d info: %lld\n", prio, rc);
184 * KVM code requires all of the above to be visible before
185 * q->qpage is set due to how it manages IPI EOIs
193 EXPORT_SYMBOL_GPL(xive_native_configure_queue);
/*
 * Disable queue prio of VP vp_id by programming it through
 * opal_xive_set_queue_info() with a zero page address, zero order and no
 * flags. A failure is only logged with vp_err().
 *
 * NOTE(review): the busy-retry loop implied by the msleep() and the test
 * guarding the error message are not visible in this listing.
 */
195 static void __xive_native_disable_queue(u32 vp_id, struct xive_q *q, u8 prio)
199 /* Disable the queue in HW */
201 rc = opal_xive_set_queue_info(vp_id, prio, 0, 0, 0);
204 msleep(OPAL_BUSY_DELAY_MS);
207 vp_err(vp_id, "Failed to disable queue %d : %lld\n", prio, rc);
/* Exported wrapper that forwards to __xive_native_disable_queue(). */
210 void xive_native_disable_queue(u32 vp_id, struct xive_q *q, u8 prio)
212 __xive_native_disable_queue(vp_id, q, prio);
214 EXPORT_SYMBOL_GPL(xive_native_disable_queue);
/*
 * setup_queue callback of xive_native_ops: allocate a queue page sized by
 * xive_queue_shift for this CPU, then configure queue prio of the CPU
 * (addressed by its hard SMP processor id) with can_escalate set to false.
 *
 * Returns the result of xive_native_configure_queue(), or PTR_ERR() of the
 * allocation result on the early-return path.
 */
216 static int xive_native_setup_queue(unsigned int cpu, struct xive_cpu *xc, u8 prio)
218 struct xive_q *q = &xc->queue[prio];
221 qpage = xive_queue_page_alloc(cpu, xive_queue_shift);
/* NOTE(review): presumably guarded by an IS_ERR(qpage) test that this listing does not show. */
223 return PTR_ERR(qpage);
225 return xive_native_configure_queue(get_hard_smp_processor_id(cpu),
226 q, prio, qpage, xive_queue_shift, false);
/*
 * cleanup_queue callback of xive_native_ops: disable queue prio of the CPU
 * in hardware (using the hard SMP processor id as VP id) and then release
 * the pages backing q->qpage with free_pages(), using the allocation order
 * derived from xive_queue_shift by xive_alloc_order().
 */
229 static void xive_native_cleanup_queue(unsigned int cpu, struct xive_cpu *xc, u8 prio)
231 struct xive_q *q = &xc->queue[prio];
232 unsigned int alloc_order;
235 * We use the variant with no iounmap as this is called on exec
236 * from an IPI and iounmap isn't safe
238 __xive_native_disable_queue(get_hard_smp_processor_id(cpu), q, prio);
239 alloc_order = xive_alloc_order(xive_queue_shift);
240 free_pages((unsigned long)q->qpage, alloc_order);
/*
 * match callback of xive_native_ops: true when the device node is
 * compatible with "ibm,opal-xive-vc".
 */
244 static bool xive_native_match(struct device_node *node)
246 return of_device_is_compatible(node, "ibm,opal-xive-vc");
/*
 * Wrapper around opal_xive_allocate_irq_raw() that turns a returned value
 * of 0xffffffff into OPAL_RESOURCE; every other value is passed through
 * unchanged.
 */
249 static s64 opal_xive_allocate_irq(u32 chip_id)
251 s64 irq = opal_xive_allocate_irq_raw(chip_id);
254 * Old versions of skiboot can incorrectly return 0xffffffff to
255 * indicate no space, fix it up here.
257 return irq == 0xffffffff ? OPAL_RESOURCE : irq;
/*
 * get_ipi callback of xive_native_ops: allocate an interrupt on the chip of
 * this CPU (xc->chip_id) through opal_xive_allocate_irq(), sleeping
 * OPAL_BUSY_DELAY_MS when OPAL answers OPAL_BUSY, and log an error when the
 * allocation fails.
 *
 * NOTE(review): the retry loop, the failure test, the code that stores the
 * allocated interrupt and the return statements are not visible in this
 * listing.
 */
261 static int xive_native_get_ipi(unsigned int cpu, struct xive_cpu *xc)
265 /* Allocate an IPI and populate info about it */
267 irq = opal_xive_allocate_irq(xc->chip_id);
268 if (irq == OPAL_BUSY) {
269 msleep(OPAL_BUSY_DELAY_MS);
273 pr_err("Failed to allocate IPI on CPU %d\n", cpu);
281 #endif /* CONFIG_SMP */
/*
 * Allocate an interrupt on chip chip_id through opal_xive_allocate_irq().
 *
 * NOTE(review): the msleep() suggests a retry while OPAL is busy; the loop,
 * the error handling and the returned value are not visible in this
 * listing.
 */
283 u32 xive_native_alloc_irq_on_chip(u32 chip_id)
288 rc = opal_xive_allocate_irq(chip_id);
291 msleep(OPAL_BUSY_DELAY_MS);
297 EXPORT_SYMBOL_GPL(xive_native_alloc_irq_on_chip);
/*
 * Release interrupt irq through opal_xive_free_irq().
 *
 * NOTE(review): the msleep() suggests the call is retried while OPAL is
 * busy; the loop lines are not visible in this listing.
 */
299 void xive_native_free_irq(u32 irq)
302 s64 rc = opal_xive_free_irq(irq);
305 msleep(OPAL_BUSY_DELAY_MS);
308 EXPORT_SYMBOL_GPL(xive_native_free_irq);
/*
 * put_ipi callback of xive_native_ops: release the interrupt recorded in
 * xc->hw_ipi through opal_xive_free_irq(), sleeping OPAL_BUSY_DELAY_MS when
 * OPAL answers OPAL_BUSY, then mark the slot as XIVE_BAD_IRQ.
 *
 * NOTE(review): the statement under the XIVE_BAD_IRQ test and the retry
 * loop are not visible in this listing.
 */
311 static void xive_native_put_ipi(unsigned int cpu, struct xive_cpu *xc)
/* Test for a slot that holds no interrupt. */
316 if (xc->hw_ipi == XIVE_BAD_IRQ)
319 rc = opal_xive_free_irq(xc->hw_ipi);
320 if (rc == OPAL_BUSY) {
321 msleep(OPAL_BUSY_DELAY_MS);
324 xc->hw_ipi = XIVE_BAD_IRQ;
328 #endif /* CONFIG_SMP */
/*
 * shutdown callback of xive_native_ops: reset the XIVE into
 * OPAL_XIVE_MODE_EMU. The OPAL return value is not used.
 */
330 static void xive_native_shutdown(void)
332 /* Switch the XIVE to emulation mode */
333 opal_xive_reset(OPAL_XIVE_MODE_EMU);
337 * Perform an "ack" cycle on the current thread, thus
338 * grabbing the pending active priorities and updating
339 * the CPPR to the most favored one.
/*
 * update_pending callback of xive_native_ops: read the 16-bit acknowledge
 * register at TM_SPC_ACK_HV_REG of the thread management area, decode the
 * HE field and, for a physical thread interrupt, record the acknowledged
 * priority in xc->pending_prio.
 *
 * NOTE(review): the declarations, the barrier after the read, the
 * extraction of cppr, the switch statement header, the break/return
 * statements and the update of xc->cppr are not visible in this listing.
 */
341 static void xive_native_update_pending(struct xive_cpu *xc)
346 /* Perform the acknowledge hypervisor to register cycle */
347 ack = be16_to_cpu(__raw_readw(xive_tima + TM_SPC_ACK_HV_REG));
349 /* Synchronize subsequent queue accesses */
353 * Grab the CPPR and the "HE" field which indicates the source
354 * of the hypervisor interrupt (if any)
/* Two successive shifts (8 then 6) keep bits 15:14 of the value read above. */
357 he = (ack >> 8) >> 6;
359 case TM_QW3_NSR_HE_NONE: /* Nothing to see here */
361 case TM_QW3_NSR_HE_PHYS: /* Physical thread interrupt */
364 /* Mark the priority pending */
365 xc->pending_prio |= 1 << cppr;
368 * A new interrupt should never have a CPPR less favored
369 * than our current one.
/* A value numerically greater than or equal to the current CPPR is reported. */
371 if (cppr >= xc->cppr)
372 pr_err("CPU %d odd ack CPPR, got %d at %d\n",
373 smp_processor_id(), cppr, xc->cppr);
375 /* Update our idea of what the CPPR is */
/* The two remaining HE values share one error message. */
378 case TM_QW3_NSR_HE_POOL: /* HV Pool interrupt (unused) */
379 case TM_QW3_NSR_HE_LSI: /* Legacy FW LSI (unused) */
380 pr_err("CPU %d got unexpected interrupt type HE=%d\n",
381 smp_processor_id(), he);
/* prepare_cpu callback of xive_native_ops: record the chip id of the CPU in xc->chip_id. */
386 static void xive_native_prepare_cpu(unsigned int cpu, struct xive_cpu *xc)
388 xc->chip_id = cpu_to_chip_id(cpu);
/*
 * setup_cpu callback of xive_native_ops: enable the pool VP assigned to
 * this CPU, read back its CAM value from OPAL and write it, together with
 * the TM_QW2W2_VP bit, into the HV pool words of the thread management
 * area.
 *
 * NOTE(review): declarations, the statement under the XIVE_INVALID_VP
 * test, the busy-retry loop around opal_xive_set_vp_info(), the tests
 * guarding the error messages and the early returns are not visible in
 * this listing.
 */
391 static void xive_native_setup_cpu(unsigned int cpu, struct xive_cpu *xc)
/* Test for the case where no pool VP block was allocated. */
398 if (xive_pool_vps == XIVE_INVALID_VP)
401 /* Check if pool VP already active, if it is, pull it */
402 if (in_be32(xive_tima + TM_QW2_HV_POOL + TM_WORD2) & TM_QW2W2_VP)
403 in_be64(xive_tima + TM_SPC_PULL_POOL_CTX);
405 /* Enable the pool VP */
/* Each CPU uses the VP at offset cpu inside the pool block. */
406 vp = xive_pool_vps + cpu;
408 rc = opal_xive_set_vp_info(vp, OPAL_XIVE_VP_ENABLED, 0);
411 msleep(OPAL_BUSY_DELAY_MS);
414 pr_err("Failed to enable pool VP on CPU %d\n", cpu);
418 /* Grab its CAM value */
419 rc = opal_xive_get_vp_info(vp, NULL, &vp_cam_be, NULL, NULL);
421 pr_err("Failed to get pool VP info CPU %d\n", cpu);
424 vp_cam = be64_to_cpu(vp_cam_be);
426 /* Push it on the CPU (set LSMFB to 0xff to skip backlog scan) */
427 out_be32(xive_tima + TM_QW2_HV_POOL + TM_WORD0, 0xff);
428 out_be32(xive_tima + TM_QW2_HV_POOL + TM_WORD2, TM_QW2W2_VP | vp_cam);
/*
 * teardown_cpu callback of xive_native_ops: pull the pool VP context from
 * the CPU by reading TM_SPC_PULL_POOL_CTX, then disable the pool VP of this
 * CPU by calling opal_xive_set_vp_info() with flags 0.
 *
 * NOTE(review): the statement under the XIVE_INVALID_VP test and the
 * busy-retry loop implied by the msleep() are not visible in this listing.
 */
431 static void xive_native_teardown_cpu(unsigned int cpu, struct xive_cpu *xc)
/* Test for the case where no pool VP block was allocated. */
436 if (xive_pool_vps == XIVE_INVALID_VP)
439 /* Pull the pool VP from the CPU */
440 in_be64(xive_tima + TM_SPC_PULL_POOL_CTX);
/* Same per-CPU VP id as used when the CPU was set up. */
443 vp = xive_pool_vps + cpu;
445 rc = opal_xive_set_vp_info(vp, 0, 0);
448 msleep(OPAL_BUSY_DELAY_MS);
/*
 * Issue opal_xive_sync() of type XIVE_SYNC_EAS for hw_irq. The OPAL return
 * value is not used.
 */
452 void xive_native_sync_source(u32 hw_irq)
454 opal_xive_sync(XIVE_SYNC_EAS, hw_irq);
456 EXPORT_SYMBOL_GPL(xive_native_sync_source);
/*
 * Issue opal_xive_sync() of type XIVE_SYNC_QUEUE for hw_irq. The OPAL
 * return value is not used.
 */
458 void xive_native_sync_queue(u32 hw_irq)
460 opal_xive_sync(XIVE_SYNC_QUEUE, hw_irq);
462 EXPORT_SYMBOL_GPL(xive_native_sync_queue);
464 #ifdef CONFIG_DEBUG_FS
/*
 * debug_create callback of xive_native_ops: expose xive_has_save_restore as
 * a boolean debugfs file named "save-restore" (mode 0600) under xive_dir.
 *
 * NOTE(review): the return statement is not visible in this listing.
 */
465 static int xive_native_debug_create(struct dentry *xive_dir)
467 debugfs_create_bool("save-restore", 0600, xive_dir, &xive_has_save_restore);
/*
 * Callback table of the native backend; it is handed to xive_core_init()
 * by xive_native_init(). Each member points at the function of the same
 * purpose defined in this file.
 */
472 static const struct xive_ops xive_native_ops = {
473 .populate_irq_data = xive_native_populate_irq_data,
474 .configure_irq = xive_native_configure_irq,
475 .get_irq_config = xive_native_get_irq_config,
476 .setup_queue = xive_native_setup_queue,
477 .cleanup_queue = xive_native_cleanup_queue,
478 .match = xive_native_match,
479 .shutdown = xive_native_shutdown,
480 .update_pending = xive_native_update_pending,
481 .prepare_cpu = xive_native_prepare_cpu,
482 .setup_cpu = xive_native_setup_cpu,
483 .teardown_cpu = xive_native_teardown_cpu,
484 .sync_source = xive_native_sync_source,
/* The IPI callbacks are followed by the end of a CONFIG_SMP conditional. */
486 .get_ipi = xive_native_get_ipi,
487 .put_ipi = xive_native_put_ipi,
488 #endif /* CONFIG_SMP */
/* The debugfs callback is only present with CONFIG_DEBUG_FS. */
489 #ifdef CONFIG_DEBUG_FS
490 .debug_create = xive_native_debug_create,
491 #endif /* CONFIG_DEBUG_FS */
/*
 * Read the provisioning description from device node np: the provisioning
 * page size ("ibm,xive-provision-page-size") and the list of chips
 * ("ibm,xive-provision-chips"), which is copied into a freshly allocated
 * array. A slab cache named "xive-provision" is then created for the
 * provisioning pages.
 *
 * NOTE(review): declarations, the tests guarding the error messages, the
 * remaining arguments of kcalloc() and kmem_cache_create() and all return
 * statements are not visible in this listing.
 */
495 static bool __init xive_parse_provisioning(struct device_node *np)
499 if (of_property_read_u32(np, "ibm,xive-provision-page-size",
500 &xive_provision_size) < 0)
/* Count the 4-byte elements of the chip list. */
502 rc = of_property_count_elems_of_size(np, "ibm,xive-provision-chips", 4);
504 pr_err("Error %d getting provision chips array\n", rc);
507 xive_provision_chip_count = rc;
/*
 * NOTE(review): kcalloc() is declared as (n, size, flags); here 4 is
 * passed as n and the chip count as size. The product is the same, but
 * the two arguments look swapped.
 */
511 xive_provision_chips = kcalloc(4, xive_provision_chip_count,
513 if (WARN_ON(!xive_provision_chips))
516 rc = of_property_read_u32_array(np, "ibm,xive-provision-chips",
517 xive_provision_chips,
518 xive_provision_chip_count);
520 pr_err("Error %d reading provision chips array\n", rc);
524 xive_provision_cache = kmem_cache_create("xive-provision",
528 if (!xive_provision_cache) {
529 pr_err("Failed to allocate provision cache\n");
/*
 * Allocate a block of pool VPs covering nr_cpu_ids entries and remember its
 * base in xive_pool_vps. A failed allocation triggers WARN_ON() and an
 * error message but is not treated as fatal here.
 */
535 static void __init xive_native_setup_pools(void)
537 /* Allocate a pool big enough */
538 pr_debug("Allocating VP block for pool size %u\n", nr_cpu_ids);
540 xive_pool_vps = xive_native_alloc_vp_block(nr_cpu_ids);
541 if (WARN_ON(xive_pool_vps == XIVE_INVALID_VP))
542 pr_err("Failed to allocate pool VP, KVM might not function\n");
544 pr_debug("Pool VPs allocated at 0x%x for %u max CPUs\n",
545 xive_pool_vps, nr_cpu_ids);
/* Return xive_queue_shift, the queue size shift selected by xive_native_init(). */
548 u32 xive_native_default_eq_shift(void)
550 return xive_queue_shift;
552 EXPORT_SYMBOL_GPL(xive_native_default_eq_shift);
/*
 * Start address of resource 2 (the OS window) of the XIVE device node; set
 * by xive_native_init().
 */
554 unsigned long xive_tima_os;
555 EXPORT_SYMBOL_GPL(xive_tima_os);
/*
 * Probe and initialise the native XIVE backend.
 *
 * Visible steps: look up the "ibm,opal-xive-pe" node, map its resource 1
 * (HV window) as the thread management area, read the number of priorities
 * and the supported queue sizes, read the "single-escalation-support" and
 * "vp-save-restore" properties, publish the thread management area to KVM
 * for every possible CPU, record the start of resource 2 (OS window), parse
 * the provisioning properties, switch the XIVE to exploitation mode,
 * allocate the pool VPs and finally register xive_native_ops with
 * xive_core_init(). When xive_core_init() fails the XIVE is reset to
 * emulation mode.
 *
 * NOTE(review): several declarations, the tests guarding some messages, the
 * statements under a few conditions and every return statement are not
 * visible in this listing.
 */
557 bool __init xive_native_init(void)
559 struct device_node *np;
562 struct property *prop;
/* Test for XIVE having been disabled on the kernel command line. */
568 if (xive_cmdline_disabled)
571 pr_devel("xive_native_init()\n");
572 np = of_find_compatible_node(NULL, NULL, "ibm,opal-xive-pe");
574 pr_devel("not found !\n");
577 pr_devel("Found %pOF\n", np);
579 /* Resource 1 is HV window */
580 if (of_address_to_resource(np, 1, &r)) {
581 pr_err("Failed to get thread mgmnt area resource\n");
/* Map the whole HV window. */
584 tima = ioremap(r.start, resource_size(&r));
586 pr_err("Failed to map thread mgmnt area\n");
590 /* Read number of priorities */
591 if (of_property_read_u32(np, "ibm,xive-#priorities", &val) == 0)
594 /* Iterate the EQ sizes and pick one */
/*
 * Every advertised size overwrites xive_queue_shift.
 * NOTE(review): the statement under the PAGE_SHIFT test is not visible
 * here; presumably it stops the loop on a page-sized queue.
 */
595 of_property_for_each_u32(np, "ibm,xive-eq-sizes", prop, p, val) {
596 xive_queue_shift = val;
597 if (val == PAGE_SHIFT)
601 /* Do we support single escalation */
602 xive_has_single_esc = of_property_read_bool(np, "single-escalation-support");
/* Record whether the node carries the "vp-save-restore" property. */
604 xive_has_save_restore = of_property_read_bool(np, "vp-save-restore");
606 /* Configure Thread Management areas for KVM */
/* r still describes resource 1 (HV window) at this point. */
607 for_each_possible_cpu(cpu)
608 kvmppc_set_xive_tima(cpu, r.start, tima);
610 /* Resource 2 is OS window */
611 if (of_address_to_resource(np, 2, &r)) {
612 pr_err("Failed to get thread mgmnt area resource\n");
616 xive_tima_os = r.start;
618 /* Grab size of provisioning pages */
/* The boolean result of the parser is not used here. */
619 xive_parse_provisioning(np);
621 /* Switch the XIVE to exploitation mode */
622 rc = opal_xive_reset(OPAL_XIVE_MODE_EXPL);
624 pr_err("Switch to exploitation mode failed with error %lld\n", rc);
628 /* Setup some dummy HV pool VPs */
629 xive_native_setup_pools();
631 /* Initialize XIVE core with our backend */
632 if (!xive_core_init(np, &xive_native_ops, tima, TM_QW3_HV_PHYS,
/* Core initialisation failed: fall back to emulation mode. */
634 opal_xive_reset(OPAL_XIVE_MODE_EMU);
/* 2^(shift - 10) converts the byte shift into a size in kB. */
638 pr_info("Using %dkB queues\n", 1 << (xive_queue_shift - 10));
/*
 * Donate one page to OPAL for each chip listed in xive_provision_chips. The
 * pages come from xive_provision_cache and are handed over by physical
 * address through opal_xive_donate_page().
 *
 * NOTE(review): declarations, the allocation-failure test and the return
 * statements are not visible in this listing; the return value of
 * opal_xive_donate_page() is not used on the visible line.
 */
646 static bool xive_native_provision_pages(void)
651 for (i = 0; i < xive_provision_chip_count; i++) {
652 u32 chip = xive_provision_chips[i];
655 * XXX TODO: Try to make the allocation local to the node where
658 p = kmem_cache_alloc(xive_provision_cache, GFP_KERNEL);
660 pr_err("Failed to allocate provisioning page\n");
664 opal_xive_donate_page(chip, __pa(p));
/*
 * Allocate a block of VPs large enough for max_vcpus through
 * opal_xive_alloc_vp_block(), which takes the block size as an order.
 *
 * Returns XIVE_INVALID_VP when page provisioning fails or when OPAL reports
 * an error that is not handled.
 *
 * NOTE(review): declarations, the loop and switch headers, the statement
 * under the power-of-two test and the success return are not visible in
 * this listing.
 */
669 u32 xive_native_alloc_vp_block(u32 max_vcpus)
/* Position of the highest set bit, i.e. floor(log2(max_vcpus)). */
674 order = fls(max_vcpus) - 1;
/* True when max_vcpus is not an exact power of two. */
675 if (max_vcpus > (1 << order))
678 pr_debug("VP block alloc, for max VCPUs %d use order %d\n",
682 rc = opal_xive_alloc_vp_block(order);
685 msleep(OPAL_BUSY_DELAY_MS);
/* OPAL asks for more pages: donate them, give up if that fails. */
687 case OPAL_XIVE_PROVISIONING:
688 if (!xive_native_provision_pages())
689 return XIVE_INVALID_VP;
693 pr_err("OPAL failed to allocate VCPUs order %d, err %lld\n",
695 return XIVE_INVALID_VP;
701 EXPORT_SYMBOL_GPL(xive_native_alloc_vp_block);
/*
 * Release the VP block starting at vp_base through
 * opal_xive_free_vp_block(); an OPAL error is only logged with pr_warn().
 *
 * NOTE(review): the statement under the XIVE_INVALID_VP test and the test
 * guarding the warning are not visible in this listing.
 */
703 void xive_native_free_vp_block(u32 vp_base)
707 if (vp_base == XIVE_INVALID_VP)
710 rc = opal_xive_free_vp_block(vp_base);
712 pr_warn("OPAL error %lld freeing VP block\n", rc);
714 EXPORT_SYMBOL_GPL(xive_native_free_vp_block);
/*
 * Enable VP vp_id through opal_xive_set_vp_info(). The flags are
 * OPAL_XIVE_VP_ENABLED, plus OPAL_XIVE_VP_SINGLE_ESCALATION when
 * single_escalation is true.
 *
 * Returns 0 when OPAL reports 0 and -EIO otherwise.
 *
 * NOTE(review): the busy-retry loop implied by the msleep() and the test
 * guarding the error message are not visible in this listing.
 */
716 int xive_native_enable_vp(u32 vp_id, bool single_escalation)
719 u64 flags = OPAL_XIVE_VP_ENABLED;
721 if (single_escalation)
722 flags |= OPAL_XIVE_VP_SINGLE_ESCALATION;
724 rc = opal_xive_set_vp_info(vp_id, flags, 0);
727 msleep(OPAL_BUSY_DELAY_MS);
730 vp_err(vp_id, "Failed to enable VP : %lld\n", rc);
731 return rc ? -EIO : 0;
733 EXPORT_SYMBOL_GPL(xive_native_enable_vp);
/*
 * Disable VP vp_id by calling opal_xive_set_vp_info() with flags 0.
 *
 * Returns 0 when OPAL reports 0 and -EIO otherwise.
 *
 * NOTE(review): the busy-retry loop implied by the msleep() and the test
 * guarding the error message are not visible in this listing.
 */
735 int xive_native_disable_vp(u32 vp_id)
740 rc = opal_xive_set_vp_info(vp_id, 0, 0);
743 msleep(OPAL_BUSY_DELAY_MS);
746 vp_err(vp_id, "Failed to disable VP : %lld\n", rc);
747 return rc ? -EIO : 0;
749 EXPORT_SYMBOL_GPL(xive_native_disable_vp);
/*
 * Query OPAL for the CAM value and the chip id of VP vp_id. The CAM value
 * is converted from big-endian and truncated to its low 32 bits before
 * being stored in *out_cam_id; the chip id is converted from big-endian and
 * stored in *out_chip_id.
 *
 * NOTE(review): part of the declarations, the test guarding the error
 * message and the return statements are not visible in this listing.
 */
751 int xive_native_get_vp_info(u32 vp_id, u32 *out_cam_id, u32 *out_chip_id)
754 __be32 vp_chip_id_be;
757 rc = opal_xive_get_vp_info(vp_id, NULL, &vp_cam_be, NULL, &vp_chip_id_be);
759 vp_err(vp_id, "Failed to get VP info : %lld\n", rc);
762 *out_cam_id = be64_to_cpu(vp_cam_be) & 0xffffffffu;
763 *out_chip_id = be32_to_cpu(vp_chip_id_be);
767 EXPORT_SYMBOL_GPL(xive_native_get_vp_info);
/* Return xive_has_single_esc, read from the device tree by xive_native_init(). */
769 bool xive_native_has_single_escalation(void)
771 return xive_has_single_esc;
773 EXPORT_SYMBOL_GPL(xive_native_has_single_escalation);
/* Return the current value of xive_has_save_restore. */
775 bool xive_native_has_save_restore(void)
777 return xive_has_save_restore;
779 EXPORT_SYMBOL_GPL(xive_native_has_save_restore);
/*
 * Fetch the description of queue prio of VP vp_id from OPAL and copy the
 * values, converted from big-endian, into the output pointers.
 *
 * out_escalate_irq is optional: it is only written when non-NULL.
 *
 * NOTE(review): several parameters, the local declarations, the tests
 * guarding the other output stores and the return statements are not
 * visible in this listing.
 */
781 int xive_native_get_queue_info(u32 vp_id, u32 prio,
785 u32 *out_escalate_irq,
795 rc = opal_xive_get_queue_info(vp_id, prio, &qpage, &qsize,
796 &qeoi_page, &escalate_irq, &qflags);
798 vp_err(vp_id, "failed to get queue %d info : %lld\n", prio, rc);
803 *out_qpage = be64_to_cpu(qpage);
805 *out_qsize = be64_to_cpu(qsize);
807 *out_qeoi_page = be64_to_cpu(qeoi_page);
808 if (out_escalate_irq)
809 *out_escalate_irq = be32_to_cpu(escalate_irq);
811 *out_qflags = be64_to_cpu(qflags);
815 EXPORT_SYMBOL_GPL(xive_native_get_queue_info);
/*
 * Read the toggle bit and index of queue prio of VP vp_id through
 * opal_xive_get_queue_state() and store them, converted from big-endian, in
 * *qtoggle and *qindex.
 *
 * NOTE(review): declarations, the end of the OPAL call, the tests guarding
 * the error message and the output stores, and the return statements are
 * not visible in this listing.
 */
817 int xive_native_get_queue_state(u32 vp_id, u32 prio, u32 *qtoggle, u32 *qindex)
823 rc = opal_xive_get_queue_state(vp_id, prio, &opal_qtoggle,
826 vp_err(vp_id, "failed to get queue %d state : %lld\n", prio, rc);
831 *qtoggle = be32_to_cpu(opal_qtoggle);
833 *qindex = be32_to_cpu(opal_qindex);
837 EXPORT_SYMBOL_GPL(xive_native_get_queue_state);
/*
 * Set the toggle bit and index of queue prio of VP vp_id through
 * opal_xive_set_queue_state(); a failure is logged with vp_err().
 *
 * NOTE(review): the test guarding the error message and the return
 * statements are not visible in this listing.
 */
839 int xive_native_set_queue_state(u32 vp_id, u32 prio, u32 qtoggle, u32 qindex)
843 rc = opal_xive_set_queue_state(vp_id, prio, qtoggle, qindex);
845 vp_err(vp_id, "failed to set queue %d state : %lld\n", prio, rc);
851 EXPORT_SYMBOL_GPL(xive_native_set_queue_state);
/*
 * True only when opal_check_token() succeeds for both
 * OPAL_XIVE_GET_QUEUE_STATE and OPAL_XIVE_SET_QUEUE_STATE.
 */
853 bool xive_native_has_queue_state_support(void)
855 return opal_check_token(OPAL_XIVE_GET_QUEUE_STATE) &&
856 opal_check_token(OPAL_XIVE_SET_QUEUE_STATE);
858 EXPORT_SYMBOL_GPL(xive_native_has_queue_state_support);
/*
 * Read the state of VP vp_id through opal_xive_get_vp_state() and store it,
 * converted from big-endian, in *out_state.
 *
 * NOTE(review): declarations, the tests guarding the error message and the
 * output store, and the return statements are not visible in this listing.
 */
860 int xive_native_get_vp_state(u32 vp_id, u64 *out_state)
865 rc = opal_xive_get_vp_state(vp_id, &state);
867 vp_err(vp_id, "failed to get vp state : %lld\n", rc);
872 *out_state = be64_to_cpu(state);
875 EXPORT_SYMBOL_GPL(xive_native_get_vp_state);
/* Register xive_core_debug_init through machine_arch_initcall() for the powernv machine. */
877 machine_arch_initcall(powernv, xive_core_debug_init);