KVM: PPC: Book3S HV: Enable use of the new XIVE "single escalation" feature
author Benjamin Herrenschmidt <benh@kernel.crashing.org>
Fri, 12 Jan 2018 02:37:12 +0000 (13:37 +1100)
committer Paul Mackerras <paulus@ozlabs.org>
Fri, 19 Jan 2018 01:10:21 +0000 (12:10 +1100)
That feature, provided by Power9 DD2.0 and later, when supported
by newer OPAL versions, allows us to sacrifice a queue (priority 7)
in favor of merging all the escalation interrupts of the queues
of a single VP into a single interrupt.

This reduces the number of host interrupts used up by KVM guests,
especially when those guests use multiple priorities.

It will also enable a future change to control the masking of the
escalation interrupts more precisely to avoid spurious ones.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
arch/powerpc/include/asm/opal-api.h
arch/powerpc/include/asm/xive.h
arch/powerpc/kvm/book3s_xive.c
arch/powerpc/kvm/book3s_xive.h
arch/powerpc/sysdev/xive/native.c

index 233c7504b1f20bf036b1131eb0c9db6dcecdafaa..fc926743647ec6f5b79eabadc93d88b2c5a77072 100644 (file)
@@ -1073,6 +1073,7 @@ enum {
 /* Flags for OPAL_XIVE_GET/SET_VP_INFO */
 enum {
        OPAL_XIVE_VP_ENABLED            = 0x00000001,
+       OPAL_XIVE_VP_SINGLE_ESCALATION  = 0x00000002,
 };
 
 /* "Any chip" replacement for chip ID for allocation functions */
index b619a5585cd68bae8834bf5b52ee95e77ff6fa6f..e602903c3029e88c61ca2141901cb68e8d67f5ae 100644 (file)
@@ -111,9 +111,10 @@ extern void xive_native_disable_queue(u32 vp_id, struct xive_q *q, u8 prio);
 
 extern void xive_native_sync_source(u32 hw_irq);
 extern bool is_xive_irq(struct irq_chip *chip);
-extern int xive_native_enable_vp(u32 vp_id);
+extern int xive_native_enable_vp(u32 vp_id, bool single_escalation);
 extern int xive_native_disable_vp(u32 vp_id);
 extern int xive_native_get_vp_info(u32 vp_id, u32 *out_cam_id, u32 *out_chip_id);
+extern bool xive_native_has_single_escalation(void);
 
 #else
 
index 6cff5bdfd6b7ef1487d854b089f776dc68f65dd0..a102efeabf05a1b2aa37fcfd35f0d33867f36d0e 100644 (file)
@@ -112,19 +112,21 @@ static int xive_attach_escalation(struct kvm_vcpu *vcpu, u8 prio)
                return -EIO;
        }
 
-       /*
-        * Future improvement: start with them disabled
-        * and handle DD2 and later scheme of merged escalation
-        * interrupts
-        */
-       name = kasprintf(GFP_KERNEL, "kvm-%d-%d-%d",
-                        vcpu->kvm->arch.lpid, xc->server_num, prio);
+       if (xc->xive->single_escalation)
+               name = kasprintf(GFP_KERNEL, "kvm-%d-%d",
+                                vcpu->kvm->arch.lpid, xc->server_num);
+       else
+               name = kasprintf(GFP_KERNEL, "kvm-%d-%d-%d",
+                                vcpu->kvm->arch.lpid, xc->server_num, prio);
        if (!name) {
                pr_err("Failed to allocate escalation irq name for queue %d of VCPU %d\n",
                       prio, xc->server_num);
                rc = -ENOMEM;
                goto error;
        }
+
+       pr_devel("Escalation %s irq %d (prio %d)\n", name, xc->esc_virq[prio], prio);
+
        rc = request_irq(xc->esc_virq[prio], xive_esc_irq,
                         IRQF_NO_THREAD, name, vcpu);
        if (rc) {
@@ -191,12 +193,12 @@ static int xive_check_provisioning(struct kvm *kvm, u8 prio)
 
        pr_devel("Provisioning prio... %d\n", prio);
 
-       /* Provision each VCPU and enable escalations */
+       /* Provision each VCPU and enable escalations if needed */
        kvm_for_each_vcpu(i, vcpu, kvm) {
                if (!vcpu->arch.xive_vcpu)
                        continue;
                rc = xive_provision_queue(vcpu, prio);
-               if (rc == 0)
+               if (rc == 0 && !xive->single_escalation)
                        xive_attach_escalation(vcpu, prio);
                if (rc)
                        return rc;
@@ -1081,6 +1083,7 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev,
        /* Allocate IPI */
        xc->vp_ipi = xive_native_alloc_irq();
        if (!xc->vp_ipi) {
+               pr_err("Failed to allocate xive irq for VCPU IPI\n");
                r = -EIO;
                goto bail;
        }
@@ -1090,19 +1093,34 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev,
        if (r)
                goto bail;
 
+       /*
+        * Enable the VP first as the single escalation mode will
+        * affect escalation interrupts numbering
+        */
+       r = xive_native_enable_vp(xc->vp_id, xive->single_escalation);
+       if (r) {
+               pr_err("Failed to enable VP in OPAL, err %d\n", r);
+               goto bail;
+       }
+
        /*
         * Initialize queues. Initially we set them all for no queueing
         * and we enable escalation for queue 0 only which we'll use for
         * our mfrr change notifications. If the VCPU is hot-plugged, we
-        * do handle provisioning however.
+        * do handle provisioning however based on the existing "map"
+        * of enabled queues.
         */
        for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
                struct xive_q *q = &xc->queues[i];
 
+               /* Single escalation, no queue 7 */
+               if (i == 7 && xive->single_escalation)
+                       break;
+
                /* Is queue already enabled ? Provision it */
                if (xive->qmap & (1 << i)) {
                        r = xive_provision_queue(vcpu, i);
-                       if (r == 0)
+                       if (r == 0 && !xive->single_escalation)
                                xive_attach_escalation(vcpu, i);
                        if (r)
                                goto bail;
@@ -1122,11 +1140,6 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev,
        if (r)
                goto bail;
 
-       /* Enable the VP */
-       r = xive_native_enable_vp(xc->vp_id);
-       if (r)
-               goto bail;
-
        /* Route the IPI */
        r = xive_native_configure_irq(xc->vp_ipi, xc->vp_id, 0, XICS_IPI);
        if (!r)
@@ -1473,6 +1486,7 @@ static int xive_set_source(struct kvmppc_xive *xive, long irq, u64 addr)
 
        pr_devel("  val=0x016%llx (server=0x%x, guest_prio=%d)\n",
                 val, server, guest_prio);
+
        /*
         * If the source doesn't already have an IPI, allocate
         * one and get the corresponding data
@@ -1761,6 +1775,8 @@ static int kvmppc_xive_create(struct kvm_device *dev, u32 type)
        if (xive->vp_base == XIVE_INVALID_VP)
                ret = -ENOMEM;
 
+       xive->single_escalation = xive_native_has_single_escalation();
+
        if (ret) {
                kfree(xive);
                return ret;
index 6ba63f8e8a614ed4aad2a5890236834f3e08c0a7..a08ae6fd4c51fc54b9c79ffe48290b7f86b58956 100644 (file)
@@ -120,6 +120,8 @@ struct kvmppc_xive {
        u32     q_order;
        u32     q_page_order;
 
+       /* Flags */
+       u8      single_escalation;
 };
 
 #define KVMPPC_XIVE_Q_COUNT    8
@@ -201,25 +203,20 @@ static inline struct kvmppc_xive_src_block *kvmppc_xive_find_source(struct kvmpp
  * is as follow.
  *
  * Guest request for 0...6 are honored. Guest request for anything
- * higher results in a priority of 7 being applied.
- *
- * However, when XIRR is returned via H_XIRR, 7 is translated to 0xb
- * in order to match AIX expectations
+ * higher results in a priority of 6 being applied.
  *
  * Similar mapping is done for CPPR values
  */
 static inline u8 xive_prio_from_guest(u8 prio)
 {
-       if (prio == 0xff || prio < 8)
+       if (prio == 0xff || prio < 6)
                return prio;
-       return 7;
+       return 6;
 }
 
 static inline u8 xive_prio_to_guest(u8 prio)
 {
-       if (prio == 0xff || prio < 7)
-               return prio;
-       return 0xb;
+       return prio;
 }
 
 static inline u32 __xive_read_eq(__be32 *qpage, u32 msk, u32 *idx, u32 *toggle)
index ebc244b08d6748512c19199446d25f7ac49fca9b..d22aeb0b69e107636e94e7811785e573f60880ce 100644 (file)
@@ -42,6 +42,7 @@ static u32 xive_provision_chip_count;
 static u32 xive_queue_shift;
 static u32 xive_pool_vps = XIVE_INVALID_VP;
 static struct kmem_cache *xive_provision_cache;
+static bool xive_has_single_esc;
 
 int xive_native_populate_irq_data(u32 hw_irq, struct xive_irq_data *data)
 {
@@ -571,6 +572,10 @@ bool __init xive_native_init(void)
                        break;
        }
 
+       /* Do we support single escalation */
+       if (of_get_property(np, "single-escalation-support", NULL) != NULL)
+               xive_has_single_esc = true;
+
        /* Configure Thread Management areas for KVM */
        for_each_possible_cpu(cpu)
                kvmppc_set_xive_tima(cpu, r.start, tima);
@@ -667,12 +672,15 @@ void xive_native_free_vp_block(u32 vp_base)
 }
 EXPORT_SYMBOL_GPL(xive_native_free_vp_block);
 
-int xive_native_enable_vp(u32 vp_id)
+int xive_native_enable_vp(u32 vp_id, bool single_escalation)
 {
        s64 rc;
+       u64 flags = OPAL_XIVE_VP_ENABLED;
 
+       if (single_escalation)
+               flags |= OPAL_XIVE_VP_SINGLE_ESCALATION;
        for (;;) {
-               rc = opal_xive_set_vp_info(vp_id, OPAL_XIVE_VP_ENABLED, 0);
+               rc = opal_xive_set_vp_info(vp_id, flags, 0);
                if (rc != OPAL_BUSY)
                        break;
                msleep(1);
@@ -710,3 +718,9 @@ int xive_native_get_vp_info(u32 vp_id, u32 *out_cam_id, u32 *out_chip_id)
        return 0;
 }
 EXPORT_SYMBOL_GPL(xive_native_get_vp_info);
+
+bool xive_native_has_single_escalation(void)
+{
+       return xive_has_single_esc;
+}
+EXPORT_SYMBOL_GPL(xive_native_has_single_escalation);