KVM: x86: add KVM_CAP_X2APIC_API
authorRadim Krčmář <rkrcmar@redhat.com>
Tue, 12 Jul 2016 20:09:27 +0000 (22:09 +0200)
committerPaolo Bonzini <pbonzini@redhat.com>
Thu, 14 Jul 2016 07:03:57 +0000 (09:03 +0200)
KVM_CAP_X2APIC_API is a capability for features related to x2APIC
enablement.  KVM_X2APIC_API_32BIT_FORMAT feature can be enabled to
extend APIC ID in get/set ioctl and MSI addresses to 32 bits.
Both are needed to support x2APIC.

The feature has to be enableable and disabled by default, because
get/set ioctl shifted and truncated APIC ID to 8 bits by using a
non-standard protocol inspired by xAPIC and the change is not
backward-compatible.

Changes to MSI addresses follow the format used by interrupt remapping
unit.  The upper address word, that used to be 0, contains upper 24 bits
of the LAPIC address in its upper 24 bits.  Lower 8 bits are reserved as
0.  Using the upper address word is not backward-compatible either as we
didn't check that userspace zeroed the word.  Reserved bits are still
not explicitly checked, but non-zero data will affect LAPIC addresses,
which will cause a bug.

Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Documentation/virtual/kvm/api.txt
arch/x86/include/asm/kvm_host.h
arch/x86/kvm/irq_comm.c
arch/x86/kvm/lapic.c
arch/x86/kvm/vmx.c
arch/x86/kvm/x86.c
include/trace/events/kvm.h
include/uapi/linux/kvm.h

index 09efa9e..e34e51f 100644 (file)
@@ -1482,6 +1482,11 @@ struct kvm_irq_routing_msi {
        __u32 pad;
 };
 
+On x86, address_hi is ignored unless the KVM_X2APIC_API_USE_32BIT_IDS
+feature of KVM_CAP_X2APIC_API capability is enabled.  If it is enabled,
+address_hi bits 31-8 provide bits 31-8 of the destination id.  Bits 7-0 of
+address_hi must be zero.
+
 struct kvm_irq_routing_s390_adapter {
        __u64 ind_addr;
        __u64 summary_addr;
@@ -1583,6 +1588,17 @@ struct kvm_lapic_state {
 Reads the Local APIC registers and copies them into the input argument.  The
 data format and layout are the same as documented in the architecture manual.
 
+If KVM_X2APIC_API_USE_32BIT_IDS feature of KVM_CAP_X2APIC_API is
+enabled, then the format of APIC_ID register depends on the APIC mode
+(reported by MSR_IA32_APICBASE) of its VCPU.  x2APIC stores APIC ID in
+the APIC_ID register (bytes 32-35).  xAPIC only allows an 8-bit APIC ID
+which is stored in bits 31-24 of the APIC register, or equivalently in
+byte 35 of struct kvm_lapic_state's regs field.  KVM_GET_LAPIC must then
+be called after MSR_IA32_APICBASE has been set with KVM_SET_MSR.
+
+If KVM_X2APIC_API_USE_32BIT_IDS feature is disabled, struct kvm_lapic_state
+always uses xAPIC format.
+
 
 4.58 KVM_SET_LAPIC
 
@@ -1600,6 +1616,10 @@ struct kvm_lapic_state {
 Copies the input argument into the Local APIC registers.  The data format
 and layout are the same as documented in the architecture manual.
 
+The format of the APIC ID register (bytes 32-35 of struct kvm_lapic_state's
+regs field) depends on the state of the KVM_CAP_X2APIC_API capability.
+See the note in KVM_GET_LAPIC.
+
 
 4.59 KVM_IOEVENTFD
 
@@ -2180,6 +2200,10 @@ struct kvm_msi {
 
 No flags are defined so far. The corresponding field must be 0.
 
+On x86, address_hi is ignored unless the KVM_CAP_X2APIC_API capability is
+enabled.  If it is enabled, address_hi bits 31-8 provide bits 31-8 of the
+destination id.  Bits 7-0 of address_hi must be zero.
+
 
 4.71 KVM_CREATE_PIT2
 
@@ -3811,6 +3835,23 @@ Allows use of runtime-instrumentation introduced with zEC12 processor.
 Will return -EINVAL if the machine does not support runtime-instrumentation.
 Will return -EBUSY if a VCPU has already been created.
 
+7.7 KVM_CAP_X2APIC_API
+
+Architectures: x86
+Parameters: args[0] - features that should be enabled
+Returns: 0 on success, -EINVAL when args[0] contains invalid features
+
+Valid feature flags in args[0] are
+
+#define KVM_X2APIC_API_USE_32BIT_IDS            (1ULL << 0)
+
+Enabling KVM_X2APIC_API_USE_32BIT_IDS changes the behavior of
+KVM_SET_GSI_ROUTING, KVM_SIGNAL_MSI, KVM_SET_LAPIC, and KVM_GET_LAPIC,
+allowing the use of 32-bit APIC IDs.  See KVM_CAP_X2APIC_API in their
+respective sections.
+
+
+
 8. Other capabilities.
 ----------------------
 
index a2832cc..7c00ba3 100644 (file)
@@ -782,6 +782,8 @@ struct kvm_arch {
        u32 ldr_mode;
        struct page *avic_logical_id_table_page;
        struct page *avic_physical_id_table_page;
+
+       bool x2apic_format;
 };
 
 struct kvm_vm_stat {
@@ -1364,7 +1366,7 @@ bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu);
 bool kvm_intr_is_single_vcpu(struct kvm *kvm, struct kvm_lapic_irq *irq,
                             struct kvm_vcpu **dest_vcpu);
 
-void kvm_set_msi_irq(struct kvm_kernel_irq_routing_entry *e,
+void kvm_set_msi_irq(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e,
                     struct kvm_lapic_irq *irq);
 
 static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu)
index 889563d..25810b1 100644 (file)
@@ -110,13 +110,17 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
        return r;
 }
 
-void kvm_set_msi_irq(struct kvm_kernel_irq_routing_entry *e,
+void kvm_set_msi_irq(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e,
                     struct kvm_lapic_irq *irq)
 {
-       trace_kvm_msi_set_irq(e->msi.address_lo, e->msi.data);
+       trace_kvm_msi_set_irq(e->msi.address_lo | (kvm->arch.x2apic_format ?
+                                            (u64)e->msi.address_hi << 32 : 0),
+                             e->msi.data);
 
        irq->dest_id = (e->msi.address_lo &
                        MSI_ADDR_DEST_ID_MASK) >> MSI_ADDR_DEST_ID_SHIFT;
+       if (kvm->arch.x2apic_format)
+               irq->dest_id |= MSI_ADDR_EXT_DEST_ID(e->msi.address_hi);
        irq->vector = (e->msi.data &
                        MSI_DATA_VECTOR_MASK) >> MSI_DATA_VECTOR_SHIFT;
        irq->dest_mode = (1 << MSI_ADDR_DEST_MODE_SHIFT) & e->msi.address_lo;
@@ -129,15 +133,24 @@ void kvm_set_msi_irq(struct kvm_kernel_irq_routing_entry *e,
 }
 EXPORT_SYMBOL_GPL(kvm_set_msi_irq);
 
+static inline bool kvm_msi_route_invalid(struct kvm *kvm,
+               struct kvm_kernel_irq_routing_entry *e)
+{
+       return kvm->arch.x2apic_format && (e->msi.address_hi & 0xff);
+}
+
 int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
                struct kvm *kvm, int irq_source_id, int level, bool line_status)
 {
        struct kvm_lapic_irq irq;
 
+       if (kvm_msi_route_invalid(kvm, e))
+               return -EINVAL;
+
        if (!level)
                return -1;
 
-       kvm_set_msi_irq(e, &irq);
+       kvm_set_msi_irq(kvm, e, &irq);
 
        return kvm_irq_delivery_to_apic(kvm, NULL, &irq, NULL);
 }
@@ -153,7 +166,10 @@ int kvm_arch_set_irq_inatomic(struct kvm_kernel_irq_routing_entry *e,
        if (unlikely(e->type != KVM_IRQ_ROUTING_MSI))
                return -EWOULDBLOCK;
 
-       kvm_set_msi_irq(e, &irq);
+       if (kvm_msi_route_invalid(kvm, e))
+               return -EINVAL;
+
+       kvm_set_msi_irq(kvm, e, &irq);
 
        if (kvm_irq_delivery_to_apic_fast(kvm, NULL, &irq, &r, NULL))
                return r;
@@ -286,6 +302,9 @@ int kvm_set_routing_entry(struct kvm *kvm,
                e->msi.address_lo = ue->u.msi.address_lo;
                e->msi.address_hi = ue->u.msi.address_hi;
                e->msi.data = ue->u.msi.data;
+
+               if (kvm_msi_route_invalid(kvm, e))
+                       goto out;
                break;
        case KVM_IRQ_ROUTING_HV_SINT:
                e->set = kvm_hv_set_sint;
@@ -394,7 +413,7 @@ void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu,
                        if (entry->type != KVM_IRQ_ROUTING_MSI)
                                continue;
 
-                       kvm_set_msi_irq(entry, &irq);
+                       kvm_set_msi_irq(vcpu->kvm, entry, &irq);
 
                        if (irq.level && kvm_apic_match_dest(vcpu, NULL, 0,
                                                irq.dest_id, irq.dest_mode))
index 3c2a8c1..d27a782 100644 (file)
@@ -1991,10 +1991,15 @@ static int kvm_apic_state_fixup(struct kvm_vcpu *vcpu,
        if (apic_x2apic_mode(vcpu->arch.apic)) {
                u32 *id = (u32 *)(s->regs + APIC_ID);
 
-               if (set)
-                       *id >>= 24;
-               else
-                       *id <<= 24;
+               if (vcpu->kvm->arch.x2apic_format) {
+                       if (*id != vcpu->vcpu_id)
+                               return -EINVAL;
+               } else {
+                       if (set)
+                               *id >>= 24;
+                       else
+                               *id <<= 24;
+               }
        }
 
        return 0;
index 7bdd6b1..b61cdad 100644 (file)
@@ -11104,7 +11104,7 @@ static int vmx_update_pi_irte(struct kvm *kvm, unsigned int host_irq,
                 * We will support full lowest-priority interrupt later.
                 */
 
-               kvm_set_msi_irq(e, &irq);
+               kvm_set_msi_irq(kvm, e, &irq);
                if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu)) {
                        /*
                         * Make sure the IRTE is in remapped mode if
index b6e402d..d86f563 100644 (file)
@@ -90,6 +90,8 @@ static u64 __read_mostly efer_reserved_bits = ~((u64)EFER_SCE);
 #define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM
 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
 
+#define KVM_X2APIC_API_VALID_FLAGS (KVM_X2APIC_API_USE_32BIT_IDS)
+
 static void update_cr8_intercept(struct kvm_vcpu *vcpu);
 static void process_nmi(struct kvm_vcpu *vcpu);
 static void enter_smm(struct kvm_vcpu *vcpu);
@@ -2625,6 +2627,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
        case KVM_CAP_TSC_CONTROL:
                r = kvm_has_tsc_control;
                break;
+       case KVM_CAP_X2APIC_API:
+               r = KVM_X2APIC_API_VALID_FLAGS;
+               break;
        default:
                r = 0;
                break;
@@ -3799,6 +3804,16 @@ split_irqchip_unlock:
                mutex_unlock(&kvm->lock);
                break;
        }
+       case KVM_CAP_X2APIC_API:
+               r = -EINVAL;
+               if (cap->args[0] & ~KVM_X2APIC_API_VALID_FLAGS)
+                       break;
+
+               if (cap->args[0] & KVM_X2APIC_API_USE_32BIT_IDS)
+                       kvm->arch.x2apic_format = true;
+
+               r = 0;
+               break;
        default:
                r = -EINVAL;
                break;
index f28292d..8ade3eb 100644 (file)
@@ -151,8 +151,9 @@ TRACE_EVENT(kvm_msi_set_irq,
                __entry->data           = data;
        ),
 
-       TP_printk("dst %u vec %u (%s|%s|%s%s)",
-                 (u8)(__entry->address >> 12), (u8)__entry->data,
+       TP_printk("dst %llx vec %u (%s|%s|%s%s)",
+                 (u8)(__entry->address >> 12) | ((__entry->address >> 32) & 0xffffff00),
+                 (u8)__entry->data,
                  __print_symbolic((__entry->data >> 8 & 0x7), kvm_deliver_mode),
                  (__entry->address & (1<<2)) ? "logical" : "physical",
                  (__entry->data & (1<<15)) ? "level" : "edge",
index 05ebf47..f704403 100644 (file)
@@ -866,6 +866,7 @@ struct kvm_ppc_smmu_info {
 #define KVM_CAP_ARM_PMU_V3 126
 #define KVM_CAP_VCPU_ATTRIBUTES 127
 #define KVM_CAP_MAX_VCPU_ID 128
+#define KVM_CAP_X2APIC_API 129
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
@@ -1313,4 +1314,6 @@ struct kvm_assigned_msix_entry {
        __u16 padding[3];
 };
 
+#define KVM_X2APIC_API_USE_32BIT_IDS            (1ULL << 0)
+
 #endif /* __LINUX_KVM_H */