KVM: x86: Use static calls to reduce kvm_pmu_ops overhead
author Like Xu <likexu@tencent.com>
Tue, 29 Mar 2022 23:50:54 +0000 (23:50 +0000)
committer Paolo Bonzini <pbonzini@redhat.com>
Wed, 13 Apr 2022 17:37:45 +0000 (13:37 -0400)
Use static calls to improve kvm_pmu_ops performance, following the same
pattern and naming scheme used by kvm-x86-ops.h.

Here are the worst-case fenced_rdtsc() cycle counts for the kvm_pmu_ops
functions that are called most often (up to 7 digits of calls) when running
a single perf test case in a guest on an ICX 2.70GHz host (mitigations=on):

                | legacy  | static call
------------------------------------------------------------
.pmc_idx_to_pmc | 1304840 | 994872  (+23%)
.pmc_is_enabled | 978670  | 1011750 (-3%)
.msr_idx_to_pmc | 47828   | 41690   (+12%)
.is_valid_msr   | 28786   | 30108   (-4%)

Signed-off-by: Like Xu <likexu@tencent.com>
[sean: Handle static call updates in pmu.c, tweak changelog]
Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20220329235054.3534728-5-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
arch/x86/include/asm/kvm-x86-pmu-ops.h [new file with mode: 0644]
arch/x86/kvm/pmu.c

diff --git a/arch/x86/include/asm/kvm-x86-pmu-ops.h b/arch/x86/include/asm/kvm-x86-pmu-ops.h
new file mode 100644 (file)
index 0000000..fdfd8e0
--- /dev/null
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#if !defined(KVM_X86_PMU_OP) || !defined(KVM_X86_PMU_OP_OPTIONAL)
+BUILD_BUG_ON(1)
+#endif
+
+/*
+ * KVM_X86_PMU_OP() and KVM_X86_PMU_OP_OPTIONAL() are used to help generate
+ * both DECLARE/DEFINE_STATIC_CALL() invocations and
+ * "static_call_update()" calls.
+ *
+ * KVM_X86_PMU_OP_OPTIONAL() can be used for those functions that can have
+ * a NULL definition, for example if "static_call_cond()" will be used
+ * at the call sites.
+ */
+KVM_X86_PMU_OP(pmc_perf_hw_id)
+KVM_X86_PMU_OP(pmc_is_enabled)
+KVM_X86_PMU_OP(pmc_idx_to_pmc)
+KVM_X86_PMU_OP(rdpmc_ecx_to_pmc)
+KVM_X86_PMU_OP(msr_idx_to_pmc)
+KVM_X86_PMU_OP(is_valid_rdpmc_ecx)
+KVM_X86_PMU_OP(is_valid_msr)
+KVM_X86_PMU_OP(get_msr)
+KVM_X86_PMU_OP(set_msr)
+KVM_X86_PMU_OP(refresh)
+KVM_X86_PMU_OP(init)
+KVM_X86_PMU_OP(reset)
+KVM_X86_PMU_OP_OPTIONAL(deliver_pmi)
+KVM_X86_PMU_OP_OPTIONAL(cleanup)
+
+#undef KVM_X86_PMU_OP
+#undef KVM_X86_PMU_OP_OPTIONAL
index bb0b1ad..618f529 100644 (file)
 
 static struct kvm_pmu_ops kvm_pmu_ops __read_mostly;
 
+#define KVM_X86_PMU_OP(func)                                        \
+       DEFINE_STATIC_CALL_NULL(kvm_x86_pmu_##func,                          \
+                               *(((struct kvm_pmu_ops *)0)->func));
+#define KVM_X86_PMU_OP_OPTIONAL KVM_X86_PMU_OP
+#include <asm/kvm-x86-pmu-ops.h>
+
 void kvm_pmu_ops_update(const struct kvm_pmu_ops *pmu_ops)
 {
        memcpy(&kvm_pmu_ops, pmu_ops, sizeof(kvm_pmu_ops));
+
+#define __KVM_X86_PMU_OP(func) \
+       static_call_update(kvm_x86_pmu_##func, kvm_pmu_ops.func);
+#define KVM_X86_PMU_OP(func) \
+       WARN_ON(!kvm_pmu_ops.func); __KVM_X86_PMU_OP(func)
+#define KVM_X86_PMU_OP_OPTIONAL __KVM_X86_PMU_OP
+#include <asm/kvm-x86-pmu-ops.h>
+#undef __KVM_X86_PMU_OP
 }
 
 static inline bool pmc_is_enabled(struct kvm_pmc *pmc)
 {
-       return kvm_pmu_ops.pmc_is_enabled(pmc);
+       return static_call(kvm_x86_pmu_pmc_is_enabled)(pmc);
 }
 
 static void kvm_pmi_trigger_fn(struct irq_work *irq_work)
@@ -225,7 +239,7 @@ void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel)
                          ARCH_PERFMON_EVENTSEL_CMASK |
                          HSW_IN_TX |
                          HSW_IN_TX_CHECKPOINTED))) {
-               config = kvm_pmu_ops.pmc_perf_hw_id(pmc);
+               config = static_call(kvm_x86_pmu_pmc_perf_hw_id)(pmc);
                if (config != PERF_COUNT_HW_MAX)
                        type = PERF_TYPE_HARDWARE;
        }
@@ -275,7 +289,7 @@ void reprogram_fixed_counter(struct kvm_pmc *pmc, u8 ctrl, int idx)
 
        pmc->current_config = (u64)ctrl;
        pmc_reprogram_counter(pmc, PERF_TYPE_HARDWARE,
-                             kvm_pmu_ops.pmc_perf_hw_id(pmc),
+                             static_call(kvm_x86_pmu_pmc_perf_hw_id)(pmc),
                              !(en_field & 0x2), /* exclude user */
                              !(en_field & 0x1), /* exclude kernel */
                              pmi);
@@ -284,7 +298,7 @@ EXPORT_SYMBOL_GPL(reprogram_fixed_counter);
 
 void reprogram_counter(struct kvm_pmu *pmu, int pmc_idx)
 {
-       struct kvm_pmc *pmc = kvm_pmu_ops.pmc_idx_to_pmc(pmu, pmc_idx);
+       struct kvm_pmc *pmc = static_call(kvm_x86_pmu_pmc_idx_to_pmc)(pmu, pmc_idx);
 
        if (!pmc)
                return;
@@ -306,7 +320,7 @@ void kvm_pmu_handle_event(struct kvm_vcpu *vcpu)
        int bit;
 
        for_each_set_bit(bit, pmu->reprogram_pmi, X86_PMC_IDX_MAX) {
-               struct kvm_pmc *pmc = kvm_pmu_ops.pmc_idx_to_pmc(pmu, bit);
+               struct kvm_pmc *pmc = static_call(kvm_x86_pmu_pmc_idx_to_pmc)(pmu, bit);
 
                if (unlikely(!pmc || !pmc->perf_event)) {
                        clear_bit(bit, pmu->reprogram_pmi);
@@ -328,7 +342,7 @@ void kvm_pmu_handle_event(struct kvm_vcpu *vcpu)
 /* check if idx is a valid index to access PMU */
 bool kvm_pmu_is_valid_rdpmc_ecx(struct kvm_vcpu *vcpu, unsigned int idx)
 {
-       return kvm_pmu_ops.is_valid_rdpmc_ecx(vcpu, idx);
+       return static_call(kvm_x86_pmu_is_valid_rdpmc_ecx)(vcpu, idx);
 }
 
 bool is_vmware_backdoor_pmc(u32 pmc_idx)
@@ -378,7 +392,7 @@ int kvm_pmu_rdpmc(struct kvm_vcpu *vcpu, unsigned idx, u64 *data)
        if (is_vmware_backdoor_pmc(idx))
                return kvm_pmu_rdpmc_vmware(vcpu, idx, data);
 
-       pmc = kvm_pmu_ops.rdpmc_ecx_to_pmc(vcpu, idx, &mask);
+       pmc = static_call(kvm_x86_pmu_rdpmc_ecx_to_pmc)(vcpu, idx, &mask);
        if (!pmc)
                return 1;
 
@@ -394,22 +408,21 @@ int kvm_pmu_rdpmc(struct kvm_vcpu *vcpu, unsigned idx, u64 *data)
 void kvm_pmu_deliver_pmi(struct kvm_vcpu *vcpu)
 {
        if (lapic_in_kernel(vcpu)) {
-               if (kvm_pmu_ops.deliver_pmi)
-                       kvm_pmu_ops.deliver_pmi(vcpu);
+               static_call_cond(kvm_x86_pmu_deliver_pmi)(vcpu);
                kvm_apic_local_deliver(vcpu->arch.apic, APIC_LVTPC);
        }
 }
 
 bool kvm_pmu_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr)
 {
-       return kvm_pmu_ops.msr_idx_to_pmc(vcpu, msr) ||
-               kvm_pmu_ops.is_valid_msr(vcpu, msr);
+       return static_call(kvm_x86_pmu_msr_idx_to_pmc)(vcpu, msr) ||
+               static_call(kvm_x86_pmu_is_valid_msr)(vcpu, msr);
 }
 
 static void kvm_pmu_mark_pmc_in_use(struct kvm_vcpu *vcpu, u32 msr)
 {
        struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
-       struct kvm_pmc *pmc = kvm_pmu_ops.msr_idx_to_pmc(vcpu, msr);
+       struct kvm_pmc *pmc = static_call(kvm_x86_pmu_msr_idx_to_pmc)(vcpu, msr);
 
        if (pmc)
                __set_bit(pmc->idx, pmu->pmc_in_use);
@@ -417,13 +430,13 @@ static void kvm_pmu_mark_pmc_in_use(struct kvm_vcpu *vcpu, u32 msr)
 
 int kvm_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 {
-       return kvm_pmu_ops.get_msr(vcpu, msr_info);
+       return static_call(kvm_x86_pmu_get_msr)(vcpu, msr_info);
 }
 
 int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 {
        kvm_pmu_mark_pmc_in_use(vcpu, msr_info->index);
-       return kvm_pmu_ops.set_msr(vcpu, msr_info);
+       return static_call(kvm_x86_pmu_set_msr)(vcpu, msr_info);
 }
 
 /* refresh PMU settings. This function generally is called when underlying
@@ -432,7 +445,7 @@ int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
  */
 void kvm_pmu_refresh(struct kvm_vcpu *vcpu)
 {
-       kvm_pmu_ops.refresh(vcpu);
+       static_call(kvm_x86_pmu_refresh)(vcpu);
 }
 
 void kvm_pmu_reset(struct kvm_vcpu *vcpu)
@@ -440,7 +453,7 @@ void kvm_pmu_reset(struct kvm_vcpu *vcpu)
        struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
 
        irq_work_sync(&pmu->irq_work);
-       kvm_pmu_ops.reset(vcpu);
+       static_call(kvm_x86_pmu_reset)(vcpu);
 }
 
 void kvm_pmu_init(struct kvm_vcpu *vcpu)
@@ -448,7 +461,7 @@ void kvm_pmu_init(struct kvm_vcpu *vcpu)
        struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
 
        memset(pmu, 0, sizeof(*pmu));
-       kvm_pmu_ops.init(vcpu);
+       static_call(kvm_x86_pmu_init)(vcpu);
        init_irq_work(&pmu->irq_work, kvm_pmi_trigger_fn);
        pmu->event_count = 0;
        pmu->need_cleanup = false;
@@ -480,14 +493,13 @@ void kvm_pmu_cleanup(struct kvm_vcpu *vcpu)
                      pmu->pmc_in_use, X86_PMC_IDX_MAX);
 
        for_each_set_bit(i, bitmask, X86_PMC_IDX_MAX) {
-               pmc = kvm_pmu_ops.pmc_idx_to_pmc(pmu, i);
+               pmc = static_call(kvm_x86_pmu_pmc_idx_to_pmc)(pmu, i);
 
                if (pmc && pmc->perf_event && !pmc_speculative_in_use(pmc))
                        pmc_stop_counter(pmc);
        }
 
-       if (kvm_pmu_ops.cleanup)
-               kvm_pmu_ops.cleanup(vcpu);
+       static_call_cond(kvm_x86_pmu_cleanup)(vcpu);
 
        bitmap_zero(pmu->pmc_in_use, X86_PMC_IDX_MAX);
 }
@@ -517,7 +529,7 @@ static inline bool eventsel_match_perf_hw_id(struct kvm_pmc *pmc,
        unsigned int config;
 
        pmc->eventsel &= (ARCH_PERFMON_EVENTSEL_EVENT | ARCH_PERFMON_EVENTSEL_UMASK);
-       config = kvm_pmu_ops.pmc_perf_hw_id(pmc);
+       config = static_call(kvm_x86_pmu_pmc_perf_hw_id)(pmc);
        pmc->eventsel = old_eventsel;
        return config == perf_hw_id;
 }
@@ -545,7 +557,7 @@ void kvm_pmu_trigger_event(struct kvm_vcpu *vcpu, u64 perf_hw_id)
        int i;
 
        for_each_set_bit(i, pmu->all_valid_pmc_idx, X86_PMC_IDX_MAX) {
-               pmc = kvm_pmu_ops.pmc_idx_to_pmc(pmu, i);
+               pmc = static_call(kvm_x86_pmu_pmc_idx_to_pmc)(pmu, i);
 
                if (!pmc || !pmc_is_enabled(pmc) || !pmc_speculative_in_use(pmc))
                        continue;