KVM: x86/pmu: Add IA32_PEBS_ENABLE MSR emulation for extended PEBS
author: Like Xu <like.xu@linux.intel.com>
Mon, 11 Apr 2022 10:19:36 +0000 (18:19 +0800)
committer: Paolo Bonzini <pbonzini@redhat.com>
Wed, 8 Jun 2022 08:47:55 +0000 (04:47 -0400)
If IA32_PERF_CAPABILITIES.PEBS_BASELINE [bit 14] is set, the
IA32_PEBS_ENABLE MSR exists and all architecturally enumerated fixed
and general-purpose counters have corresponding bits in IA32_PEBS_ENABLE
that enable generation of PEBS records. The general-purpose counter bits
start at bit IA32_PEBS_ENABLE[0], and the fixed counter bits start at
bit IA32_PEBS_ENABLE[32].

When guest PEBS is enabled, the IA32_PEBS_ENABLE MSR will be
added to the perf_guest_switch_msr() and atomically switched during
the VMX transitions just like CORE_PERF_GLOBAL_CTRL MSR.

Depending on whether the platform supports x86_pmu.pebs_ept, this patch
also refactors how msrs are added to arr[] in intel_guest_get_msrs() so
that further entries can be appended easily.

Originally-by: Andi Kleen <ak@linux.intel.com>
Co-developed-by: Kan Liang <kan.liang@linux.intel.com>
Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
Co-developed-by: Luwei Kang <luwei.kang@intel.com>
Signed-off-by: Luwei Kang <luwei.kang@intel.com>
Signed-off-by: Like Xu <like.xu@linux.intel.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Message-Id: <20220411101946.20262-8-likexu@tencent.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
arch/x86/events/intel/core.c
arch/x86/include/asm/kvm_host.h
arch/x86/include/asm/msr-index.h
arch/x86/kvm/vmx/pmu_intel.c
arch/x86/kvm/x86.c

index 8c9cb41..70a5c66 100644 (file)
@@ -3969,33 +3969,72 @@ static int intel_pmu_hw_config(struct perf_event *event)
        return 0;
 }
 
+/*
+ * Currently, the only caller of this function is the atomic_switch_perf_msrs().
+ * The host perf context helps to prepare the values of the real hardware for
+ * a set of msrs that need to be switched atomically in a vmx transaction.
+ *
+ * For example, the pseudocode needed to add a new msr should look like:
+ *
+ * arr[(*nr)++] = (struct perf_guest_switch_msr){
+ *     .msr = the hardware msr address,
+ *     .host = the value the hardware has when it doesn't run a guest,
+ *     .guest = the value the hardware has when it runs a guest,
+ * };
+ *
+ * These values have nothing to do with the emulated values the guest sees
+ * when it uses {RD,WR}MSR, which should be handled by the KVM context,
+ * specifically in the intel_pmu_{get,set}_msr().
+ */
 static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr, void *data)
 {
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
        struct perf_guest_switch_msr *arr = cpuc->guest_switch_msrs;
        u64 intel_ctrl = hybrid(cpuc->pmu, intel_ctrl);
+       u64 pebs_mask = cpuc->pebs_enabled & x86_pmu.pebs_capable;
+       int global_ctrl, pebs_enable;
+
+       *nr = 0;
+       global_ctrl = (*nr)++;
+       arr[global_ctrl] = (struct perf_guest_switch_msr){
+               .msr = MSR_CORE_PERF_GLOBAL_CTRL,
+               .host = intel_ctrl & ~cpuc->intel_ctrl_guest_mask,
+               .guest = intel_ctrl & ~cpuc->intel_ctrl_host_mask & ~pebs_mask,
+       };
 
-       arr[0].msr = MSR_CORE_PERF_GLOBAL_CTRL;
-       arr[0].host = intel_ctrl & ~cpuc->intel_ctrl_guest_mask;
-       arr[0].guest = intel_ctrl & ~cpuc->intel_ctrl_host_mask;
-       arr[0].guest &= ~(cpuc->pebs_enabled & x86_pmu.pebs_capable);
-       *nr = 1;
+       if (!x86_pmu.pebs)
+               return arr;
 
-       if (x86_pmu.pebs && x86_pmu.pebs_no_isolation) {
-               /*
-                * If PMU counter has PEBS enabled it is not enough to
-                * disable counter on a guest entry since PEBS memory
-                * write can overshoot guest entry and corrupt guest
-                * memory. Disabling PEBS solves the problem.
-                *
-                * Don't do this if the CPU already enforces it.
-                */
-               arr[1].msr = MSR_IA32_PEBS_ENABLE;
-               arr[1].host = cpuc->pebs_enabled;
-               arr[1].guest = 0;
-               *nr = 2;
+       /*
+        * If PMU counter has PEBS enabled it is not enough to
+        * disable counter on a guest entry since PEBS memory
+        * write can overshoot guest entry and corrupt guest
+        * memory. Disabling PEBS solves the problem.
+        *
+        * Don't do this if the CPU already enforces it.
+        */
+       if (x86_pmu.pebs_no_isolation) {
+               arr[(*nr)++] = (struct perf_guest_switch_msr){
+                       .msr = MSR_IA32_PEBS_ENABLE,
+                       .host = cpuc->pebs_enabled,
+                       .guest = 0,
+               };
+               return arr;
        }
 
+       if (!x86_pmu.pebs_ept)
+               return arr;
+       pebs_enable = (*nr)++;
+
+       arr[pebs_enable] = (struct perf_guest_switch_msr){
+               .msr = MSR_IA32_PEBS_ENABLE,
+               .host = cpuc->pebs_enabled & ~cpuc->intel_ctrl_guest_mask,
+               .guest = pebs_mask & ~cpuc->intel_ctrl_host_mask,
+       };
+
+       /* Set hw GLOBAL_CTRL bits for PEBS counter when it runs for guest */
+       arr[global_ctrl].guest |= arr[pebs_enable].guest;
+
        return arr;
 }
 
index 7458abe..36a5650 100644 (file)
@@ -521,6 +521,9 @@ struct kvm_pmu {
        DECLARE_BITMAP(all_valid_pmc_idx, X86_PMC_IDX_MAX);
        DECLARE_BITMAP(pmc_in_use, X86_PMC_IDX_MAX);
 
+       u64 pebs_enable;
+       u64 pebs_enable_mask;
+
        /*
         * The gate to release perf_events not marked in
         * pmc_in_use only once in a vcpu time slice.
index c194995..bd1861c 100644 (file)
 #define PERF_CAP_PT_IDX                        16
 
 #define MSR_PEBS_LD_LAT_THRESHOLD      0x000003f6
+#define PERF_CAP_PEBS_TRAP             BIT_ULL(6)
+#define PERF_CAP_ARCH_REG              BIT_ULL(7)
+#define PERF_CAP_PEBS_FORMAT           0xf00
+#define PERF_CAP_PEBS_BASELINE         BIT_ULL(14)
+#define PERF_CAP_PEBS_MASK     (PERF_CAP_PEBS_TRAP | PERF_CAP_ARCH_REG | \
+                                PERF_CAP_PEBS_FORMAT | PERF_CAP_PEBS_BASELINE)
 
 #define MSR_IA32_RTIT_CTL              0x00000570
 #define RTIT_CTL_TRACEEN               BIT(0)
index c04d123..2cd4f8a 100644 (file)
@@ -214,6 +214,9 @@ static bool intel_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr)
        case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
                ret = pmu->version > 1;
                break;
+       case MSR_IA32_PEBS_ENABLE:
+               ret = vcpu->arch.perf_capabilities & PERF_CAP_PEBS_FORMAT;
+               break;
        default:
                ret = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0) ||
                        get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0) ||
@@ -361,6 +364,9 @@ static int intel_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
        case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
                msr_info->data = 0;
                return 0;
+       case MSR_IA32_PEBS_ENABLE:
+               msr_info->data = pmu->pebs_enable;
+               return 0;
        default:
                if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0)) ||
                    (pmc = get_gp_pmc(pmu, msr, MSR_IA32_PMC0))) {
@@ -421,6 +427,14 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
                        return 0;
                }
                break;
+       case MSR_IA32_PEBS_ENABLE:
+               if (pmu->pebs_enable == data)
+                       return 0;
+               if (!(data & pmu->pebs_enable_mask)) {
+                       pmu->pebs_enable = data;
+                       return 0;
+               }
+               break;
        default:
                if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0)) ||
                    (pmc = get_gp_pmc(pmu, msr, MSR_IA32_PMC0))) {
@@ -489,6 +503,7 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
        pmu->reserved_bits = 0xffffffff00200000ull;
        pmu->raw_event_mask = X86_RAW_EVENT_MASK;
        pmu->fixed_ctr_ctrl_mask = ~0ull;
+       pmu->pebs_enable_mask = ~0ull;
 
        entry = kvm_find_cpuid_entry(vcpu, 0xa, 0);
        if (!entry || !vcpu->kvm->arch.enable_pmu)
@@ -560,6 +575,22 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
 
        if (lbr_desc->records.nr)
                bitmap_set(pmu->all_valid_pmc_idx, INTEL_PMC_IDX_FIXED_VLBR, 1);
+
+       if (vcpu->arch.perf_capabilities & PERF_CAP_PEBS_FORMAT) {
+               if (vcpu->arch.perf_capabilities & PERF_CAP_PEBS_BASELINE) {
+                       pmu->pebs_enable_mask = ~pmu->global_ctrl;
+                       pmu->reserved_bits &= ~ICL_EVENTSEL_ADAPTIVE;
+                       for (i = 0; i < pmu->nr_arch_fixed_counters; i++) {
+                               pmu->fixed_ctr_ctrl_mask &=
+                                       ~(1ULL << (INTEL_PMC_IDX_FIXED + i * 4));
+                       }
+               } else {
+                       pmu->pebs_enable_mask =
+                               ~((1ull << pmu->nr_arch_gp_counters) - 1);
+               }
+       } else {
+               vcpu->arch.perf_capabilities &= ~PERF_CAP_PEBS_MASK;
+       }
 }
 
 static void intel_pmu_init(struct kvm_vcpu *vcpu)
index 68ec5cb..12183c7 100644 (file)
@@ -1448,6 +1448,7 @@ static const u32 msrs_to_save_all[] = {
        MSR_ARCH_PERFMON_EVENTSEL0 + 12, MSR_ARCH_PERFMON_EVENTSEL0 + 13,
        MSR_ARCH_PERFMON_EVENTSEL0 + 14, MSR_ARCH_PERFMON_EVENTSEL0 + 15,
        MSR_ARCH_PERFMON_EVENTSEL0 + 16, MSR_ARCH_PERFMON_EVENTSEL0 + 17,
+       MSR_IA32_PEBS_ENABLE,
 
        MSR_K7_EVNTSEL0, MSR_K7_EVNTSEL1, MSR_K7_EVNTSEL2, MSR_K7_EVNTSEL3,
        MSR_K7_PERFCTR0, MSR_K7_PERFCTR1, MSR_K7_PERFCTR2, MSR_K7_PERFCTR3,