KVM: x86/pmu: Introduce pmc->is_paused to reduce the call time of perf interfaces
author Like Xu <likexu@tencent.com>
Wed, 28 Jul 2021 12:07:05 +0000 (20:07 +0800)
committer Paolo Bonzini <pbonzini@redhat.com>
Wed, 4 Aug 2021 09:55:56 +0000 (05:55 -0400)
Based on our observations, after any vm-exit associated with the vPMU, at
least two perf interfaces are called for guest counter emulation, such as
perf_event_{pause, read_value, period}(), and each one will {lock, unlock}
the same perf_event_ctx. These calls become even more frequent when the
guest uses counters in a multiplexed manner.

Holding a lock once and completing the KVM request operations in the perf
context would introduce a set of impractical new interfaces. So we can
further optimize the vPMU implementation by avoiding repeated calls to
these interfaces in the KVM context for at least one pattern:

After we call perf_event_pause() once, the event will be disabled and its
internal count will be reset to 0. So there is no need to pause it again
or read its value. Once the event is paused, the event period will not be
updated until the next time it's resumed or reprogrammed. There is also
no need to call perf_event_period() twice for a non-running counter,
considering the perf_event for a running counter is never paused.

Based on this implementation, for the following common usage of
sampling 4 events using perf on a 4u8g guest:

  echo 0 > /proc/sys/kernel/watchdog
  echo 25 > /proc/sys/kernel/perf_cpu_time_max_percent
  echo 10000 > /proc/sys/kernel/perf_event_max_sample_rate
  echo 0 > /proc/sys/kernel/perf_cpu_time_max_percent
  for i in `seq 1 1 10`
  do
  taskset -c 0 perf record \
  -e cpu-cycles -e instructions -e branch-instructions -e cache-misses \
  /root/br_instr a
  done

the average latency of the guest NMI handler is reduced from
37646.7 ns to 32929.3 ns (~1.14x speed up) on the Intel ICX server.
Also, in addition to collecting more samples, no loss of sampling
accuracy was observed compared to before the optimization.

Signed-off-by: Like Xu <likexu@tencent.com>
Message-Id: <20210728120705.6855-1-likexu@tencent.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Acked-by: Peter Zijlstra <peterz@infradead.org>
arch/x86/include/asm/kvm_host.h
arch/x86/kvm/pmu.c
arch/x86/kvm/pmu.h
arch/x86/kvm/vmx/pmu_intel.c

index 99f37781a6fcf781dcd5e9ef4488b19a8a6b9c1b..a079880d4cd5fec2ca91b4164908543236d44ac5 100644 (file)
@@ -482,6 +482,7 @@ struct kvm_pmc {
         * ctrl value for fixed counters.
         */
        u64 current_config;
+       bool is_paused;
 };
 
 struct kvm_pmu {
index 827886c12c16e6a9b297ec58c1a997485288dadb..0772bad9165c55b09c805caee6287de9df1257bb 100644 (file)
@@ -137,18 +137,20 @@ static void pmc_reprogram_counter(struct kvm_pmc *pmc, u32 type,
        pmc->perf_event = event;
        pmc_to_pmu(pmc)->event_count++;
        clear_bit(pmc->idx, pmc_to_pmu(pmc)->reprogram_pmi);
+       pmc->is_paused = false;
 }
 
 static void pmc_pause_counter(struct kvm_pmc *pmc)
 {
        u64 counter = pmc->counter;
 
-       if (!pmc->perf_event)
+       if (!pmc->perf_event || pmc->is_paused)
                return;
 
        /* update counter, reset event value to avoid redundant accumulation */
        counter += perf_event_pause(pmc->perf_event, true);
        pmc->counter = counter & pmc_bitmask(pmc);
+       pmc->is_paused = true;
 }
 
 static bool pmc_resume_counter(struct kvm_pmc *pmc)
@@ -163,6 +165,7 @@ static bool pmc_resume_counter(struct kvm_pmc *pmc)
 
        /* reuse perf_event to serve as pmc_reprogram_counter() does*/
        perf_event_enable(pmc->perf_event);
+       pmc->is_paused = false;
 
        clear_bit(pmc->idx, (unsigned long *)&pmc_to_pmu(pmc)->reprogram_pmi);
        return true;
index 67e753edfa225bafdd2a728e5c2d9939b0bc2db6..0e4f2b1fa9fbdc20574483a04ec0c74a14c96550 100644 (file)
@@ -55,7 +55,7 @@ static inline u64 pmc_read_counter(struct kvm_pmc *pmc)
        u64 counter, enabled, running;
 
        counter = pmc->counter;
-       if (pmc->perf_event)
+       if (pmc->perf_event && !pmc->is_paused)
                counter += perf_event_read_value(pmc->perf_event,
                                                 &enabled, &running);
        /* FIXME: Scaling needed? */
index 9efc1a6b86930ad5879faa487270c8ab459a2da6..10cc4f65c4efdb36c0c2fce05e3945d138cff093 100644 (file)
@@ -437,13 +437,13 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
                            !(msr & MSR_PMC_FULL_WIDTH_BIT))
                                data = (s64)(s32)data;
                        pmc->counter += data - pmc_read_counter(pmc);
-                       if (pmc->perf_event)
+                       if (pmc->perf_event && !pmc->is_paused)
                                perf_event_period(pmc->perf_event,
                                                  get_sample_period(pmc, data));
                        return 0;
                } else if ((pmc = get_fixed_pmc(pmu, msr))) {
                        pmc->counter += data - pmc_read_counter(pmc);
-                       if (pmc->perf_event)
+                       if (pmc->perf_event && !pmc->is_paused)
                                perf_event_period(pmc->perf_event,
                                                  get_sample_period(pmc, data));
                        return 0;