perf/x86: Add ability to calculate TSC from perf sample timestamps
authorAdrian Hunter <adrian.hunter@intel.com>
Fri, 28 Jun 2013 13:22:18 +0000 (16:22 +0300)
committerIngo Molnar <mingo@kernel.org>
Tue, 23 Jul 2013 10:17:45 +0000 (12:17 +0200)
For modern CPUs, perf clock is directly related to TSC.  TSC
can be calculated from perf clock and vice versa using a simple
calculation.  Two of the three componenets of that calculation
are already exported in struct perf_event_mmap_page.  This patch
exports the third.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Link: http://lkml.kernel.org/r/1372425741-1676-3-git-send-email-adrian.hunter@intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
arch/x86/include/asm/tsc.h
arch/x86/kernel/cpu/perf_event.c
arch/x86/kernel/tsc.c
include/uapi/linux/perf_event.h

index c91e8b9..235be70 100644 (file)
@@ -49,6 +49,7 @@ extern void tsc_init(void);
 extern void mark_tsc_unstable(char *reason);
 extern int unsynchronized_tsc(void);
 extern int check_tsc_unstable(void);
+extern int check_tsc_disabled(void);
 extern unsigned long native_calibrate_tsc(void);
 
 extern int tsc_clocksource_reliable;
index a7c7305..8355c84 100644 (file)
@@ -1884,6 +1884,7 @@ static struct pmu pmu = {
 void arch_perf_update_userpage(struct perf_event_mmap_page *userpg, u64 now)
 {
        userpg->cap_usr_time = 0;
+       userpg->cap_usr_time_zero = 0;
        userpg->cap_usr_rdpmc = x86_pmu.attr_rdpmc;
        userpg->pmc_width = x86_pmu.cntval_bits;
 
@@ -1897,6 +1898,11 @@ void arch_perf_update_userpage(struct perf_event_mmap_page *userpg, u64 now)
        userpg->time_mult = this_cpu_read(cyc2ns);
        userpg->time_shift = CYC2NS_SCALE_FACTOR;
        userpg->time_offset = this_cpu_read(cyc2ns_offset) - now;
+
+       if (sched_clock_stable && !check_tsc_disabled()) {
+               userpg->cap_usr_time_zero = 1;
+               userpg->time_zero = this_cpu_read(cyc2ns_offset);
+       }
 }
 
 /*
index 6ff4924..930e5d4 100644 (file)
@@ -89,6 +89,12 @@ int check_tsc_unstable(void)
 }
 EXPORT_SYMBOL_GPL(check_tsc_unstable);
 
+int check_tsc_disabled(void)
+{
+       return tsc_disabled;
+}
+EXPORT_SYMBOL_GPL(check_tsc_disabled);
+
 #ifdef CONFIG_X86_TSC
 int __init notsc_setup(char *str)
 {
index 0041aed..efef1d3 100644 (file)
@@ -378,7 +378,8 @@ struct perf_event_mmap_page {
                struct {
                        __u64   cap_usr_time            : 1,
                                cap_usr_rdpmc           : 1,
-                               cap_____res             : 62;
+                               cap_usr_time_zero       : 1,
+                               cap_____res             : 61;
                };
        };
 
@@ -420,12 +421,29 @@ struct perf_event_mmap_page {
        __u16   time_shift;
        __u32   time_mult;
        __u64   time_offset;
+       /*
+        * If cap_usr_time_zero, the hardware clock (e.g. TSC) can be calculated
+        * from sample timestamps.
+        *
+        *   time = timestamp - time_zero;
+        *   quot = time / time_mult;
+        *   rem  = time % time_mult;
+        *   cyc = (quot << time_shift) + (rem << time_shift) / time_mult;
+        *
+        * And vice versa:
+        *
+        *   quot = cyc >> time_shift;
+        *   rem  = cyc & ((1 << time_shift) - 1);
+        *   timestamp = time_zero + quot * time_mult +
+        *               ((rem * time_mult) >> time_shift);
+        */
+       __u64   time_zero;
 
                /*
                 * Hole for extension of the self monitor capabilities
                 */
 
-       __u64   __reserved[120];        /* align to 1k */
+       __u64   __reserved[119];        /* align to 1k */
 
        /*
         * Control data for the mmap() data buffer.