cpufreq/x86: Enable __getload() helper for Medfield-x86 platform
author Sundar Iyer <sundar.iyer@intel.com>
Mon, 19 Mar 2012 09:57:59 +0000 (15:27 +0530)
committer buildbot <buildbot@intel.com>
Mon, 14 May 2012 12:00:15 +0000 (05:00 -0700)
BZ: 31189

Enable the hardware-based load calculation for the Medfield x86-HT platform.
This ensures that the ondemand governor responds correctly to load measured
on the physical core rather than on the logical siblings.
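
With the hook populated, a governor can query the measured load through the
driver; a minimal sketch of a hypothetical call site (the governor-side
plumbing is not part of this patch):

    if (cpufreq_driver->getload)
        load = cpufreq_driver->getload(policy, cpu);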

Change-Id: Iaeaf5f154ca955f3e8cfe09c052f23913edbf5a1
Signed-off-by: Chuan A Wang <chuan.a.wang@intel.com>
Signed-off-by: Youquan Song <youquan.song@intel.com>
Signed-off-by: Ke Chen <ke.chen@intel.com>
Signed-off-by: Sundar Iyer <sundar.iyer@intel.com>
Reviewed-on: http://android.intel.com:8080/48470
Reviewed-by: Martin, LoicX <loicx.martin@intel.com>
Tested-by: Martin, LoicX <loicx.martin@intel.com>
Reviewed-by: buildbot <buildbot@intel.com>
Tested-by: buildbot <buildbot@intel.com>
drivers/cpufreq/sfi-cpufreq.c

index 688ef5c..dd9acee 100644
@@ -205,6 +205,193 @@ static unsigned int get_cur_freq_on_cpu(unsigned int cpu)
        return cached_freq;
 }
 
+/*
+ * The MSRs record residency with a 1 MHz clock, so the raw value must
+ * be multiplied by (maximum clock / 1 MHz) to compute the actual
+ * residency at maximum speed.
+ * For MFLD, the maximum speed is 1597 MHz, so the multiplier is 1597.
+ */
+#define MFLD_RESIDENCY_COUNT_MULTIPLIER (1597UL)
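+/*
+ * For example, a raw residency delta of 1000 (1000 us at the 1 MHz
+ * count clock) scales to 1000 * 1597 = 1,597,000 cycles, the same time
+ * base as the TSC delta it is compared against in calc_cpu_load().
+ */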
+/* C-state related data structures and functions for CPU load calculation */
+
+/* MSR residency counter indices */
+enum {
+       MPERF = 0, /* C0 */
+       APERF,     /* C1 */
+       C2,
+       C3,
+       C4,
+       C5,
+       C6,
+       C7,
+       C8,
+       C9,
+       /* C10, */
+       /* C11, */
+       MAX_MSR_ADDRESSES
+};
+
+/*
+ * The core MSR addresses are hard-coded for the Intel ATOM MFLD.
+ * They need to be re-checked for other Intel devices.
+ */
+static unsigned int CoreResidencyMSRAddresses[MAX_MSR_ADDRESSES] = {
+       0xE7,                   /* MPERF */
+       0xFFFFFFFF,             /* C1 */
+       0x3F8,                   /* C2 */
+       0xFFFFFFFF,             /* C3 */
+       0x3F9,                   /* C4 */
+       0xFFFFFFFF,             /* C5 */
+       0x3FA,                   /* C6 */
+       0xFFFFFFFF,             /* C7 */
+       0xFFFFFFFF,             /* C8 */
+       0xFFFFFFFF              /* C9 */
+};
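+
+/*
+ * Entries of 0xFFFFFFFF mark C-states with no dedicated residency MSR
+ * on this SoC; the read loop in calc_cpu_load() skips them.
+ */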
+
+/*
+ * Per-cpu structure holding MSR residency counts,
+ * timer-TSC values etc.
+ */
+struct per_cpu_t {
+       u64 tsc; /* 8 bytes */
+       u64 residencies[MAX_MSR_ADDRESSES]; /* 80 bytes */
+       u64 prev_msr_vals[MAX_MSR_ADDRESSES]; /* 80 bytes */
+};
+
+/*
+ * Convenience macros for accessing per-cpu residencies
+ */
+#define RESIDENCY(p, i) ((p)->residencies[(i)])
+#define PREV_MSR_VAL(p, i) ((p)->prev_msr_vals[(i)])
+
+static DEFINE_PER_CPU(struct per_cpu_t, per_cpu_counts);
+
+/*
+ * Do we read the TSC MSR directly to determine
+ * TSC (as opposed to using a kernel
+ * function call -- e.g. rdtscll)?
+ */
+#define READ_MSR_FOR_TSC 1
+
+/* Helper function to get TSC */
+static inline void tscval(u64 *v)
+{
+#if READ_MSR_FOR_TSC
+       u64 res;
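+       /* MSR 0x10 is IA32_TIME_STAMP_COUNTER */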
+       rdmsrl(0x10, res);
+       *v = res;
+#else
+       unsigned int aux;
+       rdtscpll(*v, aux);
+#endif
+}
+
+#define C1 APERF
+
+/*
+ * Read an MSR and return the delta from the previous reading,
+ * handling counter wraparound.
+ */
+static u64 read_one_residency(int cpu, int msr_addr, u64 *prev)
+{
+       u64 curr = 0, delta = 0;
+
+       rdmsrl(msr_addr, curr);
+
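+       /* handle 64-bit counter wraparound */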
+       if (unlikely(curr < *prev))
+               delta = ((u64)(~0) - *prev) + (curr + 1);
+       else
+               delta = curr - *prev;
+
+       *prev = curr;
+
+       return delta;
+}
+
+static unsigned int calc_cpu_load(struct per_cpu_t *pcpu, int cpu)
+{
+       int i = 0;
+       u64 prev;
+       int msr_addr;
+       u64 tsc;
+       u64 delta_tsc, c0;
+       u64 m_delta, c_delta;
+       bool is_first = false;
+       u64 cx_total = 0;
+       u32 clock_multiplier = MFLD_RESIDENCY_COUNT_MULTIPLIER;
+       u64 cpu_load = 0;
+       /*
+        * Ensure updates are propagated.
+        */
+       smp_mb();
+
+       if (unlikely(PREV_MSR_VAL(pcpu, MPERF) == 0))
+               is_first = true;
+
+       msr_addr = CoreResidencyMSRAddresses[MPERF];
+       prev = PREV_MSR_VAL(pcpu, MPERF);
+       /*
+        * Read MPERF, compute DELTA(MPERF)
+        */
+       m_delta = read_one_residency(cpu, msr_addr, &prev);
+
+       PREV_MSR_VAL(pcpu, MPERF) = prev;
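+       /*
+        * m_delta itself is not used below (C0 is derived from the TSC
+        * delta); the read above keeps prev_msr_vals[MPERF] current.
+        */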
+       /*
+        * 'C1' is a DERIVED residency -- we don't read MSRs for it.
+        * Instead, its value is computed from the OTHER Cx/MPERF/TSC
+        * values; reset it to zero here. Currently, C1 is combined with
+        * C0 for the load calculation, and experiments show that this
+        * makes sense.
+        */
+       RESIDENCY(pcpu, C1) = 0;
+       /*
+        * Calculate (non-C1) C-state residency
+        */
+       for (i = C2; i <= C6; ++i) {
+               RESIDENCY(pcpu, i) = 0;
+               msr_addr = CoreResidencyMSRAddresses[i];
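+               /* skip C-states with no dedicated residency MSR */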
+               if (msr_addr <= 0)
+                       continue;
+
+               prev = PREV_MSR_VAL(pcpu, i);
+               c_delta = read_one_residency(cpu, msr_addr, &prev);
+               PREV_MSR_VAL(pcpu, i) = prev;
+
+               if (!is_first && c_delta) {
+                       c_delta *= clock_multiplier;
+                       RESIDENCY(pcpu, i) = c_delta;
+                       cx_total += c_delta;
+               }
+       }
+
+       /* compute time interval between two measurements */
+       tscval(&tsc);
+       delta_tsc = tsc - pcpu->tsc; /* TSC delta */
+       pcpu->tsc = tsc;
+
+       /* Actually, this is the C0+C1 residency */
+       RESIDENCY(pcpu, MPERF) = c0 = delta_tsc - cx_total;
+       /* cpu_load = 100*c0 / delta_tsc */
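+       /*
+        * e.g. delta_tsc = 1,000,000 and cx_total = 600,000 give
+        * c0 = 400,000 and a reported load of 40 (percent).
+        */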
+       cpu_load = c0 * 100;
+       do_div(cpu_load, delta_tsc);
+
+       return (unsigned int)cpu_load;
+}
+
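+/*
+ * getload callback: both the per-cpu data and the residency MSRs are
+ * local reads, so this is expected to run on the CPU being queried.
+ */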
+unsigned int cpufreq_get_load(struct cpufreq_policy *policy, unsigned int cpu)
+{
+       struct per_cpu_t *pcpu = NULL;
+
+       pcpu = &__get_cpu_var(per_cpu_counts);
+
+       return calc_cpu_load(pcpu, cpu);
+}
+
 static int sfi_cpufreq_target(struct cpufreq_policy *policy,
                               unsigned int target_freq, unsigned int relation)
 {
@@ -367,8 +554,10 @@ static int sfi_cpufreq_cpu_init(struct cpufreq_policy *policy)
 
 
        /* Check for APERF/MPERF support in hardware */
-       if (cpu_has(c, X86_FEATURE_APERFMPERF))
+       if (cpu_has(c, X86_FEATURE_APERFMPERF)) {
                sfi_cpufreq_driver.getavg = cpufreq_get_measured_perf;
+               sfi_cpufreq_driver.getload = cpufreq_get_load;
+       }
 
        pr_debug("CPU%u - SFI performance management activated.\n", cpu);
        for (i = 0; i < perf->state_count; i++)