From d0ba132b1488a9ea3b628c02e8e9fead0aba6be4 Mon Sep 17 00:00:00 2001
From: Sundar Iyer
Date: Mon, 19 Mar 2012 15:27:59 +0530
Subject: [PATCH] cpufreq/x86: Enable __getload() helper for Medfield-x86
 platform

BZ: 31189

Enable the hardware based load calculation for the Medfield x86-HT
platform. This makes sure that the on-demand governor responds correctly
to the load based on the physical core and not the logical siblings

Change-Id: Iaeaf5f154ca955f3e8cfe09c052f23913edbf5a1
Signed-off-by: Chuan A Wang
Signed-off-by: Youquan Song
Signed-off-by: Ke Chen
Signed-off-by: Sundar Iyer
Reviewed-on: http://android.intel.com:8080/48470
Reviewed-by: Martin, LoicX
Tested-by: Martin, LoicX
Reviewed-by: buildbot
Tested-by: buildbot
---
 drivers/cpufreq/sfi-cpufreq.c | 191 +++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 190 insertions(+), 1 deletion(-)

diff --git a/drivers/cpufreq/sfi-cpufreq.c b/drivers/cpufreq/sfi-cpufreq.c
index 688ef5c..dd9acee 100644
--- a/drivers/cpufreq/sfi-cpufreq.c
+++ b/drivers/cpufreq/sfi-cpufreq.c
@@ -205,6 +205,193 @@ static unsigned int get_cur_freq_on_cpu(unsigned int cpu)
 	return cached_freq;
 }
 
+/*
+ * Since MSRs record the residency with a 1MHz clock,
+ * to compute the actual residency at maximum speed
+ * the value should be multiplied by (maximum clock / 1MHz).
+ * For MFLD, maximum speed is 1597MHz, so the multiplier = 1597.
+ */
+#define MFLD_RESIDENCY_COUNT_MULTIPLIER (1597UL)
+/* C-states related data structures and functions for cpu load calculation */
+
+/* MSR counter stuff */
+enum {
+	MPERF = 0, /* C0 */
+	APERF, /* C1 */
+	C2,
+	C3,
+	C4,
+	C5,
+	C6,
+	C7,
+	C8,
+	C9,
+	/* C10, */
+	/* C11, */
+	MAX_MSR_ADDRESSES
+};
+
+/*
+ * The core MSR addresses are hard-coded for Intel ATOM MFLD.
+ * The addresses need to be re-checked for other Intel devices.
+ * + */ +static unsigned int CoreResidencyMSRAddresses[MAX_MSR_ADDRESSES] = { + 0xE7, /*MPERF*/ + 0xFFFFFFFF, /*C1*/ + 0x3F8, /*C2*/ + 0xFFFFFFFF, /*C3*/ + 0x3F9, /*C4*/ + 0xFFFFFFFF, /*C5*/ + 0x3FA, /*C6*/ + 0xFFFFFFFF, /*C7*/ + 0xFFFFFFFF, /*C8*/ + 0xFFFFFFFF /*C9*/ +}; + +/* + * Per-cpu structure holding MSR residency counts, + * timer-TSC values etc. + */ +struct per_cpu_t { + u64 tsc; /* 8 bytes */ + u64 residencies[MAX_MSR_ADDRESSES]; /* 96 bytes */ + u64 prev_msr_vals[MAX_MSR_ADDRESSES]; /* 96 bytes */ +}; + +/* + * Convenience macros for accessing per-cpu residencies + */ +#define RESIDENCY(p, i) ((p)->residencies[(i)]) +#define PREV_MSR_VAL(p, i) ((p)->prev_msr_vals[(i)]) + +static DEFINE_PER_CPU(struct per_cpu_t, per_cpu_counts); + +/* + * Do we read the TSC MSR directly to determine + * TSC (as opposed to using a kernel + * function call -- e.g. rdtscll)? + */ +#define READ_MSR_FOR_TSC 1 + +/* Helper function to get TSC */ +static inline void tscval(u64 *v) +{ +#if READ_MSR_FOR_TSC + u64 res; + rdmsrl(0x10, res); + *v = res; +#else + unsigned int aux; + rdtscpll(*v, aux); +#endif +}; + +#define C1 APERF + +/* + * Get the delta residency for MSRs + */ +static u64 read_one_residency(int cpu, int msr_addr, u64 *prev) +{ + u64 curr = 0, delta = 0; + + rdmsrl(msr_addr, curr); + + if (unlikely(curr < *prev)) + delta = ((u64)(~0) - *prev) + (curr + 1); + else + delta = curr - *prev; + + *prev = curr; + + return delta; +}; + +static unsigned int calc_cpu_load(struct per_cpu_t *pcpu, int cpu) +{ + int i = 0; + u64 prev; + int msr_addr; + u64 tsc; + u64 delta_tsc, c0; + u64 m_delta, c_delta; + bool is_first = false; + u64 cx_total = 0; + u32 clock_multiplier = MFLD_RESIDENCY_COUNT_MULTIPLIER; + u64 cpu_load = 0; + /* + * Ensure updates are propagated. 
+ */ + smp_mb(); + + is_first = false; + + if (unlikely(PREV_MSR_VAL(pcpu, MPERF) == 0)) + is_first = true; + + msr_addr = CoreResidencyMSRAddresses[MPERF]; + prev = PREV_MSR_VAL(pcpu, MPERF); + /* + * Read MPERF, compute DELTA(MPERF) + */ + m_delta = read_one_residency(cpu, msr_addr, &prev); + + PREV_MSR_VAL(pcpu, MPERF) = prev; + /* + * 'C1' is a DERIVED residency -- we + * don't read MSRs for it. Instead, we + * compute its value from the values of + * OTHER Cx/MPERF/TSC. Reset to zero here. + * Currently, we combine C1 with C0 together + * as C0 for load calculation. And expriments + * show that make sense. + */ + RESIDENCY(pcpu, C1) = 0; + /* + * Calculate (non-C1) C-state residency + */ + for (i = C2; i <= C6; ++i) { + RESIDENCY(pcpu, i) = 0; + msr_addr = CoreResidencyMSRAddresses[i]; + if (msr_addr <= 0) + continue; + + prev = PREV_MSR_VAL(pcpu, i); + c_delta = read_one_residency(cpu, msr_addr, &prev); + PREV_MSR_VAL(pcpu, i) = prev; + + if (!is_first && c_delta) { + c_delta *= clock_multiplier; + RESIDENCY(pcpu, i) = c_delta; + cx_total += c_delta; + } + } + + /* compute time interval between two measurements */ + tscval(&tsc); + delta_tsc = tsc - pcpu->tsc; /* TSC delta */ + pcpu->tsc = tsc; + + /*Actually, it is c0+c1 residency*/ + RESIDENCY(pcpu, MPERF) = c0 = delta_tsc - cx_total; + /* cpu_load = 100*c0 / delta_tsc */ + cpu_load = c0 * 100; + do_div(cpu_load, delta_tsc); + + return (unsigned int)cpu_load; + +}; + +unsigned int cpufreq_get_load(struct cpufreq_policy *policy, unsigned int cpu) +{ + struct per_cpu_t *pcpu = NULL; + + pcpu = &__get_cpu_var(per_cpu_counts); + + return calc_cpu_load(pcpu, cpu); +} + static int sfi_cpufreq_target(struct cpufreq_policy *policy, unsigned int target_freq, unsigned int relation) { @@ -367,8 +554,10 @@ static int sfi_cpufreq_cpu_init(struct cpufreq_policy *policy) /* Check for APERF/MPERF support in hardware */ - if (cpu_has(c, X86_FEATURE_APERFMPERF)) + if (cpu_has(c, X86_FEATURE_APERFMPERF)) { 
sfi_cpufreq_driver.getavg = cpufreq_get_measured_perf; + sfi_cpufreq_driver.getload = cpufreq_get_load; + } pr_debug("CPU%u - SFI performance management activated.\n", cpu); for (i = 0; i < perf->state_count; i++) -- 2.7.4