From d0ba132b1488a9ea3b628c02e8e9fead0aba6be4 Mon Sep 17 00:00:00 2001
From: Sundar Iyer
Date: Mon, 19 Mar 2012 15:27:59 +0530
Subject: [PATCH] cpufreq/x86: Enable __getload() helper for Medfield-x86
 platform

BZ: 31189

Enable the hardware based load calculation for the Medfield x86-HT
platform. This makes sure that the on-demand governor responds correctly
to the load based on the physical core and not the logical siblings

Change-Id: Iaeaf5f154ca955f3e8cfe09c052f23913edbf5a1
Signed-off-by: Chuan A Wang
Signed-off-by: Youquan Song
Signed-off-by: Ke Chen
Signed-off-by: Sundar Iyer
Reviewed-on: http://android.intel.com:8080/48470
Reviewed-by: Martin, LoicX
Tested-by: Martin, LoicX
Reviewed-by: buildbot
Tested-by: buildbot
---
 drivers/cpufreq/sfi-cpufreq.c | 191 +++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 190 insertions(+), 1 deletion(-)

diff --git a/drivers/cpufreq/sfi-cpufreq.c b/drivers/cpufreq/sfi-cpufreq.c
index 688ef5c..dd9acee 100644
--- a/drivers/cpufreq/sfi-cpufreq.c
+++ b/drivers/cpufreq/sfi-cpufreq.c
@@ -205,6 +205,193 @@ static unsigned int get_cur_freq_on_cpu(unsigned int cpu)
 	return cached_freq;
 }
 
+/*
+ * Since MSRs record the residency with a 1MHz clock,
+ * to compute the actual residency at maximum speed
+ * the value should be multiplied by (maximum clock / 1MHz).
+ * For MFLD, maximum speed is 1597MHz, so the multiplier = 1597.
+ */
+#define MFLD_RESIDENCY_COUNT_MULTIPLIER (1597UL)
+/* C-states related data structures and functions for cpu load calculation */
+
+/* MSR counter stuff */
+enum {
+	MPERF = 0, /* C0 */
+	APERF, /* C1 */
+	C2,
+	C3,
+	C4,
+	C5,
+	C6,
+	C7,
+	C8,
+	C9,
+	/* C10, */
+	/* C11, */
+	MAX_MSR_ADDRESSES
+};
+
+/*
+ * The core MSR addresses are hard-coded for Intel ATOM MFLD.
+ * The addresses need to be re-checked for other Intel devices.
+ * + */ +static unsigned int CoreResidencyMSRAddresses[MAX_MSR_ADDRESSES] = { + 0xE7, /*MPERF*/ + 0xFFFFFFFF, /*C1*/ + 0x3F8, /*C2*/ + 0xFFFFFFFF, /*C3*/ + 0x3F9, /*C4*/ + 0xFFFFFFFF, /*C5*/ + 0x3FA, /*C6*/ + 0xFFFFFFFF, /*C7*/ + 0xFFFFFFFF, /*C8*/ + 0xFFFFFFFF /*C9*/ +}; + +/* + * Per-cpu structure holding MSR residency counts, + * timer-TSC values etc. + */ +struct per_cpu_t { + u64 tsc; /* 8 bytes */ + u64 residencies[MAX_MSR_ADDRESSES]; /* 96 bytes */ + u64 prev_msr_vals[MAX_MSR_ADDRESSES]; /* 96 bytes */ +}; + +/* + * Convenience macros for accessing per-cpu residencies + */ +#define RESIDENCY(p, i) ((p)->residencies[(i)]) +#define PREV_MSR_VAL(p, i) ((p)->prev_msr_vals[(i)]) + +static DEFINE_PER_CPU(struct per_cpu_t, per_cpu_counts); + +/* + * Do we read the TSC MSR directly to determine + * TSC (as opposed to using a kernel + * function call -- e.g. rdtscll)? + */ +#define READ_MSR_FOR_TSC 1 + +/* Helper function to get TSC */ +static inline void tscval(u64 *v) +{ +#if READ_MSR_FOR_TSC + u64 res; + rdmsrl(0x10, res); + *v = res; +#else + unsigned int aux; + rdtscpll(*v, aux); +#endif +}; + +#define C1 APERF + +/* + * Get the delta residency for MSRs + */ +static u64 read_one_residency(int cpu, int msr_addr, u64 *prev) +{ + u64 curr = 0, delta = 0; + + rdmsrl(msr_addr, curr); + + if (unlikely(curr < *prev)) + delta = ((u64)(~0) - *prev) + (curr + 1); + else + delta = curr - *prev; + + *prev = curr; + + return delta; +}; + +static unsigned int calc_cpu_load(struct per_cpu_t *pcpu, int cpu) +{ + int i = 0; + u64 prev; + int msr_addr; + u64 tsc; + u64 delta_tsc, c0; + u64 m_delta, c_delta; + bool is_first = false; + u64 cx_total = 0; + u32 clock_multiplier = MFLD_RESIDENCY_COUNT_MULTIPLIER; + u64 cpu_load = 0; + /* + * Ensure updates are propagated. 
+ */ + smp_mb(); + + is_first = false; + + if (unlikely(PREV_MSR_VAL(pcpu, MPERF) == 0)) + is_first = true; + + msr_addr = CoreResidencyMSRAddresses[MPERF]; + prev = PREV_MSR_VAL(pcpu, MPERF); + /* + * Read MPERF, compute DELTA(MPERF) + */ + m_delta = read_one_residency(cpu, msr_addr, &prev); + + PREV_MSR_VAL(pcpu, MPERF) = prev; + /* + * 'C1' is a DERIVED residency -- we + * don't read MSRs for it. Instead, we + * compute its value from the values of + * OTHER Cx/MPERF/TSC. Reset to zero here. + * Currently, we combine C1 with C0 together + * as C0 for load calculation. And expriments + * show that make sense. + */ + RESIDENCY(pcpu, C1) = 0; + /* + * Calculate (non-C1) C-state residency + */ + for (i = C2; i <= C6; ++i) { + RESIDENCY(pcpu, i) = 0; + msr_addr = CoreResidencyMSRAddresses[i]; + if (msr_addr <= 0) + continue; + + prev = PREV_MSR_VAL(pcpu, i); + c_delta = read_one_residency(cpu, msr_addr, &prev); + PREV_MSR_VAL(pcpu, i) = prev; + + if (!is_first && c_delta) { + c_delta *= clock_multiplier; + RESIDENCY(pcpu, i) = c_delta; + cx_total += c_delta; + } + } + + /* compute time interval between two measurements */ + tscval(&tsc); + delta_tsc = tsc - pcpu->tsc; /* TSC delta */ + pcpu->tsc = tsc; + + /*Actually, it is c0+c1 residency*/ + RESIDENCY(pcpu, MPERF) = c0 = delta_tsc - cx_total; + /* cpu_load = 100*c0 / delta_tsc */ + cpu_load = c0 * 100; + do_div(cpu_load, delta_tsc); + + return (unsigned int)cpu_load; + +}; + +unsigned int cpufreq_get_load(struct cpufreq_policy *policy, unsigned int cpu) +{ + struct per_cpu_t *pcpu = NULL; + + pcpu = &__get_cpu_var(per_cpu_counts); + + return calc_cpu_load(pcpu, cpu); +} + static int sfi_cpufreq_target(struct cpufreq_policy *policy, unsigned int target_freq, unsigned int relation) { @@ -367,8 +554,10 @@ static int sfi_cpufreq_cpu_init(struct cpufreq_policy *policy) /* Check for APERF/MPERF support in hardware */ - if (cpu_has(c, X86_FEATURE_APERFMPERF)) + if (cpu_has(c, X86_FEATURE_APERFMPERF)) { 
sfi_cpufreq_driver.getavg = cpufreq_get_measured_perf; + sfi_cpufreq_driver.getload = cpufreq_get_load; + } pr_debug("CPU%u - SFI performance management activated.\n", cpu); for (i = 0; i < perf->state_count; i++) -- 2.7.4