--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_ENERGY_MODEL_H
+#define _LINUX_ENERGY_MODEL_H
+#include <linux/cpumask.h>
+#include <linux/jump_label.h>
+#include <linux/kobject.h>
+#include <linux/rcupdate.h>
+#include <linux/sched/cpufreq.h>
+#include <linux/sched/topology.h>
+#include <linux/types.h>
+
+#ifdef CONFIG_ENERGY_MODEL
+struct em_cap_state {
+ unsigned long frequency; /* Kilo-hertz */
+ unsigned long power; /* Milli-watts */
+ unsigned long cost; /* power * max_frequency / frequency */
+};
+
+struct em_freq_domain {
+ struct em_cap_state *table; /* Capacity states, in ascending order. */
+ int nr_cap_states;
+ unsigned long cpus[0]; /* CPUs of the frequency domain. */
+};
+
+#define EM_CPU_MAX_POWER 0xFFFF
+
+struct em_data_callback {
+ /**
+ * active_power() - Provide power at the next capacity state of a CPU
+ * @power : Active power at the capacity state in mW (modified)
+ * @freq : Frequency at the capacity state in kHz (modified)
+ * @cpu : CPU for which we do this operation
+ *
+ * active_power() must find the lowest capacity state of 'cpu' above
+ * 'freq' and update 'power' and 'freq' to the matching active power
+ * and frequency.
+ *
+ * The power is the one of a single CPU in the domain, expressed in
+ * milli-watts. It is expected to fit in the [0, EM_CPU_MAX_POWER]
+ * range.
+ *
+ * Return 0 on success.
+ */
+ int (*active_power)(unsigned long *power, unsigned long *freq, int cpu);
+};
+#define EM_DATA_CB(_active_power_cb) { .active_power = &_active_power_cb }
+
+struct em_freq_domain *em_cpu_get(int cpu);
+int em_register_freq_domain(cpumask_t *span, unsigned int nr_states,
+ struct em_data_callback *cb);
+
+/**
+ * em_fd_energy() - Estimates the energy consumed by the CPUs of a freq. domain
+ * @fd : frequency domain for which energy has to be estimated
+ * @max_util : highest utilization among CPUs of the domain
+ * @sum_util : sum of the utilization of all CPUs in the domain
+ *
+ * Return: the sum of the energy consumed by the CPUs of the domain assuming
+ * a capacity state satisfying the max utilization of the domain.
+ */
+static inline unsigned long em_fd_energy(struct em_freq_domain *fd,
+ unsigned long max_util, unsigned long sum_util)
+{
+ unsigned long freq, scale_cpu;
+ struct em_cap_state *cs;
+ int i, cpu;
+
+ /*
+ * In order to predict the capacity state, map the utilization of the
+ * most utilized CPU of the frequency domain to a requested frequency,
+ * like schedutil.
+ */
+ cpu = cpumask_first(to_cpumask(fd->cpus));
+ scale_cpu = arch_scale_cpu_capacity(NULL, cpu);
+ cs = &fd->table[fd->nr_cap_states - 1];
+ freq = map_util_freq(max_util, cs->frequency, scale_cpu);
+
+ /*
+ * Find the lowest capacity state of the Energy Model above the
+ * requested frequency.
+ */
+ for (i = 0; i < fd->nr_cap_states; i++) {
+ cs = &fd->table[i];
+ if (cs->frequency >= freq)
+ break;
+ }
+
+ /*
+ * The capacity of a CPU in the domain at that capacity state (cs)
+ * can be computed as:
+ *
+ * cs->freq * scale_cpu
+ * cs->cap = -------------------- (1)
+ * cpu_max_freq
+ *
+ * So, the energy consumed by this CPU at that capacity state is:
+ *
+ * cs->power * cpu_util
+ * cpu_nrg = -------------------- (2)
+ * cs->cap
+ *
+ * since 'cpu_util / cs->cap' represents its percentage of busy time.
+ * By injecting (1) in (2), 'cpu_nrg' can be re-expressed as a product
+ * of two terms:
+ *
+ * cs->power * cpu_max_freq cpu_util
+ * cpu_nrg = ------------------------ * --------- (3)
+ * cs->freq scale_cpu
+ *
+ * The first term is static, and is stored in the em_cap_state struct
+ * as 'cs->cost'.
+ *
+ * Since all CPUs of the domain have the same micro-architecture, they
+ * share the same 'cs->cost', and the same CPU capacity. Hence, the
+ * total energy of the domain (which is the simple sum of the energy of
+ * all of its CPUs) can be factorized as:
+ *
+ * cs->cost * \Sum cpu_util
+ * fd_nrg = ------------------------ (4)
+ * scale_cpu
+ */
+ return cs->cost * sum_util / scale_cpu;
+}
+
+/**
+ * em_fd_nr_cap_states() - Get the number of capacity states of a freq. domain
+ * @fd : frequency domain for which this must be done
+ *
+ * Return: the number of capacity states in the frequency domain table
+ */
+static inline int em_fd_nr_cap_states(struct em_freq_domain *fd)
+{
+ return fd->nr_cap_states;
+}
+
+#else
+struct em_freq_domain {};
+struct em_data_callback {};
+#define EM_DATA_CB(_active_power_cb) { }
+
+static inline int em_register_freq_domain(cpumask_t *span,
+ unsigned int nr_states, struct em_data_callback *cb)
+{
+ return -EINVAL;
+}
+static inline struct em_freq_domain *em_cpu_get(int cpu)
+{
+ return NULL;
+}
+static inline unsigned long em_fd_energy(struct em_freq_domain *fd,
+ unsigned long max_util, unsigned long sum_util)
+{
+ return 0;
+}
+static inline int em_fd_nr_cap_states(struct em_freq_domain *fd)
+{
+ return 0;
+}
+#endif
+
+#endif
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Energy Model of CPUs
+ *
+ * Copyright (c) 2018, Arm ltd.
+ * Written by: Quentin Perret, Arm ltd.
+ */
+
+#define pr_fmt(fmt) "energy_model: " fmt
+
+#include <linux/cpu.h>
+#include <linux/cpumask.h>
+#include <linux/energy_model.h>
+#include <linux/sched/topology.h>
+#include <linux/slab.h>
+
+/* Mapping of each CPU to the frequency domain to which it belongs. */
+static DEFINE_PER_CPU(struct em_freq_domain *, em_data);
+
+/*
+ * Mutex serializing the registrations of frequency domains and letting
+ * callbacks defined by drivers sleep.
+ */
+static DEFINE_MUTEX(em_fd_mutex);
+
+static struct em_freq_domain *em_create_fd(cpumask_t *span, int nr_states,
+ struct em_data_callback *cb)
+{
+ unsigned long opp_eff, prev_opp_eff = ULONG_MAX;
+ unsigned long power, freq, prev_freq = 0;
+ int i, ret, cpu = cpumask_first(span);
+ struct em_cap_state *table;
+ struct em_freq_domain *fd;
+ u64 fmax;
+
+ if (!cb->active_power)
+ return NULL;
+
+ fd = kzalloc(sizeof(*fd) + cpumask_size(), GFP_KERNEL);
+ if (!fd)
+ return NULL;
+
+ table = kcalloc(nr_states, sizeof(*table), GFP_KERNEL);
+ if (!table)
+ goto free_fd;
+
+ /* Build the list of capacity states for this frequency domain */
+ for (i = 0, freq = 0; i < nr_states; i++, freq++) {
+ /*
+ * active_power() is a driver callback which ceils 'freq' to
+ * lowest capacity state of 'cpu' above 'freq' and updates
+ * 'power' and 'freq' accordingly.
+ */
+ ret = cb->active_power(&power, &freq, cpu);
+ if (ret) {
+ pr_err("fd%d: invalid cap. state: %d\n", cpu, ret);
+ goto free_cs_table;
+ }
+
+ /*
+ * We expect the driver callback to increase the frequency for
+ * higher capacity states.
+ */
+ if (freq <= prev_freq) {
+ pr_err("fd%d: non-increasing freq: %lu\n", cpu, freq);
+ goto free_cs_table;
+ }
+
+ /*
+ * The power returned by active_state() is expected to be in
+ * milli-watts and to fit into 16 bits.
+ */
+ if (power > EM_CPU_MAX_POWER) {
+ pr_err("fd%d: power out of scale: %lu\n", cpu, power);
+ goto free_cs_table;
+ }
+
+ table[i].power = power;
+ table[i].frequency = prev_freq = freq;
+
+ /*
+ * The hertz/watts efficiency ratio should decrease as the
+ * frequency grows on sane platforms. But this isn't always
+ * true in practice so warn the user if a higher OPP is more
+ * power efficient than a lower one.
+ */
+ opp_eff = freq / power;
+ if (opp_eff >= prev_opp_eff)
+ pr_warn("fd%d: hertz/watts ratio non-monotonically decreasing: OPP%d >= OPP%d\n",
+ cpu, i, i - 1);
+ prev_opp_eff = opp_eff;
+ }
+
+ /* Compute the cost of each capacity_state. */
+ fmax = (u64) table[nr_states - 1].frequency;
+ for (i = 0; i < nr_states; i++) {
+ table[i].cost = div64_u64(fmax * table[i].power,
+ table[i].frequency);
+ }
+
+ fd->table = table;
+ fd->nr_cap_states = nr_states;
+ cpumask_copy(to_cpumask(fd->cpus), span);
+
+ return fd;
+
+free_cs_table:
+ kfree(table);
+free_fd:
+ kfree(fd);
+
+ return NULL;
+}
+
+/**
+ * em_cpu_get() - Return the frequency domain for a CPU
+ * @cpu : CPU to find the frequency domain for
+ *
+ * Return: the frequency domain to which 'cpu' belongs, or NULL if it doesn't
+ * exist.
+ */
+struct em_freq_domain *em_cpu_get(int cpu)
+{
+ return READ_ONCE(per_cpu(em_data, cpu));
+}
+EXPORT_SYMBOL_GPL(em_cpu_get);
+
+/**
+ * em_register_freq_domain() - Register the Energy Model of a frequency domain
+ * @span : Mask of CPUs in the frequency domain
+ * @nr_states : Number of capacity states to register
+ * @cb : Callback functions providing the data of the Energy Model
+ *
+ * Create Energy Model tables for a frequency domain using the callbacks
+ * defined in cb.
+ *
+ * If multiple clients register the same frequency domain, all but the first
+ * registration will be ignored.
+ *
+ * Return 0 on success
+ */
+int em_register_freq_domain(cpumask_t *span, unsigned int nr_states,
+ struct em_data_callback *cb)
+{
+ unsigned long cap, prev_cap = 0;
+ struct em_freq_domain *fd;
+ int cpu, ret = 0;
+
+ if (!span || !nr_states || !cb)
+ return -EINVAL;
+
+ /*
+ * Use a mutex to serialize the registration of frequency domains and
+ * let the driver-defined callback functions sleep.
+ */
+ mutex_lock(&em_fd_mutex);
+
+ for_each_cpu(cpu, span) {
+ /* Make sure we don't register again an existing domain. */
+ if (READ_ONCE(per_cpu(em_data, cpu))) {
+ ret = -EEXIST;
+ goto unlock;
+ }
+
+ /*
+ * All CPUs of a domain must have the same micro-architecture
+ * since they all share the same table.
+ */
+ cap = arch_scale_cpu_capacity(NULL, cpu);
+ if (prev_cap && prev_cap != cap) {
+ pr_err("CPUs of %*pbl must have the same capacity\n",
+ cpumask_pr_args(span));
+ ret = -EINVAL;
+ goto unlock;
+ }
+ prev_cap = cap;
+ }
+
+ /* Create the frequency domain and add it to the Energy Model. */
+ fd = em_create_fd(span, nr_states, cb);
+ if (!fd) {
+ ret = -EINVAL;
+ goto unlock;
+ }
+
+ for_each_cpu(cpu, span) {
+ /*
+ * The per-cpu array can be concurrently accessed from
+ * em_cpu_get().
+ */
+ smp_store_release(per_cpu_ptr(&em_data, cpu), fd);
+ }
+
+ pr_debug("Created freq domain %*pbl\n", cpumask_pr_args(span));
+unlock:
+ mutex_unlock(&em_fd_mutex);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(em_register_freq_domain);