if (ret || val > 1)
return -EINVAL;
- get_online_cpus();
+ cpus_read_lock();
set_boost(policy, val);
- put_online_cpus();
+ cpus_read_unlock();
return count;
}
cpufreq_driver->boost_enabled = state;
write_unlock_irqrestore(&cpufreq_driver_lock, flags);
- get_online_cpus();
+ cpus_read_lock();
for_each_active_policy(policy) {
ret = cpufreq_driver->set_boost(policy, state);
if (ret)
goto err_reset_state;
}
- put_online_cpus();
+ cpus_read_unlock();
return 0;
err_reset_state:
- put_online_cpus();
+ cpus_read_unlock();
write_lock_irqsave(&cpufreq_driver_lock, flags);
cpufreq_driver->boost_enabled = !state;
default_powersave_bias = powersave_bias;
cpumask_clear(&done);
- get_online_cpus();
+ cpus_read_lock();
for_each_online_cpu(cpu) {
struct cpufreq_policy *policy;
struct policy_dbs_info *policy_dbs;
od_tuners = dbs_data->tuners;
od_tuners->powersave_bias = default_powersave_bias;
}
- put_online_cpus();
+ cpus_read_unlock();
}
void od_register_powersave_bias_handler(unsigned int (*f)
#include <asm/cpu_device_id.h>
#include <asm/cpufeature.h>
#include <asm/intel-family.h>
+#include "../drivers/thermal/intel/thermal_interrupt.h"
#define INTEL_PSTATE_SAMPLING_INTERVAL (10 * NSEC_PER_MSEC)
* @sched_flags: Store scheduler flags for possible cross CPU update
* @hwp_boost_min: Last HWP boosted min performance
* @suspended: Whether or not the driver has been suspended.
+ * @hwp_notify_work: workqueue for HWP notifications.
*
* This structure stores per CPU instance data for all CPUs.
*/
unsigned int sched_flags;
u32 hwp_boost_min;
bool suspended;
+ struct delayed_work hwp_notify_work;
};
static struct cpudata **all_cpu_data;
/************************** sysfs end ************************/
+static void intel_pstate_notify_work(struct work_struct *work)
+{
+ mutex_lock(&intel_pstate_driver_lock);
+ cpufreq_update_policy(smp_processor_id());
+ wrmsrl(MSR_HWP_STATUS, 0);
+ mutex_unlock(&intel_pstate_driver_lock);
+}
+
+void notify_hwp_interrupt(void)
+{
+ unsigned int this_cpu = smp_processor_id();
+ struct cpudata *cpudata;
+ u64 value;
+
+ if (!hwp_active || !boot_cpu_has(X86_FEATURE_HWP_NOTIFY))
+ return;
+
+ rdmsrl(MSR_HWP_STATUS, value);
+ if (!(value & 0x01))
+ return;
+
+ cpudata = all_cpu_data[this_cpu];
+ schedule_delayed_work_on(this_cpu, &cpudata->hwp_notify_work, msecs_to_jiffies(10));
+}
+
+static void intel_pstate_enable_hwp_interrupt(struct cpudata *cpudata)
+{
+ /* Enable HWP notification interrupt for guaranteed performance change */
+ if (boot_cpu_has(X86_FEATURE_HWP_NOTIFY)) {
+ INIT_DELAYED_WORK(&cpudata->hwp_notify_work, intel_pstate_notify_work);
+ wrmsrl_on_cpu(cpudata->cpu, MSR_HWP_INTERRUPT, 0x01);
+ }
+}
+
static void intel_pstate_hwp_enable(struct cpudata *cpudata)
{
/* First disable HWP notification interrupt as we don't process them */
wrmsrl_on_cpu(cpudata->cpu, MSR_PM_ENABLE, 0x1);
if (cpudata->epp_default == -EINVAL)
cpudata->epp_default = intel_pstate_get_epp(cpudata, 0);
+
+ intel_pstate_enable_hwp_interrupt(cpudata);
}
static int atom_get_min_pstate(void)
{
unsigned int cpu;
- get_online_cpus();
+ cpus_read_lock();
for_each_online_cpu(cpu) {
if (all_cpu_data[cpu]) {
if (intel_pstate_driver == &intel_pstate)
all_cpu_data[cpu] = NULL;
}
}
- put_online_cpus();
+ cpus_read_unlock();
intel_pstate_driver = NULL;
}
if (!x86_match_cpu(powernow_k8_ids))
return -ENODEV;
- get_online_cpus();
+ cpus_read_lock();
for_each_online_cpu(i) {
smp_call_function_single(i, check_supported_cpu, &ret, 1);
if (!ret)
}
if (supported_cpus != num_online_cpus()) {
- put_online_cpus();
+ cpus_read_unlock();
return -ENODEV;
}
- put_online_cpus();
+ cpus_read_unlock();
ret = cpufreq_register_driver(&cpufreq_amd64_driver);
if (ret)
unsigned int cpu;
cpumask_t mask;
- get_online_cpus();
+ cpus_read_lock();
cpumask_and(&mask, &chip->mask, cpu_online_mask);
smp_call_function_any(&mask,
powernv_cpufreq_throttle_check, NULL, 0);
cpufreq_cpu_put(policy);
}
out:
- put_online_cpus();
+ cpus_read_unlock();
}
static int powernv_cpufreq_occ_msg(struct notifier_block *nb,
if (!required_opp_tables)
return 0;
+ /* required-opps not fully initialized yet */
+ if (lazy_linking_pending(opp_table))
+ return -EBUSY;
+
/*
* We only support genpd's OPPs in the "required-opps" for now, as we
* don't know much about other use cases. Error out if the required OPP
return -ENOENT;
}
- /* required-opps not fully initialized yet */
- if (lazy_linking_pending(opp_table))
- return -EBUSY;
-
/* Single genpd case */
if (!genpd_virt_devs)
return _set_required_opp(dev, dev, opp, 0);
* so that things like MSI message writing will behave as expected
* (e.g. if the device really is in D0 at enable time).
*/
- if (dev->pm_cap) {
- u16 pmcsr;
- pci_read_config_word(dev, dev->pm_cap + PCI_PM_CTRL, &pmcsr);
- dev->current_state = (pmcsr & PCI_PM_CTRL_STATE_MASK);
- }
+ pci_update_current_state(dev, dev->current_state);
if (atomic_inc_return(&dev->enable_cnt) > 1)
return 0; /* already enabled */
if (enable) {
int error;
- if (pci_pme_capable(dev, state))
+ /*
+ * Enable PME signaling if the device can signal PME from
+ * D3cold regardless of whether or not it can signal PME from
+ * the current target state, because that will allow it to
+ * signal PME when the hierarchy above it goes into D3cold and
+ * the device itself ends up in D3cold as a result of that.
+ */
+ if (pci_pme_capable(dev, state) || pci_pme_capable(dev, PCI_D3cold))
pci_pme_active(dev, true);
else
ret = 1;
if (dev->current_state == PCI_D3cold)
target_state = PCI_D3cold;
- if (wakeup) {
+ if (wakeup && dev->pme_support) {
+ pci_power_t state = target_state;
+
/*
* Find the deepest state from which the device can generate
* PME#.
*/
- if (dev->pme_support) {
- while (target_state
- && !(dev->pme_support & (1 << target_state)))
- target_state--;
- }
+ while (state && !(dev->pme_support & (1 << state)))
+ state--;
+
+ if (state)
+ return state;
+ else if (dev->pme_support & 1)
+ return PCI_D0;
}
return target_state;
/* prevent CPU hotplug, make sure the RAPL domain does not go
* away while reading the counter.
*/
- get_online_cpus();
+ cpus_read_lock();
rd = power_zone_to_rapl_domain(power_zone);
if (!rapl_read_data_raw(rd, ENERGY_COUNTER, true, &energy_now)) {
*energy_raw = energy_now;
- put_online_cpus();
+ cpus_read_unlock();
return 0;
}
- put_online_cpus();
+ cpus_read_unlock();
return -EIO;
}
if (rd->state & DOMAIN_STATE_BIOS_LOCKED)
return -EACCES;
- get_online_cpus();
+ cpus_read_lock();
rapl_write_data_raw(rd, PL1_ENABLE, mode);
if (rapl_defaults->set_floor_freq)
rapl_defaults->set_floor_freq(rd, mode);
- put_online_cpus();
+ cpus_read_unlock();
return 0;
}
*mode = false;
return 0;
}
- get_online_cpus();
+ cpus_read_lock();
if (rapl_read_data_raw(rd, PL1_ENABLE, true, &val)) {
- put_online_cpus();
+ cpus_read_unlock();
return -EIO;
}
*mode = val;
- put_online_cpus();
+ cpus_read_unlock();
return 0;
}
int ret = 0;
int id;
- get_online_cpus();
+ cpus_read_lock();
rd = power_zone_to_rapl_domain(power_zone);
id = contraint_to_pl(rd, cid);
if (id < 0) {
if (!ret)
package_power_limit_irq_save(rp);
set_exit:
- put_online_cpus();
+ cpus_read_unlock();
return ret;
}
int ret = 0;
int id;
- get_online_cpus();
+ cpus_read_lock();
rd = power_zone_to_rapl_domain(power_zone);
id = contraint_to_pl(rd, cid);
if (id < 0) {
prim = POWER_LIMIT4;
break;
default:
- put_online_cpus();
+ cpus_read_unlock();
return -EINVAL;
}
if (rapl_read_data_raw(rd, prim, true, &val))
*data = val;
get_exit:
- put_online_cpus();
+ cpus_read_unlock();
return ret;
}
int ret = 0;
int id;
- get_online_cpus();
+ cpus_read_lock();
rd = power_zone_to_rapl_domain(power_zone);
id = contraint_to_pl(rd, cid);
if (id < 0) {
}
set_time_exit:
- put_online_cpus();
+ cpus_read_unlock();
return ret;
}
int ret = 0;
int id;
- get_online_cpus();
+ cpus_read_lock();
rd = power_zone_to_rapl_domain(power_zone);
id = contraint_to_pl(rd, cid);
if (id < 0) {
val = 0;
break;
default:
- put_online_cpus();
+ cpus_read_unlock();
return -EINVAL;
}
if (!ret)
*data = val;
get_time_exit:
- put_online_cpus();
+ cpus_read_unlock();
return ret;
}
int prim;
int ret = 0;
- get_online_cpus();
+ cpus_read_lock();
rd = power_zone_to_rapl_domain(power_zone);
switch (rd->rpl[id].prim_id) {
case PL1_ENABLE:
prim = MAX_POWER;
break;
default:
- put_online_cpus();
+ cpus_read_unlock();
return -EINVAL;
}
if (rapl_read_data_raw(rd, prim, true, &val))
if (rd->rpl[id].prim_id == PL4_ENABLE)
*data = *data * 2;
- put_online_cpus();
+ cpus_read_unlock();
return ret;
}
struct rapl_domain *rd;
int nr_pl, ret, i;
- get_online_cpus();
+ cpus_read_lock();
list_for_each_entry(rp, &rapl_packages, plist) {
if (!rp->power_zone)
continue;
}
}
}
- put_online_cpus();
+ cpus_read_unlock();
}
static void power_limit_state_restore(void)
struct rapl_domain *rd;
int nr_pl, i;
- get_online_cpus();
+ cpus_read_lock();
list_for_each_entry(rp, &rapl_packages, plist) {
if (!rp->power_zone)
continue;
}
}
}
- put_online_cpus();
+ cpus_read_unlock();
}
static int rapl_pm_callback(struct notifier_block *nb,
/* List of verified CPUs. */
static const struct x86_cpu_id pl4_support_ids[] = {
{ X86_VENDOR_INTEL, 6, INTEL_FAM6_TIGERLAKE_L, X86_FEATURE_ANY },
+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ALDERLAKE, X86_FEATURE_ANY },
+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ALDERLAKE_L, X86_FEATURE_ANY },
{}
};
platform_thermal_notify(msr_val);
}
+void __weak notify_hwp_interrupt(void)
+{
+ wrmsrl_safe(MSR_HWP_STATUS, 0);
+}
+
/* Thermal transition interrupt handler */
void intel_thermal_interrupt(void)
{
__u64 msr_val;
if (static_cpu_has(X86_FEATURE_HWP))
- wrmsrl_safe(MSR_HWP_STATUS, 0);
+ notify_hwp_interrupt();
rdmsrl(MSR_IA32_THERM_STATUS, msr_val);
* callback has rate control */
extern bool (*platform_thermal_package_rate_control)(void);
+/* Handle HWP interrupt */
+extern void notify_hwp_interrupt(void);
+
#endif /* _INTEL_THERMAL_INTERRUPT_H */
#ifdef CONFIG_ENERGY_MODEL
#define EM_MAX_POWER 0xFFFF
+/*
+ * Increase resolution of energy estimation calculations for 64-bit
+ * architectures. The extra resolution improves decision made by EAS for the
+ * task placement when two Performance Domains might provide similar energy
+ * estimation values (w/o better resolution the values could be equal).
+ *
+ * We increase resolution only if we have enough bits to allow this increased
+ * resolution (i.e. 64-bit). The costs for increasing resolution when 32-bit
+ * are pretty high and the returns do not justify the increased costs.
+ */
+#ifdef CONFIG_64BIT
+#define em_scale_power(p) ((p) * 1000)
+#else
+#define em_scale_power(p) (p)
+#endif
+
struct em_data_callback {
/**
* active_power() - Provide power at the next performance state of
extern int srcu_notifier_call_chain(struct srcu_notifier_head *nh,
unsigned long val, void *v);
-extern int atomic_notifier_call_chain_robust(struct atomic_notifier_head *nh,
- unsigned long val_up, unsigned long val_down, void *v);
extern int blocking_notifier_call_chain_robust(struct blocking_notifier_head *nh,
unsigned long val_up, unsigned long val_down, void *v);
extern int raw_notifier_call_chain_robust(struct raw_notifier_head *nh,
#include <linux/spinlock.h>
#include <linux/syscore_ops.h>
-static ATOMIC_NOTIFIER_HEAD(cpu_pm_notifier_chain);
+/*
+ * atomic_notifiers use a spinlock_t, which can block under PREEMPT_RT.
+ * Notifications for cpu_pm will be issued by the idle task itself, which can
+ * never block, IOW it requires using a raw_spinlock_t.
+ */
+static struct {
+ struct raw_notifier_head chain;
+ raw_spinlock_t lock;
+} cpu_pm_notifier = {
+ .chain = RAW_NOTIFIER_INIT(cpu_pm_notifier.chain),
+ .lock = __RAW_SPIN_LOCK_UNLOCKED(cpu_pm_notifier.lock),
+};
static int cpu_pm_notify(enum cpu_pm_event event)
{
int ret;
/*
- * atomic_notifier_call_chain has a RCU read critical section, which
- * could be disfunctional in cpu idle. Copy RCU_NONIDLE code to let
- * RCU know this.
+ * This introduces a RCU read critical section, which could be
+ * disfunctional in cpu idle. Copy RCU_NONIDLE code to let RCU know
+ * this.
*/
rcu_irq_enter_irqson();
- ret = atomic_notifier_call_chain(&cpu_pm_notifier_chain, event, NULL);
+ rcu_read_lock();
+ ret = raw_notifier_call_chain(&cpu_pm_notifier.chain, event, NULL);
+ rcu_read_unlock();
rcu_irq_exit_irqson();
return notifier_to_errno(ret);
static int cpu_pm_notify_robust(enum cpu_pm_event event_up, enum cpu_pm_event event_down)
{
+ unsigned long flags;
int ret;
rcu_irq_enter_irqson();
- ret = atomic_notifier_call_chain_robust(&cpu_pm_notifier_chain, event_up, event_down, NULL);
+ raw_spin_lock_irqsave(&cpu_pm_notifier.lock, flags);
+ ret = raw_notifier_call_chain_robust(&cpu_pm_notifier.chain, event_up, event_down, NULL);
+ raw_spin_unlock_irqrestore(&cpu_pm_notifier.lock, flags);
rcu_irq_exit_irqson();
return notifier_to_errno(ret);
* Add a driver to a list of drivers that are notified about
* CPU and CPU cluster low power entry and exit.
*
- * This function may sleep, and has the same return conditions as
- * raw_notifier_chain_register.
+ * This function has the same return conditions as raw_notifier_chain_register.
*/
int cpu_pm_register_notifier(struct notifier_block *nb)
{
- return atomic_notifier_chain_register(&cpu_pm_notifier_chain, nb);
+ unsigned long flags;
+ int ret;
+
+ raw_spin_lock_irqsave(&cpu_pm_notifier.lock, flags);
+ ret = raw_notifier_chain_register(&cpu_pm_notifier.chain, nb);
+ raw_spin_unlock_irqrestore(&cpu_pm_notifier.lock, flags);
+ return ret;
}
EXPORT_SYMBOL_GPL(cpu_pm_register_notifier);
*
* Remove a driver from the CPU PM notifier list.
*
- * This function may sleep, and has the same return conditions as
- * raw_notifier_chain_unregister.
+ * This function has the same return conditions as raw_notifier_chain_unregister.
*/
int cpu_pm_unregister_notifier(struct notifier_block *nb)
{
- return atomic_notifier_chain_unregister(&cpu_pm_notifier_chain, nb);
+ unsigned long flags;
+ int ret;
+
+ raw_spin_lock_irqsave(&cpu_pm_notifier.lock, flags);
+ ret = raw_notifier_chain_unregister(&cpu_pm_notifier.chain, nb);
+ raw_spin_unlock_irqrestore(&cpu_pm_notifier.lock, flags);
+ return ret;
}
EXPORT_SYMBOL_GPL(cpu_pm_unregister_notifier);
}
EXPORT_SYMBOL_GPL(atomic_notifier_chain_unregister);
-int atomic_notifier_call_chain_robust(struct atomic_notifier_head *nh,
- unsigned long val_up, unsigned long val_down, void *v)
-{
- unsigned long flags;
- int ret;
-
- /*
- * Musn't use RCU; because then the notifier list can
- * change between the up and down traversal.
- */
- spin_lock_irqsave(&nh->lock, flags);
- ret = notifier_call_chain_robust(&nh->head, val_up, val_down, v);
- spin_unlock_irqrestore(&nh->lock, flags);
-
- return ret;
-}
-EXPORT_SYMBOL_GPL(atomic_notifier_call_chain_robust);
-NOKPROBE_SYMBOL(atomic_notifier_call_chain_robust);
-
/**
* atomic_notifier_call_chain - Call functions in an atomic notifier chain
* @nh: Pointer to head of the atomic notifier chain
/* Compute the cost of each performance state. */
fmax = (u64) table[nr_states - 1].frequency;
for (i = 0; i < nr_states; i++) {
- table[i].cost = div64_u64(fmax * table[i].power,
+ unsigned long power_res = em_scale_power(table[i].power);
+
+ table[i].cost = div64_u64(fmax * power_res,
table[i].frequency);
}
struct kobject *power_kobj;
-/**
+/*
* state - control system sleep states.
*
* show() returns available sleep state labels, which may be "mem", "standby",
s2idle_state = S2IDLE_STATE_ENTER;
raw_spin_unlock_irq(&s2idle_lock);
- get_online_cpus();
+ cpus_read_lock();
cpuidle_resume();
/* Push all the CPUs into the idle loop. */
s2idle_state == S2IDLE_STATE_WAKE);
cpuidle_pause();
- put_online_cpus();
+ cpus_read_unlock();
raw_spin_lock_irq(&s2idle_lock);
{
struct rtc_device *candidate = to_rtc_device(dev);
- if (!candidate->ops->set_alarm)
+ if (!test_bit(RTC_FEATURE_ALARM, candidate->features))
return 0;
if (!device_may_wakeup(candidate->dev.parent))
return 0;
};
ATTRIBUTE_GROUPS(sugov);
+static void sugov_tunables_free(struct kobject *kobj)
+{
+ struct gov_attr_set *attr_set = container_of(kobj, struct gov_attr_set, kobj);
+
+ kfree(to_sugov_tunables(attr_set));
+}
+
static struct kobj_type sugov_tunables_ktype = {
.default_groups = sugov_groups,
.sysfs_ops = &governor_sysfs_ops,
+ .release = &sugov_tunables_free,
};
/********************** cpufreq governor interface *********************/
return tunables;
}
-static void sugov_tunables_free(struct sugov_tunables *tunables)
+static void sugov_clear_global_tunables(void)
{
if (!have_governor_per_policy())
global_tunables = NULL;
-
- kfree(tunables);
}
static int sugov_init(struct cpufreq_policy *policy)
fail:
kobject_put(&tunables->attr_set.kobj);
policy->governor_data = NULL;
- sugov_tunables_free(tunables);
+ sugov_clear_global_tunables();
stop_kthread:
sugov_kthread_stop(sg_policy);
count = gov_attr_set_put(&tunables->attr_set, &sg_policy->tunables_hook);
policy->governor_data = NULL;
if (!count)
- sugov_tunables_free(tunables);
+ sugov_clear_global_tunables();
mutex_unlock(&global_tunables_lock);