#include <linux/types.h>
#include <asm/hw_irq.h>
#include <linux/device.h>
+ #include <uapi/asm/perf_event.h>
#define MAX_HWEVENTS 8
#define MAX_EVENT_ALTERNATIVES 8
#define PPMU_LIMITED_PMC_REQD 2 /* have to put this on a limited PMC */
#define PPMU_ONLY_COUNT_RUN 4 /* only counting in run state */
- /*
- * We use the event config bit 63 as a flag to request EBB.
- */
- #define EVENT_CONFIG_EBB_SHIFT 63
-
extern int register_power_pmu(struct power_pmu *);
struct pt_regs;
#define EVENT_PTR(_id, _suffix) &EVENT_VAR(_id, _suffix).attr.attr
#define EVENT_ATTR(_name, _id, _suffix) \
- PMU_EVENT_ATTR(_name, EVENT_VAR(_id, _suffix), PME_PM_##_id, \
+ PMU_EVENT_ATTR(_name, EVENT_VAR(_id, _suffix), PME_##_id, \
power_events_sysfs_show)
#define GENERIC_EVENT_ATTR(_name, _id) EVENT_ATTR(_name, _id, _g)
#define GENERIC_EVENT_PTR(_id) EVENT_PTR(_id, _g)
-#define POWER_EVENT_ATTR(_name, _id) EVENT_ATTR(PM_##_name, _id, _p)
+#define POWER_EVENT_ATTR(_name, _id) EVENT_ATTR(_name, _id, _p)
#define POWER_EVENT_PTR(_id) EVENT_PTR(_id, _p)
{
struct task_struct *p = current;
- if (!sched_feat_numa(NUMA))
+ if (!numabalancing_enabled)
return;
/* FIXME: Allocate task-specific structure for placement policy here */
return 0;
}
+static void record_wakee(struct task_struct *p)
+{
+ /*
+ * Rough decay (wiping) for cost saving, don't worry
+ * about the boundary, really active task won't care
+ * about the loss.
+ */
+ if (jiffies > current->wakee_flip_decay_ts + HZ) {
+ current->wakee_flips = 0;
+ current->wakee_flip_decay_ts = jiffies;
+ }
+
+ if (current->last_wakee != p) {
+ current->last_wakee = p;
+ current->wakee_flips++;
+ }
+}
static void task_waking_fair(struct task_struct *p)
{
#endif
se->vruntime -= min_vruntime;
+ record_wakee(p);
}
#ifdef CONFIG_FAIR_GROUP_SCHED
#endif
+static int wake_wide(struct task_struct *p)
+{
+ int factor = this_cpu_read(sd_llc_size);
+
+ /*
+ * Yeah, it's the switching-frequency, could means many wakee or
+ * rapidly switch, use factor here will just help to automatically
+ * adjust the loose-degree, so bigger node will lead to more pull.
+ */
+ if (p->wakee_flips > factor) {
+ /*
+ * wakee is somewhat hot, it needs certain amount of cpu
+ * resource, so if waker is far more hot, prefer to leave
+ * it alone.
+ */
+ if (current->wakee_flips > (factor * p->wakee_flips))
+ return 1;
+ }
+
+ return 0;
+}
+
static int wake_affine(struct sched_domain *sd, struct task_struct *p, int sync)
{
s64 this_load, load;
unsigned long weight;
int balanced;
+ /*
+ * If we wake multiple tasks be careful to not bounce
+ * ourselves around too much.
+ */
+ if (wake_wide(p))
+ return 0;
+
idx = sd->wake_idx;
this_cpu = smp_processor_id();
prev_cpu = task_cpu(p);
}
/*
- * Compute the cpu's hierarchical load factor for each task group.
+ * Compute the hierarchical load factor for cfs_rq and all its ascendants.
* This needs to be done in a top-down fashion because the load of a child
* group is a fraction of its parents load.
*/
-static int tg_load_down(struct task_group *tg, void *data)
-{
- unsigned long load;
- long cpu = (long)data;
-
- if (!tg->parent) {
- load = cpu_rq(cpu)->avg.load_avg_contrib;
- } else {
- load = tg->parent->cfs_rq[cpu]->h_load;
- load = div64_ul(load * tg->se[cpu]->avg.load_avg_contrib,
- tg->parent->cfs_rq[cpu]->runnable_load_avg + 1);
- }
-
- tg->cfs_rq[cpu]->h_load = load;
-
- return 0;
-}
-
-static void update_h_load(long cpu)
+static void update_cfs_rq_h_load(struct cfs_rq *cfs_rq)
{
- struct rq *rq = cpu_rq(cpu);
+ struct rq *rq = rq_of(cfs_rq);
+ struct sched_entity *se = cfs_rq->tg->se[cpu_of(rq)];
unsigned long now = jiffies;
+ unsigned long load;
- if (rq->h_load_throttle == now)
+ if (cfs_rq->last_h_load_update == now)
return;
- rq->h_load_throttle = now;
+ cfs_rq->h_load_next = NULL;
+ for_each_sched_entity(se) {
+ cfs_rq = cfs_rq_of(se);
+ cfs_rq->h_load_next = se;
+ if (cfs_rq->last_h_load_update == now)
+ break;
+ }
- rcu_read_lock();
- walk_tg_tree(tg_load_down, tg_nop, (void *)cpu);
- rcu_read_unlock();
+ if (!se) {
+ cfs_rq->h_load = rq->avg.load_avg_contrib;
+ cfs_rq->last_h_load_update = now;
+ }
+
+ while ((se = cfs_rq->h_load_next) != NULL) {
+ load = cfs_rq->h_load;
+ load = div64_ul(load * se->avg.load_avg_contrib,
+ cfs_rq->runnable_load_avg + 1);
+ cfs_rq = group_cfs_rq(se);
+ cfs_rq->h_load = load;
+ cfs_rq->last_h_load_update = now;
+ }
}
static unsigned long task_h_load(struct task_struct *p)
{
struct cfs_rq *cfs_rq = task_cfs_rq(p);
+ update_cfs_rq_h_load(cfs_rq);
return div64_ul(p->se.avg.load_avg_contrib * cfs_rq->h_load,
cfs_rq->runnable_load_avg + 1);
}
{
}
-static inline void update_h_load(long cpu)
-{
-}
-
static unsigned long task_h_load(struct task_struct *p)
{
return p->se.avg.load_avg_contrib;
env.src_rq = busiest;
env.loop_max = min(sysctl_sched_nr_migrate, busiest->nr_running);
- update_h_load(env.src_cpu);
more_balance:
local_irq_save(flags);
double_rq_lock(env.dst_rq, busiest);
entity_tick(cfs_rq, se, queued);
}
- if (sched_feat_numa(NUMA))
+ if (numabalancing_enabled)
task_tick_numa(rq, curr);
update_rq_runnable_avg(rq, 1);