static bool cpu_overutilized(int cpu);
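+
+/*
+ * Accessors for the overutilized flag stored in the sched_domain_shared
+ * structure attached to each sched_domain.
+ */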
+static bool sd_overutilized(struct sched_domain *sd)
+{
+ return sd->shared->overutilized;
+}
+
+static void set_sd_overutilized(struct sched_domain *sd)
+{
+ sd->shared->overutilized = true;
+}
+
+static void clear_sd_overutilized(struct sched_domain *sd)
+{
+ sd->shared->overutilized = false;
+}
+
/*
* The enqueue_task method is called before nr_running is
* increased. Here we update the fair scheduling stats and
enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
{
struct cfs_rq *cfs_rq;
+ struct sched_domain *sd;
struct sched_entity *se = &p->se;
int task_new = !(flags & ENQUEUE_WAKEUP);
if (!se) {
add_nr_running(rq, 1);
- if (!task_new && !rq->rd->overutilized &&
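+ /*
+ * rq->sd is RCU protected; hold the read lock while checking
+ * and updating the overutilized flag of the base sched_domain.
+ */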
+ rcu_read_lock();
+ sd = rcu_dereference(rq->sd);
+ if (!task_new && sd && !sd_overutilized(sd) &&
cpu_overutilized(rq->cpu))
- rq->rd->overutilized = true;
+ set_sd_overutilized(sd);
+ rcu_read_unlock();
}
hrtick_update(rq);
}
unsigned long max_spare = 0;
struct sched_domain *sd;
- rcu_read_lock();
-
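+ /* The caller, select_task_rq_fair(), now holds the RCU read lock. */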
sd = rcu_dereference(per_cpu(sd_ea, prev_cpu));
if (!sd)
}
unlock:
- rcu_read_unlock();
-
if (energy_cpu == prev_cpu && !cpu_overutilized(prev_cpu))
return prev_cpu;
&& cpumask_test_cpu(cpu, &p->cpus_allowed);
}
- if (energy_aware() && !(cpu_rq(prev_cpu)->rd->overutilized))
- return select_energy_cpu_brute(p, prev_cpu);
-
rcu_read_lock();
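+ /*
+ * With EAS enabled, take the energy-aware path as long as the
+ * lowest sched_domain of prev_cpu is not overutilized.
+ */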
+ sd = rcu_dereference(cpu_rq(prev_cpu)->sd);
+ if (energy_aware() && sd && !sd_overutilized(sd)) {
+ new_cpu = select_energy_cpu_brute(p, prev_cpu);
+ goto unlock;
+ }
+
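+ /* sd was used for the EAS check above; reset it before the domain walk. */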
+ sd = NULL;
+
for_each_domain(cpu, tmp) {
if (!(tmp->flags & SD_LOAD_BALANCE))
break;
} else {
new_cpu = find_idlest_cpu(sd, p, cpu, prev_cpu, sd_flag);
}
+
+unlock:
rcu_read_unlock();
#ifdef CONFIG_NO_HZ_COMMON
unsigned long total_running;
unsigned long total_load; /* Total load of all groups in sd */
unsigned long total_capacity; /* Total capacity of all groups in sd */
+ unsigned long total_util; /* Total util of all groups in sd */
unsigned long avg_load; /* Average load across all groups in sd */
struct sg_lb_stats busiest_stat;/* Statistics of the busiest group */
.total_running = 0UL,
.total_load = 0UL,
.total_capacity = 0UL,
+ .total_util = 0UL,
.busiest_stat = {
.avg_load = 0UL,
.sum_nr_running = 0,
static inline void update_sg_lb_stats(struct lb_env *env,
struct sched_group *group, int load_idx,
int local_group, struct sg_lb_stats *sgs,
- bool *overload, bool *overutilized)
+ bool *overload, bool *overutilized, bool *misfit_task)
{
unsigned long load;
int i, nr_running;
!sgs->group_misfit_task && rq->misfit_task)
sgs->group_misfit_task = capacity_of(i);
- if (cpu_overutilized(i))
+ if (cpu_overutilized(i)) {
*overutilized = true;
+
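+ /*
+ * Note whether this overutilized CPU also carries a misfit
+ * task; update_sd_lb_stats() uses this to mark parent domains.
+ */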
+ if (rq->misfit_task)
+ *misfit_task = true;
+ }
}
/* Adjust by relative CPU capacity of the group */
struct sg_lb_stats *local = &sds->local_stat;
struct sg_lb_stats tmp_sgs;
int load_idx, prefer_sibling = 0;
- bool overload = false, overutilized = false;
+ bool overload = false, overutilized = false, misfit_task = false;
if (child && child->flags & SD_PREFER_SIBLING)
prefer_sibling = 1;
}
update_sg_lb_stats(env, sg, load_idx, local_group, sgs,
- &overload, &overutilized);
+ &overload, &overutilized,
+ &misfit_task);
if (local_group)
goto next_group;
sds->total_running += sgs->sum_nr_running;
sds->total_load += sgs->group_load;
sds->total_capacity += sgs->group_capacity;
+ sds->total_util += sgs->group_util;
sg = sg->next;
} while (sg != env->sd->groups);
/* update overload indicator if we are at root domain */
if (env->dst_rq->rd->overload != overload)
env->dst_rq->rd->overload = overload;
+ }
- /* Update over-utilization (tipping point, U >= 0) indicator */
- if (env->dst_rq->rd->overutilized != overutilized)
- env->dst_rq->rd->overutilized = overutilized;
- } else {
- if (!env->dst_rq->rd->overutilized && overutilized)
- env->dst_rq->rd->overutilized = true;
+ if (overutilized)
+ set_sd_overutilized(env->sd);
+ else
+ clear_sd_overutilized(env->sd);
+
+ /*
+ * If there is a misfit task on one CPU in this sched_domain,
+ * it is likely that the imbalance cannot be sorted out among
+ * the CPUs in this sched_domain. In this case set the
+ * overutilized flag at the parent sched_domain.
+ */
+ if (misfit_task) {
+ struct sched_domain *sd = env->sd->parent;
+
+ /*
+ * In case of a misfit task, load balancing at the parent
+ * sched_domain level makes sense only if the CPUs have
+ * different capacities. If the CPUs at a domain level have
+ * the same capacity, the misfit task cannot be well
+ * accommodated on any of them and there is no point in
+ * trying a load balance at this level.
+ */
+ while (sd) {
+ if (sd->flags & SD_ASYM_CPUCAPACITY) {
+ set_sd_overutilized(sd);
+ break;
+ }
+ sd = sd->parent;
+ }
}
+
+ /*
+ * If the domain utilization, scaled by capacity_margin, exceeds the
+ * domain capacity, load balancing needs to be done at the next
+ * sched_domain level as well.
+ */
+ if (lb_sd_parent(env->sd) &&
+ sds->total_capacity * 1024 < sds->total_util * capacity_margin)
+ set_sd_overutilized(env->sd->parent);
}
/**
*/
update_sd_lb_stats(env, &sds);
- if (energy_aware() && !env->dst_rq->rd->overutilized)
+ if (energy_aware() && !sd_overutilized(env->sd))
goto out_balanced;
local = &sds.local_stat;
}
max_cost += sd->max_newidle_lb_cost;
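+ /*
+ * With EAS, skip load balancing at this domain while it is not
+ * marked overutilized.
+ */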
+ if (energy_aware() && !sd_overutilized(sd))
+ continue;
+
if (!(sd->flags & SD_LOAD_BALANCE))
continue;
{
struct cfs_rq *cfs_rq;
struct sched_entity *se = &curr->se;
+ struct sched_domain *sd;
for_each_sched_entity(se) {
cfs_rq = cfs_rq_of(se);
rq->misfit_task = !task_fits_capacity(curr, capacity_of(rq->cpu));
- if (!rq->rd->overutilized && cpu_overutilized(task_cpu(curr)))
- rq->rd->overutilized = true;
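+ /*
+ * As in enqueue_task_fair(), rq->sd must be dereferenced under
+ * the RCU read lock.
+ */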
+ rcu_read_lock();
+ sd = rcu_dereference(rq->sd);
+ if (sd && !sd_overutilized(sd) && cpu_overutilized(task_cpu(curr)))
+ set_sd_overutilized(sd);
+ rcu_read_unlock();
}
/*