Merge branch 'for-4.15' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup

[platform/kernel/linux-starfive.git] / kernel / sched / core.c
diff --git a/kernel/sched/core.c b/kernel/sched/core.c

index d17c5da..a092f35 100644 (file)
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -26,6 +26,7 @@
  #include <linux/profile.h>
  #include <linux/security.h>
  #include <linux/syscalls.h>
+#include <linux/sched/isolation.h>
  
  #include <asm/switch_to.h>
  #include <asm/tlb.h>
@@ -42,18 +43,21 @@
  
  DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
  
+#if defined(CONFIG_SCHED_DEBUG) && defined(HAVE_JUMP_LABEL)
  /*
   * Debugging: various feature bits
+ *
+ * If SCHED_DEBUG is disabled, each compilation unit has its own copy of
+ * sysctl_sched_features, defined in sched.h, to allow constants propagation
+ * at compile time and compiler optimization based on features default.
   */
-
  #define SCHED_FEAT(name, enabled)      \
         (1UL << __SCHED_FEAT_##name) * enabled |
-
  const_debug unsigned int sysctl_sched_features =
  #include "features.h"
         0;
-
  #undef SCHED_FEAT
+#endif
  
  /*
   * Number of tasks to iterate in a single balance run.
@@ -83,9 +87,6 @@ __read_mostly int scheduler_running;
   */
  int sysctl_sched_rt_runtime = 950000;
  
-/* CPUs with isolated domains */
-cpumask_var_t cpu_isolated_map;
-
  /*
   * __task_rq_lock - lock the rq @p resides on.
   */
@@ -505,8 +506,7 @@ void resched_cpu(int cpu)
         struct rq *rq = cpu_rq(cpu);
         unsigned long flags;
  
-       if (!raw_spin_trylock_irqsave(&rq->lock, flags))
-               return;
+       raw_spin_lock_irqsave(&rq->lock, flags);
         resched_curr(rq);
         raw_spin_unlock_irqrestore(&rq->lock, flags);
  }
@@ -526,7 +526,7 @@ int get_nohz_timer_target(void)
         int i, cpu = smp_processor_id();
         struct sched_domain *sd;
  
-       if (!idle_cpu(cpu) && is_housekeeping_cpu(cpu))
+       if (!idle_cpu(cpu) && housekeeping_cpu(cpu, HK_FLAG_TIMER))
                 return cpu;
  
         rcu_read_lock();
@@ -535,15 +535,15 @@ int get_nohz_timer_target(void)
                         if (cpu == i)
                                 continue;
  
-                       if (!idle_cpu(i) && is_housekeeping_cpu(i)) {
+                       if (!idle_cpu(i) && housekeeping_cpu(i, HK_FLAG_TIMER)) {
                                 cpu = i;
                                 goto unlock;
                         }
                 }
         }
  
-       if (!is_housekeeping_cpu(cpu))
-               cpu = housekeeping_any_cpu();
+       if (!housekeeping_cpu(cpu, HK_FLAG_TIMER))
+               cpu = housekeeping_any_cpu(HK_FLAG_TIMER);
  unlock:
         rcu_read_unlock();
         return cpu;
@@ -733,7 +733,7 @@ int tg_nop(struct task_group *tg, void *data)
  }
  #endif
  
-static void set_load_weight(struct task_struct *p)
+static void set_load_weight(struct task_struct *p, bool update_load)
  {
         int prio = p->static_prio - MAX_RT_PRIO;
         struct load_weight *load = &p->se.load;
@@ -747,8 +747,16 @@ static void set_load_weight(struct task_struct *p)
                 return;
         }
  
-       load->weight = scale_load(sched_prio_to_weight[prio]);
-       load->inv_weight = sched_prio_to_wmult[prio];
+       /*
+        * SCHED_OTHER tasks have to update their load when changing their
+        * weight
+        */
+       if (update_load && p->sched_class == &fair_sched_class) {
+               reweight_task(p, prio);
+       } else {
+               load->weight = scale_load(sched_prio_to_weight[prio]);
+               load->inv_weight = sched_prio_to_wmult[prio];
+       }
  }
  
  static inline void enqueue_task(struct rq *rq, struct task_struct *p, int flags)
@@ -2358,7 +2366,7 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p)
                         p->static_prio = NICE_TO_PRIO(0);
  
                 p->prio = p->normal_prio = __normal_prio(p);
-               set_load_weight(p);
+               set_load_weight(p, false);
  
                 /*
                  * We don't need the reset flag anymore after the fork. It has
@@ -3805,7 +3813,7 @@ void set_user_nice(struct task_struct *p, long nice)
                 put_prev_task(rq, p);
  
         p->static_prio = NICE_TO_PRIO(nice);
-       set_load_weight(p);
+       set_load_weight(p, true);
         old_prio = p->prio;
         p->prio = effective_prio(p);
         delta = p->prio - old_prio;
@@ -3962,7 +3970,7 @@ static void __setscheduler_params(struct task_struct *p,
          */
         p->rt_priority = attr->sched_priority;
         p->normal_prio = normal_prio(p);
-       set_load_weight(p);
+       set_load_weight(p, true);
  }
  
  /* Actually do priority change: must hold pi & rq lock. */
@@ -4842,6 +4850,7 @@ int __sched _cond_resched(void)
                 preempt_schedule_common();
                 return 1;
         }
+       rcu_all_qs();
         return 0;
  }
  EXPORT_SYMBOL(_cond_resched);
@@ -5165,6 +5174,7 @@ void sched_show_task(struct task_struct *p)
         show_stack(p, NULL);
         put_task_stack(p);
  }
+EXPORT_SYMBOL_GPL(sched_show_task);
  
  static inline bool
  state_filter_match(unsigned long state_filter, struct task_struct *p)
@@ -5726,10 +5736,6 @@ static inline void sched_init_smt(void) { }
  
  void __init sched_init_smp(void)
  {
-       cpumask_var_t non_isolated_cpus;
-
-       alloc_cpumask_var(&non_isolated_cpus, GFP_KERNEL);
-
         sched_init_numa();
  
         /*
@@ -5739,16 +5745,12 @@ void __init sched_init_smp(void)
          */
         mutex_lock(&sched_domains_mutex);
         sched_init_domains(cpu_active_mask);
-       cpumask_andnot(non_isolated_cpus, cpu_possible_mask, cpu_isolated_map);
-       if (cpumask_empty(non_isolated_cpus))
-               cpumask_set_cpu(smp_processor_id(), non_isolated_cpus);
         mutex_unlock(&sched_domains_mutex);
  
         /* Move init over to a non-isolated CPU */
-       if (set_cpus_allowed_ptr(current, non_isolated_cpus) < 0)
+       if (set_cpus_allowed_ptr(current, housekeeping_cpumask(HK_FLAG_DOMAIN)) < 0)
                 BUG();
         sched_init_granularity();
-       free_cpumask_var(non_isolated_cpus);
  
         init_sched_rt_class();
         init_sched_dl_class();
@@ -5933,7 +5935,7 @@ void __init sched_init(void)
                 atomic_set(&rq->nr_iowait, 0);
         }
  
-       set_load_weight(&init_task);
+       set_load_weight(&init_task, false);
  
         /*
          * The boot idle thread does lazy MMU switching as well:
@@ -5952,9 +5954,6 @@ void __init sched_init(void)
         calc_load_update = jiffies + LOAD_FREQ;
  
  #ifdef CONFIG_SMP
-       /* May be allocated at isolcpus cmdline parse time */
-       if (cpu_isolated_map == NULL)
-               zalloc_cpumask_var(&cpu_isolated_map, GFP_NOWAIT);
         idle_thread_set_boot_cpu();
         set_cpu_rq_start_time(smp_processor_id());
  #endif
@@ -6621,7 +6620,7 @@ static int __cfs_schedulable(struct task_group *tg, u64 period, u64 quota)
         return ret;
  }
  
-static int cpu_stats_show(struct seq_file *sf, void *v)
+static int cpu_cfs_stat_show(struct seq_file *sf, void *v)
  {
         struct task_group *tg = css_tg(seq_css(sf));
         struct cfs_bandwidth *cfs_b = &tg->cfs_bandwidth;
@@ -6661,7 +6660,7 @@ static u64 cpu_rt_period_read_uint(struct cgroup_subsys_state *css,
  }
  #endif /* CONFIG_RT_GROUP_SCHED */
  
-static struct cftype cpu_files[] = {
+static struct cftype cpu_legacy_files[] = {
  #ifdef CONFIG_FAIR_GROUP_SCHED
         {
                 .name = "shares",
@@ -6682,7 +6681,7 @@ static struct cftype cpu_files[] = {
         },
         {
                 .name = "stat",
-               .seq_show = cpu_stats_show,
+               .seq_show = cpu_cfs_stat_show,
         },
  #endif
  #ifdef CONFIG_RT_GROUP_SCHED
@@ -6700,16 +6699,182 @@ static struct cftype cpu_files[] = {
         { }     /* Terminate */
  };
  
+static int cpu_extra_stat_show(struct seq_file *sf,
+                              struct cgroup_subsys_state *css)
+{
+#ifdef CONFIG_CFS_BANDWIDTH    /* when unset, only the "return 0" below is compiled */
+       {
+               struct task_group *tg = css_tg(css);
+               struct cfs_bandwidth *cfs_b = &tg->cfs_bandwidth;
+               u64 throttled_usec;
+               /* throttled_time is scaled down by NSEC_PER_USEC for the *_usec key */
+               throttled_usec = cfs_b->throttled_time;
+               do_div(throttled_usec, NSEC_PER_USEC);
+               /* three "key value" lines, one per cfs_bandwidth counter */
+               seq_printf(sf, "nr_periods %d\n"
+                          "nr_throttled %d\n"
+                          "throttled_usec %llu\n",
+                          cfs_b->nr_periods, cfs_b->nr_throttled,
+                          throttled_usec);
+       }
+#endif
+       return 0;
+}
+
+#ifdef CONFIG_FAIR_GROUP_SCHED
+static u64 cpu_weight_read_u64(struct cgroup_subsys_state *css,
+                              struct cftype *cft)
+{
+       struct task_group *tg = css_tg(css);
+       u64 weight = scale_load_down(tg->shares);
+       /* mirror of cpu_weight_write_u64(): 1024 shares reads as CGROUP_WEIGHT_DFL */
+       return DIV_ROUND_CLOSEST_ULL(weight * CGROUP_WEIGHT_DFL, 1024);
+}
+
+static int cpu_weight_write_u64(struct cgroup_subsys_state *css,
+                               struct cftype *cft, u64 weight)
+{
+       /*
+        * cgroup weight knobs should use the common MIN, DFL and MAX
+        * values which are 1, 100 and 10000 respectively.  While it loses
+        * a bit of range on both ends, it maps pretty well onto the shares
+        * value used by scheduler and the round-trip conversions preserve
+        * the original value over the entire range.
+        */
+       if (weight < CGROUP_WEIGHT_MIN || weight > CGROUP_WEIGHT_MAX)
+               return -ERANGE;
+       /* rescale so that CGROUP_WEIGHT_DFL becomes 1024 shares */
+       weight = DIV_ROUND_CLOSEST_ULL(weight * 1024, CGROUP_WEIGHT_DFL);
+       /* scale_load() is applied before the value is stored as group shares */
+       return sched_group_set_shares(css_tg(css), scale_load(weight));
+}
+
+static s64 cpu_weight_nice_read_s64(struct cgroup_subsys_state *css,
+                                   struct cftype *cft)
+{
+       unsigned long weight = scale_load_down(css_tg(css)->shares);
+       int last_delta = INT_MAX;
+       int prio, delta;
+       /* find the closest nice value to the current weight */
+       /* scan stops at the first entry whose distance to weight stops shrinking */
+       for (prio = 0; prio < ARRAY_SIZE(sched_prio_to_weight); prio++) {
+               delta = abs(sched_prio_to_weight[prio] - weight);
+               if (delta >= last_delta)
+                       break;
+               last_delta = delta;
+       }
+       /* prio is now one past the closest entry, hence the "- 1" */
+       return PRIO_TO_NICE(prio - 1 + MAX_RT_PRIO);
+}
+
+static int cpu_weight_nice_write_s64(struct cgroup_subsys_state *css,
+                                    struct cftype *cft, s64 nice)
+{
+       unsigned long weight;
+       /* anything outside [MIN_NICE, MAX_NICE] is rejected with -ERANGE */
+       if (nice < MIN_NICE || nice > MAX_NICE)
+               return -ERANGE;
+       /* look the nice level up in sched_prio_to_weight[] and apply it as shares */
+       weight = sched_prio_to_weight[NICE_TO_PRIO(nice) - MAX_RT_PRIO];
+       return sched_group_set_shares(css_tg(css), scale_load(weight));
+}
+#endif
+
+static void __maybe_unused cpu_period_quota_print(struct seq_file *sf,
+                                                 long period, long quota)
+{
+       if (quota < 0)  /* a negative quota is shown as the word "max" */
+               seq_puts(sf, "max");
+       else
+               seq_printf(sf, "%ld", quota);
+       /* period always follows, separated by a space, and ends the line */
+       seq_printf(sf, " %ld\n", period);
+}
+
+/* parse "<quota>|max [<period>]"; caller should put the current value in *@periodp before calling */
+static int __maybe_unused cpu_period_quota_parse(char *buf,
+                                                u64 *periodp, u64 *quotap)
+{
+       char tok[21];   /* 20 digits of U64_MAX plus the terminating NUL */
+       /* "%20s" keeps the token inside tok[]; "< 1" also rejects a negative return */
+       if (sscanf(buf, "%20s %llu", tok, periodp) < 1)
+               return -EINVAL;
+       /* scaled by NSEC_PER_USEC whether it was parsed or left at the caller's value */
+       *periodp *= NSEC_PER_USEC;
+       /* the first token is either a number (scaled the same way) or "max" */
+       if (sscanf(tok, "%llu", quotap))
+               *quotap *= NSEC_PER_USEC;
+       else if (!strcmp(tok, "max"))
+               *quotap = RUNTIME_INF;
+       else
+               return -EINVAL;
+
+       return 0;
+}
+
+#ifdef CONFIG_CFS_BANDWIDTH
+static int cpu_max_show(struct seq_file *sf, void *v)
+{
+       struct task_group *tg = css_tg(seq_css(sf));
+       /* print the group's CFS quota followed by its period */
+       cpu_period_quota_print(sf, tg_get_cfs_period(tg), tg_get_cfs_quota(tg));
+       return 0;
+}
+
+static ssize_t cpu_max_write(struct kernfs_open_file *of,
+                            char *buf, size_t nbytes, loff_t off)
+{
+       struct task_group *tg = css_tg(of_css(of));
+       u64 period = tg_get_cfs_period(tg);     /* used if buf carries no period */
+       u64 quota;
+       int ret;
+       /* the new bandwidth is applied only when parsing succeeded */
+       ret = cpu_period_quota_parse(buf, &period, &quota);
+       if (!ret)
+               ret = tg_set_cfs_bandwidth(tg, period, quota);
+       return ret ?: nbytes;   /* non-zero ret is returned as is, else nbytes */
+}
+#endif
+
+static struct cftype cpu_files[] = {   /* installed as .dfl_cftypes of cpu_cgrp_subsys */
+#ifdef CONFIG_FAIR_GROUP_SCHED
+       {
+               .name = "weight",       /* group shares on the CGROUP_WEIGHT_* scale */
+               .flags = CFTYPE_NOT_ON_ROOT,
+               .read_u64 = cpu_weight_read_u64,
+               .write_u64 = cpu_weight_write_u64,
+       },
+       {
+               .name = "weight.nice",  /* same shares, expressed as a nice level */
+               .flags = CFTYPE_NOT_ON_ROOT,
+               .read_s64 = cpu_weight_nice_read_s64,
+               .write_s64 = cpu_weight_nice_write_s64,
+       },
+#endif
+#ifdef CONFIG_CFS_BANDWIDTH
+       {
+               .name = "max",          /* CFS quota ("max" or a number) and period */
+               .flags = CFTYPE_NOT_ON_ROOT,
+               .seq_show = cpu_max_show,
+               .write = cpu_max_write,
+       },
+#endif
+       { }     /* terminate */
+};
+
  struct cgroup_subsys cpu_cgrp_subsys = {
         .css_alloc      = cpu_cgroup_css_alloc,
         .css_online     = cpu_cgroup_css_online,
         .css_released   = cpu_cgroup_css_released,
         .css_free       = cpu_cgroup_css_free,
+       .css_extra_stat_show = cpu_extra_stat_show,
         .fork           = cpu_cgroup_fork,
         .can_attach     = cpu_cgroup_can_attach,
         .attach         = cpu_cgroup_attach,
-       .legacy_cftypes = cpu_files,
+       .legacy_cftypes = cpu_legacy_files,
+       .dfl_cftypes    = cpu_files,
         .early_init     = true,
+       .threaded       = true,
  };
  
  #endif /* CONFIG_CGROUP_SCHED */