sched: Fix sched_setparam() policy == -1 logic

[platform/adaptation/renesas_rcar/renesas_kernel.git] / kernel / sched / core.c
diff --git a/kernel/sched/core.c b/kernel/sched/core.c

index 36c951b..677ebad 100644 (file)
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1108,6 +1108,7 @@ int migrate_swap(struct task_struct *cur, struct task_struct *p)
         if (!cpumask_test_cpu(arg.src_cpu, tsk_cpus_allowed(arg.dst_task)))
                 goto out;
  
+       trace_sched_swap_numa(cur, arg.src_cpu, p, arg.dst_cpu);
         ret = stop_two_cpus(arg.dst_cpu, arg.src_cpu, migrate_swap_stop, &arg);
  
  out:
@@ -1321,7 +1322,7 @@ out:
                  * leave kernel.
                  */
                 if (p->mm && printk_ratelimit()) {
-                       printk_sched("process %d (%s) no longer affine to cpu%d\n",
+                       printk_deferred("process %d (%s) no longer affine to cpu%d\n",
                                         task_pid_nr(p), p->comm, cpu);
                 }
         }
@@ -1769,7 +1770,29 @@ void set_numabalancing_state(bool enabled)
         numabalancing_enabled = enabled;
  }
  #endif /* CONFIG_SCHED_DEBUG */
-#endif /* CONFIG_NUMA_BALANCING */
+
+#ifdef CONFIG_PROC_SYSCTL
+int sysctl_numa_balancing(struct ctl_table *table, int write,
+                        void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+       struct ctl_table t;
+       int err;
+       int state = numabalancing_enabled;      /* start from the current setting */
+
+       if (write && !capable(CAP_SYS_ADMIN))   /* writes require CAP_SYS_ADMIN */
+               return -EPERM;
+
+       t = *table;                             /* local copy of the caller's table entry */
+       t.data = &state;                        /* point the copy at the local 'state' */
+       err = proc_dointvec_minmax(&t, write, buffer, lenp, ppos);
+       if (err < 0)
+               return err;
+       if (write)
+               set_numabalancing_state(state); /* apply only after a successful write */
+       return err;
+}
+#endif /* CONFIG_PROC_SYSCTL */
+#endif /* CONFIG_NUMA_BALANCING */
  
  /*
   * fork()/clone()-time setup:
@@ -1929,7 +1952,7 @@ static int dl_overflow(struct task_struct *p, int policy,
  {
  
         struct dl_bw *dl_b = dl_bw_of(task_cpu(p));
-       u64 period = attr->sched_period;
+       u64 period = attr->sched_period ?: attr->sched_deadline;
         u64 runtime = attr->sched_runtime;
         u64 new_bw = dl_policy(policy) ? to_ratio(period, runtime) : 0;
         int cpus, err = -1;
@@ -2453,7 +2476,7 @@ u64 scheduler_tick_max_deferment(void)
         if (time_before_eq(next, now))
                 return 0;
  
-       return jiffies_to_usecs(next - now) * NSEC_PER_USEC;
+       return jiffies_to_nsecs(next - now);
  }
  #endif
  
@@ -3219,17 +3242,40 @@ __getparam_dl(struct task_struct *p, struct sched_attr *attr)
   * We ask for the deadline not being zero, and greater or equal
   * than the runtime, as well as the period of being zero or
   * greater than deadline. Furthermore, we have to be sure that
- * user parameters are above the internal resolution (1us); we
- * check sched_runtime only since it is always the smaller one.
+ * user parameters are above the internal resolution of 1us (we
+ * check sched_runtime only since it is always the smaller one) and
+ * below 2^63 ns (we have to check both sched_deadline and
+ * sched_period, as the latter can be zero).
   */
  static bool
  __checkparam_dl(const struct sched_attr *attr)
  {
-       return attr && attr->sched_deadline != 0 &&
-               (attr->sched_period == 0 ||
-               (s64)(attr->sched_period   - attr->sched_deadline) >= 0) &&
-               (s64)(attr->sched_deadline - attr->sched_runtime ) >= 0  &&
-               attr->sched_runtime >= (2 << (DL_SCALE - 1));
+       /* deadline != 0 */
+       if (attr->sched_deadline == 0)
+               return false;
+
+       /*
+        * Since we truncate DL_SCALE bits, make sure we're at least
+        * that big.
+        */
+       if (attr->sched_runtime < (1ULL << DL_SCALE))
+               return false;
+
+       /*
+        * Since we use the MSB for wrap-around and sign issues, make
+        * sure it's not set (mind that period can be equal to zero).
+        */
+       if (attr->sched_deadline & (1ULL << 63) ||
+           attr->sched_period & (1ULL << 63))
+               return false;
+
+       /* runtime <= deadline <= period (if period != 0) */
+       if ((attr->sched_period != 0 &&
+            attr->sched_period < attr->sched_deadline) ||
+           attr->sched_deadline < attr->sched_runtime)
+               return false;
+
+       return true;
  }
  
  /*
@@ -3315,6 +3361,15 @@ recheck:
                                 return -EPERM;
                 }
  
+                /*
+                 * Can't set/change SCHED_DEADLINE policy at all for now
+                 * (safest behavior); in the future we would like to allow
+                 * unprivileged DL tasks to increase their relative deadline
+                 * or reduce their runtime (both ways reducing utilization)
+                 */
+               if (dl_policy(policy))
+                       return -EPERM;
+
                 /*
                  * Treat SCHED_IDLE as nice 20. Only allow a switch to
                  * SCHED_NORMAL if the RLIMIT_NICE would normally permit it.
@@ -3456,9 +3511,10 @@ static int _sched_setscheduler(struct task_struct *p, int policy,
         };
  
         /*
-        * Fixup the legacy SCHED_RESET_ON_FORK hack
+        * Fixup the legacy SCHED_RESET_ON_FORK hack, except if
+        * the policy=-1 was passed by sched_setparam().
          */
-       if (policy & SCHED_RESET_ON_FORK) {
+       if ((policy != -1) && (policy & SCHED_RESET_ON_FORK)) {
                 attr.sched_flags |= SCHED_FLAG_RESET_ON_FORK;
                 policy &= ~SCHED_RESET_ON_FORK;
                 attr.sched_policy = policy;
@@ -3638,17 +3694,22 @@ SYSCALL_DEFINE2(sched_setparam, pid_t, pid, struct sched_param __user *, param)
   * @pid: the pid in question.
   * @uattr: structure containing the extended parameters.
   */
-SYSCALL_DEFINE2(sched_setattr, pid_t, pid, struct sched_attr __user *, uattr)
+SYSCALL_DEFINE3(sched_setattr, pid_t, pid, struct sched_attr __user *, uattr,
+                              unsigned int, flags)
  {
         struct sched_attr attr;
         struct task_struct *p;
         int retval;
  
-       if (!uattr || pid < 0)
+       if (!uattr || pid < 0 || flags)
                 return -EINVAL;
  
-       if (sched_copy_attr(uattr, &attr))
-               return -EFAULT;
+       retval = sched_copy_attr(uattr, &attr);
+       if (retval)
+               return retval;
+
+       if ((int)attr.sched_policy < 0)
+               return -EINVAL;
  
         rcu_read_lock();
         retval = -ESRCH;
@@ -3698,7 +3759,7 @@ SYSCALL_DEFINE1(sched_getscheduler, pid_t, pid)
   */
  SYSCALL_DEFINE2(sched_getparam, pid_t, pid, struct sched_param __user *, param)
  {
-       struct sched_param lp;
+       struct sched_param lp = { .sched_priority = 0 };
         struct task_struct *p;
         int retval;
  
@@ -3715,11 +3776,8 @@ SYSCALL_DEFINE2(sched_getparam, pid_t, pid, struct sched_param __user *, param)
         if (retval)
                 goto out_unlock;
  
-       if (task_has_dl_policy(p)) {
-               retval = -EINVAL;
-               goto out_unlock;
-       }
-       lp.sched_priority = p->rt_priority;
+       if (task_has_rt_policy(p))
+               lp.sched_priority = p->rt_priority;
         rcu_read_unlock();
  
         /*
@@ -3763,7 +3821,7 @@ static int sched_read_attr(struct sched_attr __user *uattr,
                 attr->size = usize;
         }
  
-       ret = copy_to_user(uattr, attr, usize);
+       ret = copy_to_user(uattr, attr, attr->size);
         if (ret)
                 return -EFAULT;
  
@@ -3781,8 +3839,8 @@ err_size:
   * @uattr: structure containing the extended parameters.
   * @size: sizeof(attr) for fwd/bwd comp.
   */
-SYSCALL_DEFINE3(sched_getattr, pid_t, pid, struct sched_attr __user *, uattr,
-               unsigned int, size)
+SYSCALL_DEFINE4(sched_getattr, pid_t, pid, struct sched_attr __user *, uattr,
+               unsigned int, size, unsigned int, flags)
  {
         struct sched_attr attr = {
                 .size = sizeof(struct sched_attr),
@@ -3791,7 +3849,7 @@ SYSCALL_DEFINE3(sched_getattr, pid_t, pid, struct sched_attr __user *, uattr,
         int retval;
  
         if (!uattr || pid < 0 || size > PAGE_SIZE ||
-           size < SCHED_ATTR_SIZE_VER0)
+           size < SCHED_ATTR_SIZE_VER0 || flags)
                 return -EINVAL;
  
         rcu_read_lock();
@@ -4324,7 +4382,9 @@ SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid,
                 goto out_unlock;
  
         rq = task_rq_lock(p, &flags);
-       time_slice = p->sched_class->get_rr_interval(rq, p);
+       time_slice = 0;
+       if (p->sched_class->get_rr_interval)
+               time_slice = p->sched_class->get_rr_interval(rq, p);
         task_rq_unlock(rq, p, &flags);
  
         rcu_read_unlock();
@@ -4603,6 +4663,7 @@ int migrate_task_to(struct task_struct *p, int target_cpu)
  
         /* TODO: This is not properly updating schedstats */
  
+       trace_sched_move_numa(p, curr_cpu, target_cpu);
         return stop_one_cpu(curr_cpu, migration_cpu_stop, &arg);
  }
  
@@ -5015,7 +5076,6 @@ static int sched_cpu_active(struct notifier_block *nfb,
                                       unsigned long action, void *hcpu)
  {
         switch (action & ~CPU_TASKS_FROZEN) {
-       case CPU_STARTING:
         case CPU_DOWN_FAILED:
                 set_cpu_active((long)hcpu, true);
                 return NOTIFY_OK;
@@ -7396,6 +7456,7 @@ static int sched_dl_global_constraints(void)
         u64 period = global_rt_period();
         u64 new_bw = to_ratio(period, runtime);
         int cpu, ret = 0;
+       unsigned long flags;
  
         /*
          * Here we want to check the bandwidth not being set to some
@@ -7409,10 +7470,10 @@ static int sched_dl_global_constraints(void)
         for_each_possible_cpu(cpu) {
                 struct dl_bw *dl_b = dl_bw_of(cpu);
  
-               raw_spin_lock(&dl_b->lock);
+               raw_spin_lock_irqsave(&dl_b->lock, flags);
                 if (new_bw < dl_b->total_bw)
                         ret = -EBUSY;
-               raw_spin_unlock(&dl_b->lock);
+               raw_spin_unlock_irqrestore(&dl_b->lock, flags);
  
                 if (ret)
                         break;
@@ -7425,6 +7486,7 @@ static void sched_dl_do_global(void)
  {
         u64 new_bw = -1;
         int cpu;
+       unsigned long flags;
  
         def_dl_bandwidth.dl_period = global_rt_period();
         def_dl_bandwidth.dl_runtime = global_rt_runtime();
@@ -7438,9 +7500,9 @@ static void sched_dl_do_global(void)
         for_each_possible_cpu(cpu) {
                 struct dl_bw *dl_b = dl_bw_of(cpu);
  
-               raw_spin_lock(&dl_b->lock);
+               raw_spin_lock_irqsave(&dl_b->lock, flags);
                 dl_b->bw = new_bw;
-               raw_spin_unlock(&dl_b->lock);
+               raw_spin_unlock_irqrestore(&dl_b->lock, flags);
         }
  }
  
@@ -7449,7 +7511,8 @@ static int sched_rt_global_validate(void)
         if (sysctl_sched_rt_period <= 0)
                 return -EINVAL;
  
-       if (sysctl_sched_rt_runtime > sysctl_sched_rt_period)
+       if ((sysctl_sched_rt_runtime != RUNTIME_INF) &&
+               (sysctl_sched_rt_runtime > sysctl_sched_rt_period))
                 return -EINVAL;
  
         return 0;
@@ -7852,15 +7915,14 @@ static int __cfs_schedulable(struct task_group *tg, u64 period, u64 quota)
         return ret;
  }
  
-static int cpu_stats_show(struct cgroup_subsys_state *css, struct cftype *cft,
-               struct cgroup_map_cb *cb)
+static int cpu_stats_show(struct seq_file *sf, void *v)
  {
-       struct task_group *tg = css_tg(css);
+       struct task_group *tg = css_tg(seq_css(sf));
         struct cfs_bandwidth *cfs_b = &tg->cfs_bandwidth;
  
-       cb->fill(cb, "nr_periods", cfs_b->nr_periods);
-       cb->fill(cb, "nr_throttled", cfs_b->nr_throttled);
-       cb->fill(cb, "throttled_time", cfs_b->throttled_time);
+       seq_printf(sf, "nr_periods %d\n", cfs_b->nr_periods);
+       seq_printf(sf, "nr_throttled %d\n", cfs_b->nr_throttled);
+       seq_printf(sf, "throttled_time %llu\n", cfs_b->throttled_time);
  
         return 0;
  }
@@ -7914,7 +7976,7 @@ static struct cftype cpu_files[] = {
         },
         {
                 .name = "stat",
-               .read_map = cpu_stats_show,
+               .seq_show = cpu_stats_show,
         },
  #endif
  #ifdef CONFIG_RT_GROUP_SCHED