sched/power: add power and thermal governance
authorLukasz Luba <l.luba@partner.samsung.com>
Thu, 25 Oct 2018 16:27:09 +0000 (18:27 +0200)
committerLukasz Luba <l.luba@partner.samsung.com>
Fri, 17 May 2019 07:15:34 +0000 (09:15 +0200)
This patch adds a new feature to the scheduler and provides a
connection with the thermal subsystem, which grants power to
cooling devices (in the DVFS case, the CPUs themselves).
.....

Signed-off-by: Lukasz Luba <l.luba@partner.samsung.com>
include/linux/sched/power.h [new file with mode: 0644]
kernel/sched/Makefile
kernel/sched/fair.c
kernel/sched/power.c [new file with mode: 0644]
kernel/sched/power.h [new file with mode: 0644]
kernel/sched/sched.h
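For context, a hedged sketch of how a consumer of the new interface might
look. The only entry point exported to the rest of the kernel is
sched_power_cpu_reinit_weight(), declared in the new
include/linux/sched/power.h; a thermal governor or cooling-device driver
could use it to hand a reduced per-CPU weight back to the scheduler. The
function name and policy below are hypothetical and not part of this patch;
the weight range follows the patch's 0..1024 (100%) convention, with 1024
matching DEFAULT_CPU_WEIGHT.

/*
 * Hypothetical example (not part of this patch): a thermal governor
 * lowering the scheduling weight of an overheating CPU and restoring
 * the default weight (1024) everywhere else.
 */
#include <linux/cpumask.h>
#include <linux/printk.h>
#include <linux/sched/power.h>

static void example_throttle_hot_cpu(unsigned int hot_cpu)
{
	unsigned int cpu;
	int ret;

	for_each_online_cpu(cpu) {
		ret = sched_power_cpu_reinit_weight(cpu, cpu == hot_cpu ? 512 : 1024);
		if (ret == -EAGAIN)
			pr_debug("sched power not initialised yet for CPU%u\n", cpu);
	}
}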

diff --git a/include/linux/sched/power.h b/include/linux/sched/power.h
new file mode 100644 (file)
index 0000000..7827ba0
--- /dev/null
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Scheduler CPU power
+ *
+ *  Copyright (C) 2018 Samsung
+ */
+
+#ifndef __INC_SCHED_POWER_H__
+#define __INC_SCHED_POWER_H__
+
+
+
+int sched_power_cpu_reinit_weight(int cpu, int weight);
+
+#endif
index 7fe183404c383f8d4611d47eb9d4299994f8b7ee..c1ccc0a9dc9ba9a2651986a5014c5aac1f78cc7c 100644 (file)
@@ -20,7 +20,7 @@ obj-y += core.o loadavg.o clock.o cputime.o
 obj-y += idle.o fair.o rt.o deadline.o
 obj-y += wait.o wait_bit.o swait.o completion.o
 
-obj-$(CONFIG_SMP) += cpupri.o cpudeadline.o topology.o stop_task.o pelt.o
+obj-$(CONFIG_SMP) += cpupri.o cpudeadline.o topology.o stop_task.o pelt.o power.o
 obj-$(CONFIG_SCHED_AUTOGROUP) += autogroup.o
 obj-$(CONFIG_SCHEDSTATS) += stats.o
 obj-$(CONFIG_SCHED_DEBUG) += debug.o
index 908c9cdae2f0c05d4a4956844249da4517e2ca97..c03c709ccc686f9dd983e404730dc9209449ece0 100644 (file)
@@ -4172,6 +4172,7 @@ entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued)
         */
        update_curr(cfs_rq);
 
+
        /*
         * Ensure that runnable average is periodically updated.
         */
@@ -6357,6 +6358,9 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f
        }
        rcu_read_unlock();
 
+       if (prev_cpu != new_cpu)
+               sched_power_change_cpu_weight(new_cpu, 512, 0);
+
        return new_cpu;
 }
 
@@ -9658,6 +9662,8 @@ static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued)
 
        if (static_branch_unlikely(&sched_numa_balancing))
                task_tick_numa(rq, curr);
+
+       /* sched_power_change_cpu_weight(cpu_of(rq), 768, 0); */
 }
 
 /*
diff --git a/kernel/sched/power.c b/kernel/sched/power.c
new file mode 100644 (file)
index 0000000..c2fc081
--- /dev/null
@@ -0,0 +1,268 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Scheduler CPU power
+ *
+ *  Copyright (C) 2018 Samsung
+ */
+
+
+#include <linux/sched.h>
+#include <linux/thermal.h>
+
+#include "power.h"
+
+#define THERMAL_REQUEST_KFIFO_SIZE     (64 * sizeof(struct power_request))
+#define DEFAULT_CPU_WEIGHT 1024
+
+static DEFINE_PER_CPU(struct cpu_power, cpu_power);
+DEFINE_PER_CPU(struct update_sched_power *, update_cpu_power);
+
+static struct sched_power sched_power;
+
+void sched_power_set_update_func(int cpu, struct update_sched_power *update,
+               void (*fn)(struct update_sched_power *, int, unsigned int, int,
+                          int))
+{
+
+       if (WARN_ON(!update || !fn))
+               return;
+
+       if (WARN_ON(per_cpu(update_cpu_power, cpu)))
+               return;
+
+       update->func = fn;
+       rcu_assign_pointer(per_cpu(update_cpu_power, cpu), update);
+}
+
+void sched_power_clean_update_func(int cpu)
+{
+       rcu_assign_pointer(per_cpu(update_cpu_power, cpu), NULL);
+}
+
+
+/* Helpers for aggregating weight/capacity over a group of CPUs */
+
+
+unsigned int cpu_power_calc_group_weight(int cpu)
+{
+       cpumask_t *span_cpus = NULL;
+       struct cpu_power *power;
+       unsigned int w = 0;
+       int i;
+       int num_cpus;
+
+
+       num_cpus = cpumask_weight(span_cpus);
+
+       for_each_cpu(i, span_cpus) {
+               power = (&per_cpu(cpu_power, i));
+               w += power->weight;
+       }
+
+       if (num_cpus)
+               w /= num_cpus;
+
+       return w;
+}
+
+int get_state_for_power(int cpu, unsigned long power)
+{
+       /* unsigned long gr_load; */
+
+
+       return 0;
+}
+
+int cpu_power_calc_group_capacity(unsigned long gr_power, unsigned gr_weight,
+                                 int cpu)
+{
+       cpumask_t *span_cpus = NULL;
+       int num_cpus;
+       struct cpu_power *power;
+       unsigned long p;
+       int i, state;
+       /* int size = 0; */
+       unsigned long max_power = 0;
+
+       num_cpus = cpumask_weight(span_cpus);
+
+       for_each_cpu(i, span_cpus) {
+               power = (&per_cpu(cpu_power, i));
+               p = gr_power * (power->weight << 10) / gr_weight;
+               p >>= 10;
+
+               if (max_power < p)
+                       max_power = p;
+       }
+
+
+       state = get_state_for_power(cpu, max_power);
+
+
+       return 0;
+}
+
+int sched_power_cpu_reinit_weight(int cpu, int weight)
+{
+       struct cpu_power *cpower = &per_cpu(cpu_power, cpu);
+
+       if (!cpower->operating)
+               return -EAGAIN;
+
+       raw_spin_lock(&cpower->update_lock);
+       cpower->weight = weight;
+       raw_spin_unlock(&cpower->update_lock);
+
+       return 0;
+}
+EXPORT_SYMBOL_GPL(sched_power_cpu_reinit_weight);
+
+/* Deferred update path: irq_work -> kthread -> thermal framework */
+
+
+static bool should_update_next_weight(int time)
+{
+       return true;
+}
+
+static void sched_power_work(struct kthread_work *work)
+{
+       struct sched_power *sp = container_of(work, struct sched_power, work);
+       int i;
+       struct cpu_power *cpower = NULL;
+       struct power_request req;
+
+       for_each_online_cpu(i) {
+               cpower = (&per_cpu(cpu_power, i));
+               raw_spin_lock(&cpower->update_lock);
+               req = cpower->req;
+               cpower->req.time = 0;
+               raw_spin_unlock(&cpower->update_lock);
+
+               if (should_update_next_weight(req.time)) {
+                       pr_info("cpower req popped\n");
+                       thermal_cpu_cdev_set_weight(req.cpu, req.weight);
+               }
+       }
+
+       sp->work_in_progress = false;
+}
+
+static void sched_power_irq_work(struct irq_work *irq_work)
+{
+       struct sched_power *power;
+
+       power = container_of(irq_work, struct sched_power, irq_work);
+
+       kthread_queue_work(&power->worker, &power->work);
+}
+
+static void sched_power_update(struct update_sched_power *update, int cpu,
+                              unsigned int weight, int flags, int time)
+{
+       struct cpu_power *cpower = container_of(update, struct cpu_power,
+                                                  update_power);
+       struct sched_power *sp;
+
+       if (!cpower->operating)
+               return;
+
+       sp = cpower->sched_power;
+
+       /* Filter out too-frequent changes */
+       if (!should_update_next_weight(time))
+               return;
+
+       raw_spin_lock(&cpower->update_lock);
+       cpower->req.weight = weight;
+       cpower->req.cpu = cpu;
+       cpower->req.time = time;
+       raw_spin_unlock(&cpower->update_lock);
+
+       if (!sp->work_in_progress) {
+               sp->work_in_progress = true;
+               irq_work_queue(&sp->irq_work);
+       }
+}
+
+
+static int sched_power_create_thread(struct sched_power *power)
+{
+       int ret;
+       struct task_struct *thread;
+       struct sched_attr attr = {
+               .sched_policy = SCHED_DEADLINE,
+               .sched_nice = 0,
+               .sched_priority = 0,
+               .sched_flags = 0,
+               .sched_runtime  =  1000000,
+               .sched_deadline = 10000000,
+               .sched_period   = 10000000,
+       };
+
+       kthread_init_work(&power->work, sched_power_work);
+       kthread_init_worker(&power->worker);
+       thread = kthread_create(kthread_worker_fn, &power->worker,
+                               "sched_power/a");
+
+       if (IS_ERR(thread)) {
+               pr_err("failed to create sched_power thread %ld\n",
+                      PTR_ERR(thread));
+               return PTR_ERR(thread);
+       }
+
+       ret = sched_setattr_nocheck(thread, &attr);
+       if (ret) {
+               kthread_stop(thread);
+               pr_warn("failed to set SCHED_DEADLINE for sched_power %d\n",
+                       ret);
+               return ret;
+       }
+
+       power->thread = thread;
+       mutex_init(&power->work_lock);
+       init_irq_work(&power->irq_work, sched_power_irq_work);
+       wake_up_process(thread);
+
+       return 0;
+}
+
+static void sched_power_disable_thread(struct sched_power *sp)
+{
+       kthread_flush_worker(&sp->worker);
+       kthread_stop(sp->thread);
+       mutex_destroy(&sp->work_lock);
+}
+
+static int sched_power_setup(struct sched_power *sp)
+{
+       int i;
+       struct cpu_power *cpower;
+
+       for_each_possible_cpu(i) {
+               cpower = (&per_cpu(cpu_power, i));
+               cpower->weight = DEFAULT_CPU_WEIGHT;
+               cpower->sched_power = sp;
+               sched_power_set_update_func(i, &cpower->update_power,
+                                           sched_power_update);
+               raw_spin_lock_init(&cpower->update_lock);
+               cpower->operating = true;
+       }
+
+       return 0;
+}
+
+
+static int __init sched_power_init(void)
+{
+       int ret = 0;
+
+       ret = sched_power_create_thread(&sched_power);
+       if (ret)
+               return ret;
+
+       sched_power_setup(&sched_power);
+
+       return ret;
+}
+fs_initcall(sched_power_init);
diff --git a/kernel/sched/power.h b/kernel/sched/power.h
new file mode 100644 (file)
index 0000000..f08277e
--- /dev/null
@@ -0,0 +1,56 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Scheduler CPU power
+ *
+ *  Copyright (C) 2018 Samsung
+ */
+
+#ifndef __SCHED_POWER_H__
+#define __SCHED_POWER_H__
+
+#include "sched.h"
+
+// struct update_sched_power {
+//     void (*func)(struct update_sched_power *, int, unsigned int, int);
+// };
+
+struct power_budget {
+       s64 temp;
+       s64 temp_limit;
+       s64 avail_power;
+};
+
+struct sched_power {
+       struct task_struct *thread;
+       struct irq_work irq_work;
+       struct kthread_work work;
+       struct kthread_worker worker;
+       bool work_in_progress;
+       struct mutex work_lock;
+};
+
+struct power_request {
+       unsigned int weight;
+       int cpu;
+       int time;
+};
+
+struct cpu_power {
+       struct update_sched_power update_power;
+       unsigned int max_capacity;
+       unsigned int capacity;
+       unsigned int vcapacity;
+       int opp_state;
+       u64 opp_power_cost;
+       unsigned long vidle;
+       unsigned int vrun; /* from 0..1024 (100%) */
+       unsigned int weight; /* 0..1024 (100%) */
+       struct sched_power *sched_power;
+       struct power_request req;
+       bool operating;
+       /* lock shared with thermal framework and/or cpufreq */
+       raw_spinlock_t update_lock;
+};
+
+
+#endif
index 9683f458aec72823dfed413b5dc0a262836aaf85..c1714ef7366982c1c442913fca7769fba38a9d1c 100644 (file)
@@ -2244,3 +2244,25 @@ unsigned long scale_irq_capacity(unsigned long util, unsigned long irq, unsigned
        return util;
 }
 #endif
+
+#ifdef CONFIG_THERMAL
+struct update_sched_power {
+       void (*func)(struct update_sched_power *, int, unsigned int, int, int);
+};
+DECLARE_PER_CPU(struct update_sched_power *, update_cpu_power);
+
+static inline void sched_power_change_cpu_weight(int cpu, unsigned int weight,
+                                                int flags)
+{
+       struct update_sched_power *update;
+       int time = 0;
+
+
+       update = rcu_dereference_sched(*per_cpu_ptr(&update_cpu_power, cpu));
+       if (update)
+               update->func(update, cpu, weight, flags, time);
+}
+#else
+static inline void sched_power_change_cpu_weight(int cpu, unsigned int weight,
+                                                int flags) {}
+#endif /* CONFIG_THERMAL */