HMP: Implement task packing for small tasks in HMP systems
author    Chris Redpath <chris.redpath@arm.com>
          Tue, 3 Feb 2015 12:45:54 +0000 (21:45 +0900)
committer Seung-Woo Kim <sw0312.kim@samsung.com>
          Wed, 14 Dec 2016 04:41:50 +0000 (13:41 +0900)
When we wake up a task on a little CPU, fill CPUs rather than
spreading tasks across them. Adds two new files to /sys/kernel/hmp
to control the packing behaviour.

packing_enable: task packing enabled (1) or disabled (0)
packing_limit: Runqueues will be filled up to this load ratio.

This functionality is disabled by default on TC2, as it lacks per-CPU
power gating, so packing small tasks there doesn't make sense.
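
For illustration only, a minimal userspace sketch of driving the two new
knobs (not part of this patch; the helper and the values written are made
up, and it assumes the option is built in and the caller may write to
sysfs):

/* toggle_packing.c - hypothetical helper, not shipped with this patch */
#include <stdio.h>

static void write_sysfs(const char *path, const char *val)
{
	FILE *f = fopen(path, "w");

	if (f) {
		fprintf(f, "%s\n", val);
		fclose(f);
	}
}

int main(void)
{
	/* 1 = pack small tasks onto little CPUs, 0 = spread as before */
	write_sysfs("/sys/kernel/hmp/packing_enable", "1");
	/* runqueue load ratio treated as "full"; 1152 == NICE_0_LOAD * 9/8,
	 * the non-TC2 default, assuming NICE_0_LOAD is 1024 */
	write_sysfs("/sys/kernel/hmp/packing_limit", "1152");
	return 0;
}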

Signed-off-by: Chris Redpath <chris.redpath@arm.com>
Signed-off-by: Liviu Dudau <Liviu.Dudau@arm.com>
Signed-off-by: Jon Medhurst <tixy@linaro.org>
[k.kozlowski: rebased on 4.1, no signed-off-by of previous committer]
Signed-off-by: Krzysztof Kozlowski <k.kozlowski@samsung.com>
arch/arm64/Kconfig
kernel/sched/fair.c

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index c14512a..791a667 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -600,6 +600,18 @@ config HMP_FREQUENCY_INVARIANT_SCALE
          migration strategy to interact more predictably with CPUFreq's
          asynchronous compute capacity changes.
 
+config SCHED_HMP_LITTLE_PACKING
+       bool "Small task packing for HMP"
+       depends on SCHED_HMP
+       default n
+       help
+         Allows the HMP Scheduler to pack small tasks into CPUs in the
+         smallest HMP domain.
+         Controlled by two sysfs files in /sys/kernel/hmp.
+         packing_enable: 1 to enable, 0 to disable packing. Default 1.
+         packing_limit: runqueue load ratio at which a runqueue is
+           considered full. Default is NICE_0_LOAD * 9/8.
+
 config NR_CPUS
        int "Maximum number of CPUs (2-4096)"
        range 2 4096
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 1028043..fa47df3 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -2515,11 +2515,7 @@ struct hmp_global_attr {
        int (*from_sysfs)(int);
 };
 
-#ifdef CONFIG_HMP_FREQUENCY_INVARIANT_SCALE
-#define HMP_DATA_SYSFS_MAX 4
-#else
-#define HMP_DATA_SYSFS_MAX 3
-#endif
+#define HMP_DATA_SYSFS_MAX 8
 
 struct hmp_data_struct {
 #ifdef CONFIG_HMP_FREQUENCY_INVARIANT_SCALE
@@ -5163,6 +5159,13 @@ static struct sched_entity *hmp_get_lightest_task(
  * hmp_up_prio: Only up migrate task with high priority (<hmp_up_prio)
  * hmp_next_up_threshold: Delay before next up migration (1024 ~= 1 ms)
  * hmp_next_down_threshold: Delay before next down migration (1024 ~= 1 ms)
+ *
+ * Small Task Packing:
+ * We can choose to fill the littlest CPUs in an HMP system rather
+ * than spread tasks across them as usual. This behaviour is
+ * controlled by two variables:
+ * hmp_packing_enabled: runtime control over pack/spread
+ * hmp_full_threshold: Consider a CPU with this much unweighted load full
  */
 unsigned int hmp_up_threshold = 700;
 unsigned int hmp_down_threshold = 512;
@@ -5172,11 +5175,27 @@ unsigned int hmp_up_prio = NICE_TO_PRIO(CONFIG_SCHED_HMP_PRIO_FILTER_VAL);
 unsigned int hmp_next_up_threshold = 4096;
 unsigned int hmp_next_down_threshold = 4096;
 
+#ifdef CONFIG_SCHED_HMP_LITTLE_PACKING
+unsigned int hmp_packing_enabled = 1;
+#ifndef CONFIG_ARCH_VEXPRESS_TC2
+unsigned int hmp_full_threshold = (NICE_0_LOAD * 9) / 8;
+#else
+/* TC2 has a sharp consumption curve at around 800MHz, so we aim
+ * to spread the load around that frequency. */
+unsigned int hmp_full_threshold = 650;  /* 80% of the 800MHz freq * NICE_0_LOAD */
+#endif
+#endif
+
 static unsigned int hmp_up_migration(int cpu, int *target_cpu, struct sched_entity *se);
 static unsigned int hmp_down_migration(int cpu, struct sched_entity *se);
 static inline unsigned int hmp_domain_min_load(struct hmp_domain *hmpd,
                                                int *min_cpu, struct cpumask *affinity);
 
+static inline struct hmp_domain *hmp_smallest_domain(void)
+{
+       return list_entry(hmp_domains.prev, struct hmp_domain, hmp_domains);
+}
+
 /* Check if cpu is in fastest hmp_domain */
 static inline unsigned int hmp_cpu_is_fastest(int cpu)
 {
@@ -5256,6 +5275,49 @@ static inline unsigned int hmp_select_slower_cpu(struct task_struct *tsk,
        return lowest_cpu;
 }
 
+#ifdef CONFIG_SCHED_HMP_LITTLE_PACKING
+/*
+ * Select the 'best' candidate little CPU to wake up on.
+ * Implements a packing strategy which examines CPUs in
+ * logical order and selects the first which will have
+ * at least 10% capacity available, according to the
+ * tracked load of both the runqueue and the task.
+ */
+static inline unsigned int hmp_best_little_cpu(struct task_struct *tsk,
+               int cpu) {
+       int tmp_cpu;
+       unsigned long estimated_load;
+       struct hmp_domain *hmp;
+       struct sched_avg *avg;
+       struct cpumask allowed_hmp_cpus;
+
+       if (!hmp_packing_enabled ||
+                       tsk->se.avg.load_avg_ratio > ((NICE_0_LOAD * 90)/100))
+               return hmp_select_slower_cpu(tsk, cpu);
+
+       if (hmp_cpu_is_slowest(cpu))
+               hmp = hmp_cpu_domain(cpu);
+       else
+               hmp = hmp_slower_domain(cpu);
+
+       /* respect affinity */
+       cpumask_and(&allowed_hmp_cpus, &hmp->cpus,
+                       tsk_cpus_allowed(tsk));
+
+       for_each_cpu_mask(tmp_cpu, allowed_hmp_cpus) {
+               avg = &cpu_rq(tmp_cpu)->avg;
+               /* estimate new rq load if we add this task */
+               estimated_load = avg->load_avg_ratio +
+                               tsk->se.avg.load_avg_ratio;
+               if (estimated_load <= hmp_full_threshold) {
+                       cpu = tmp_cpu;
+                       break;
+               }
+       }
+       /* if no suitable CPU was found, 'cpu' keeps its initial value */
+       return cpu;
+}
+#endif
 static inline void hmp_next_up_delay(struct sched_entity *se, int cpu)
 {
        /* hack - always use clock from first online CPU */
@@ -5380,6 +5442,15 @@ static int hmp_freqinvar_from_sysfs(int value)
        return value;
 }
 #endif
+#ifdef CONFIG_SCHED_HMP_LITTLE_PACKING
+/* packing value must be non-negative */
+static int hmp_packing_from_sysfs(int value)
+{
+       if (value < 0)
+               return -1;
+       return value;
+}
+#endif
 static void hmp_attr_add(
        const char *name,
        int *value,
@@ -5433,6 +5504,16 @@ static int hmp_attr_init(void)
                NULL,
                hmp_freqinvar_from_sysfs);
 #endif
+#ifdef CONFIG_SCHED_HMP_LITTLE_PACKING
+       hmp_attr_add("packing_enable",
+               &hmp_packing_enabled,
+               NULL,
+               hmp_freqinvar_from_sysfs);
+       hmp_attr_add("packing_limit",
+               &hmp_full_threshold,
+               NULL,
+               hmp_packing_from_sysfs);
+#endif
        hmp_data.attr_group.name = "hmp";
        hmp_data.attr_group.attrs = hmp_data.attributes;
        ret = sysfs_create_group(kernel_kobj,
@@ -5681,10 +5762,17 @@ unlock:
                return new_cpu;
        }
        if (hmp_down_migration(prev_cpu, &p->se)) {
+#ifdef CONFIG_SCHED_HMP_LITTLE_PACKING
+               new_cpu = hmp_best_little_cpu(p, prev_cpu);
+#else
+
                new_cpu = hmp_select_slower_cpu(p, prev_cpu);
-               hmp_next_down_delay(&p->se, new_cpu);
-               trace_sched_hmp_migrate(p, new_cpu, HMP_MIGRATE_WAKEUP);
-               return new_cpu;
+#endif
+               if (new_cpu != prev_cpu) {
+                       hmp_next_down_delay(&p->se, new_cpu);
+                       trace_sched_hmp_migrate(p, new_cpu, HMP_MIGRATE_WAKEUP);
+                       return new_cpu;
+               }
        }
        /* Make sure that the task stays in its previous hmp domain */
        if (!cpumask_test_cpu(new_cpu, &hmp_cpu_domain(prev_cpu)->cpus))
@@ -8189,19 +8277,50 @@ static struct {
        unsigned long next_balance;     /* in jiffy units */
 } nohz ____cacheline_aligned;
 
+#ifdef CONFIG_SCHED_HMP_LITTLE_PACKING
+/*
+ * Decide if the tasks on the busy CPUs in the
+ * littlest domain would benefit from an idle balance
+ */
+static int hmp_packing_ilb_needed(int cpu)
+{
+       struct hmp_domain *hmp;
+       /* always allow ilb on non-slowest domain */
+       if (!hmp_cpu_is_slowest(cpu))
+               return 1;
+
+       hmp = hmp_cpu_domain(cpu);
+       for_each_cpu_and(cpu, &hmp->cpus, nohz.idle_cpus_mask) {
+               /* only idle balance if a CPU is loaded over threshold */
+               if (cpu_rq(cpu)->avg.load_avg_ratio > hmp_full_threshold)
+                       return 1;
+       }
+       return 0;
+}
+#endif
+
 static inline int find_new_ilb(void)
 {
        int ilb = cpumask_first(nohz.idle_cpus_mask);
 
 #ifdef CONFIG_SCHED_HMP
        int call_cpu = smp_processor_id();
+       int ilb_needed = 1;
+
        /* restrict nohz balancing to occur in the same hmp domain */
        ilb = cpumask_first_and(nohz.idle_cpus_mask,
                        &((struct hmp_domain *)hmp_cpu_domain(call_cpu))->cpus);
+#ifdef CONFIG_SCHED_HMP_LITTLE_PACKING
+       if (ilb < nr_cpu_ids)
+               ilb_needed = hmp_packing_ilb_needed(ilb);
 #endif
 
+       if (ilb_needed && ilb < nr_cpu_ids && idle_cpu(ilb))
+               return ilb;
+#else
        if (ilb < nr_cpu_ids && idle_cpu(ilb))
                return ilb;
+#endif
 
        return nr_cpu_ids;
 }
@@ -8612,8 +8731,14 @@ static unsigned int hmp_down_migration(int cpu, struct sched_entity *se)
        struct task_struct *p = task_of(se);
        u64 now;
 
-       if (hmp_cpu_is_slowest(cpu))
+       if (hmp_cpu_is_slowest(cpu)) {
+#ifdef CONFIG_SCHED_HMP_LITTLE_PACKING
+               if (hmp_packing_enabled)
+                       return 1;
+               else
+#endif
                return 0;
+       }
 
 #ifdef CONFIG_SCHED_HMP_PRIO_FILTER
        /* Filter by task priority */
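
For reference, a self-contained userspace sketch (illustrative only, not
kernel code, affinity handling omitted) of the packing decision that
hmp_best_little_cpu() implements above: the task goes to the first little
CPU whose tracked runqueue load plus the task's own load stays at or below
hmp_full_threshold, and keeps its previous CPU if none fits. The CPU count,
loads and threshold below are example values.

/* Standalone model of the packing decision; names mirror the patch,
 * but the numbers are examples and this is not kernel code. */
#include <stdio.h>

#define NR_LITTLE	4
#define NICE_0_LOAD	1024

static unsigned int hmp_full_threshold = (NICE_0_LOAD * 9) / 8;	/* 1152 */

/* first little CPU whose estimated load stays within the threshold */
static int pick_little_cpu(const unsigned int rq_load[], unsigned int task_load,
			   int prev_cpu)
{
	int cpu;

	for (cpu = 0; cpu < NR_LITTLE; cpu++) {
		/* estimate the new runqueue load if this task is added */
		if (rq_load[cpu] + task_load <= hmp_full_threshold)
			return cpu;
	}
	return prev_cpu;	/* no CPU has room: keep the previous one */
}

int main(void)
{
	/* example tracked loads: CPU0 is nearly full, CPU1 has headroom */
	unsigned int rq_load[NR_LITTLE] = { 1000, 300, 0, 0 };

	/* 1000 + 200 = 1200 > 1152, so CPU0 is skipped; 300 + 200 <= 1152 */
	printf("task placed on CPU%d\n", pick_little_cpu(rq_load, 200, 3));
	return 0;
}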