sched: Add cluster scheduler level for x86

author Tim Chen <tim.c.chen@linux.intel.com>

Fri, 24 Sep 2021 08:51:04 +0000 (20:51 +1200)

committer Peter Zijlstra <peterz@infradead.org>

Fri, 15 Oct 2021 09:25:16 +0000 (11:25 +0200)
author Tim Chen <tim.c.chen@linux.intel.com>
Fri, 24 Sep 2021 08:51:04 +0000 (20:51 +1200)
committer Peter Zijlstra <peterz@infradead.org>
Fri, 15 Oct 2021 09:25:16 +0000 (11:25 +0200)
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig

index ab83c22..349e59b 100644 (file)
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1001,6 +1001,17 @@ config NR_CPUS
           This is purely to save memory: each supported CPU adds about 8KB
           to the kernel image.
  
+config SCHED_CLUSTER
+       bool "Cluster scheduler support"
+       depends on SMP
+       default y
+       help
+         Cluster scheduler support improves the CPU scheduler's decision
+         making when dealing with machines that have clusters of CPUs.
+         Cluster usually means a couple of CPUs which are placed closely
+         by sharing mid-level caches, last-level cache tags or internal
+         busses.
+
  config SCHED_SMT
         def_bool y if SMP
  
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h

index 630ff08..08b0e90 100644 (file)
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -16,7 +16,9 @@ DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_core_map);
  DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_die_map);
  /* cpus sharing the last level cache: */
  DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_llc_shared_map);
+DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_l2c_shared_map);
  DECLARE_PER_CPU_READ_MOSTLY(u16, cpu_llc_id);
+DECLARE_PER_CPU_READ_MOSTLY(u16, cpu_l2c_id);
  DECLARE_PER_CPU_READ_MOSTLY(int, cpu_number);
  
  static inline struct cpumask *cpu_llc_shared_mask(int cpu)
@@ -24,6 +26,11 @@ static inline struct cpumask *cpu_llc_shared_mask(int cpu)
         return per_cpu(cpu_llc_shared_map, cpu);
  }
  
+static inline struct cpumask *cpu_l2c_shared_mask(int cpu)
+{
+       return per_cpu(cpu_l2c_shared_map, cpu);
+}
+
  DECLARE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_cpu_to_apicid);
  DECLARE_EARLY_PER_CPU_READ_MOSTLY(u32, x86_cpu_to_acpiid);
  DECLARE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_bios_cpu_apicid);
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h

index 9239399..cc16477 100644 (file)
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -103,6 +103,7 @@ static inline void setup_node_to_cpumask_map(void) { }
  #include <asm-generic/topology.h>
  
  extern const struct cpumask *cpu_coregroup_mask(int cpu);
+extern const struct cpumask *cpu_clustergroup_mask(int cpu);
  
  #define topology_logical_package_id(cpu)       (cpu_data(cpu).logical_proc_id)
  #define topology_physical_package_id(cpu)      (cpu_data(cpu).phys_proc_id)
@@ -113,7 +114,9 @@ extern const struct cpumask *cpu_coregroup_mask(int cpu);
  extern unsigned int __max_die_per_package;
  
  #ifdef CONFIG_SMP
+#define topology_cluster_id(cpu)               (per_cpu(cpu_l2c_id, cpu))
  #define topology_die_cpumask(cpu)              (per_cpu(cpu_die_map, cpu))
+#define topology_cluster_cpumask(cpu)          (cpu_clustergroup_mask(cpu))
  #define topology_core_cpumask(cpu)             (per_cpu(cpu_core_map, cpu))
  #define topology_sibling_cpumask(cpu)          (per_cpu(cpu_sibling_map, cpu))
  
diff --git a/arch/x86/kernel/cpu/cacheinfo.c b/arch/x86/kernel/cpu/cacheinfo.c

index b5e36bd..fe98a14 100644 (file)
--- a/arch/x86/kernel/cpu/cacheinfo.c
+++ b/arch/x86/kernel/cpu/cacheinfo.c
@@ -846,6 +846,7 @@ void init_intel_cacheinfo(struct cpuinfo_x86 *c)
                 l2 = new_l2;
  #ifdef CONFIG_SMP
                 per_cpu(cpu_llc_id, cpu) = l2_id;
+               per_cpu(cpu_l2c_id, cpu) = l2_id;
  #endif
         }
  
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c

index 0f88859..1c7897c 100644 (file)
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -85,6 +85,9 @@ u16 get_llc_id(unsigned int cpu)
  }
  EXPORT_SYMBOL_GPL(get_llc_id);
  
+/* L2 cache ID of each logical CPU */
+DEFINE_PER_CPU_READ_MOSTLY(u16, cpu_l2c_id) = BAD_APICID;
+
  /* correctly size the local cpu masks */
  void __init setup_cpu_local_masks(void)
  {
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c

index 85f6e24..5094ab0 100644 (file)
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -101,6 +101,8 @@ EXPORT_PER_CPU_SYMBOL(cpu_die_map);
  
  DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_llc_shared_map);
  
+DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_l2c_shared_map);
+
  /* Per CPU bogomips and other parameters */
  DEFINE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info);
  EXPORT_PER_CPU_SYMBOL(cpu_info);
@@ -464,6 +466,21 @@ static bool match_die(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
         return false;
  }
  
+static bool match_l2c(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
+{
+       int cpu1 = c->cpu_index, cpu2 = o->cpu_index;
+
+       /* Do not match if we do not have a valid APICID for cpu: */
+       if (per_cpu(cpu_l2c_id, cpu1) == BAD_APICID)
+               return false;
+
+       /* Do not match if L2 cache id does not match: */
+       if (per_cpu(cpu_l2c_id, cpu1) != per_cpu(cpu_l2c_id, cpu2))
+               return false;
+
+       return topology_sane(c, o, "l2c");
+}
+
  /*
   * Unlike the other levels, we do not enforce keeping a
   * multicore group inside a NUMA node.  If this happens, we will
@@ -523,7 +540,7 @@ static bool match_llc(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
  }
  
  
-#if defined(CONFIG_SCHED_SMT) || defined(CONFIG_SCHED_MC)
+#if defined(CONFIG_SCHED_SMT) || defined(CONFIG_SCHED_CLUSTER) || defined(CONFIG_SCHED_MC)
  static inline int x86_sched_itmt_flags(void)
  {
         return sysctl_sched_itmt_enabled ? SD_ASYM_PACKING : 0;
@@ -541,12 +558,21 @@ static int x86_smt_flags(void)
         return cpu_smt_flags() | x86_sched_itmt_flags();
  }
  #endif
+#ifdef CONFIG_SCHED_CLUSTER
+static int x86_cluster_flags(void)
+{
+       return cpu_cluster_flags() | x86_sched_itmt_flags();
+}
+#endif
  #endif
  
  static struct sched_domain_topology_level x86_numa_in_package_topology[] = {
  #ifdef CONFIG_SCHED_SMT
         { cpu_smt_mask, x86_smt_flags, SD_INIT_NAME(SMT) },
  #endif
+#ifdef CONFIG_SCHED_CLUSTER
+       { cpu_clustergroup_mask, x86_cluster_flags, SD_INIT_NAME(CLS) },
+#endif
  #ifdef CONFIG_SCHED_MC
         { cpu_coregroup_mask, x86_core_flags, SD_INIT_NAME(MC) },
  #endif
@@ -557,6 +583,9 @@ static struct sched_domain_topology_level x86_topology[] = {
  #ifdef CONFIG_SCHED_SMT
         { cpu_smt_mask, x86_smt_flags, SD_INIT_NAME(SMT) },
  #endif
+#ifdef CONFIG_SCHED_CLUSTER
+       { cpu_clustergroup_mask, x86_cluster_flags, SD_INIT_NAME(CLS) },
+#endif
  #ifdef CONFIG_SCHED_MC
         { cpu_coregroup_mask, x86_core_flags, SD_INIT_NAME(MC) },
  #endif
@@ -584,6 +613,7 @@ void set_cpu_sibling_map(int cpu)
         if (!has_mp) {
                 cpumask_set_cpu(cpu, topology_sibling_cpumask(cpu));
                 cpumask_set_cpu(cpu, cpu_llc_shared_mask(cpu));
+               cpumask_set_cpu(cpu, cpu_l2c_shared_mask(cpu));
                 cpumask_set_cpu(cpu, topology_core_cpumask(cpu));
                 cpumask_set_cpu(cpu, topology_die_cpumask(cpu));
                 c->booted_cores = 1;
@@ -602,6 +632,9 @@ void set_cpu_sibling_map(int cpu)
                 if ((i == cpu) || (has_mp && match_llc(c, o)))
                         link_mask(cpu_llc_shared_mask, cpu, i);
  
+               if ((i == cpu) || (has_mp && match_l2c(c, o)))
+                       link_mask(cpu_l2c_shared_mask, cpu, i);
+
                 if ((i == cpu) || (has_mp && match_die(c, o)))
                         link_mask(topology_die_cpumask, cpu, i);
         }
@@ -652,6 +685,11 @@ const struct cpumask *cpu_coregroup_mask(int cpu)
         return cpu_llc_shared_mask(cpu);
  }
  
+const struct cpumask *cpu_clustergroup_mask(int cpu)
+{
+       return cpu_l2c_shared_mask(cpu);
+}
+
  static void impress_friends(void)
  {
         int cpu;
@@ -1335,6 +1373,7 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
                 zalloc_cpumask_var(&per_cpu(cpu_core_map, i), GFP_KERNEL);
                 zalloc_cpumask_var(&per_cpu(cpu_die_map, i), GFP_KERNEL);
                 zalloc_cpumask_var(&per_cpu(cpu_llc_shared_map, i), GFP_KERNEL);
+               zalloc_cpumask_var(&per_cpu(cpu_l2c_shared_map, i), GFP_KERNEL);
         }
  
         /*
@@ -1564,7 +1603,10 @@ static void remove_siblinginfo(int cpu)
  
         for_each_cpu(sibling, cpu_llc_shared_mask(cpu))
                 cpumask_clear_cpu(cpu, cpu_llc_shared_mask(sibling));
+       for_each_cpu(sibling, cpu_l2c_shared_mask(cpu))
+               cpumask_clear_cpu(cpu, cpu_l2c_shared_mask(sibling));
         cpumask_clear(cpu_llc_shared_mask(cpu));
+       cpumask_clear(cpu_l2c_shared_mask(cpu));
         cpumask_clear(topology_sibling_cpumask(cpu));
         cpumask_clear(topology_core_cpumask(cpu));
         cpumask_clear(topology_die_cpumask(cpu));
author	Tim Chen <tim.c.chen@linux.intel.com>
	Fri, 24 Sep 2021 08:51:04 +0000 (20:51 +1200)
committer	Peter Zijlstra <peterz@infradead.org>
	Fri, 15 Oct 2021 09:25:16 +0000 (11:25 +0200)
arch/x86/Kconfig		patch \| blob \| history
arch/x86/include/asm/smp.h		patch \| blob \| history
arch/x86/include/asm/topology.h		patch \| blob \| history
arch/x86/kernel/cpu/cacheinfo.c		patch \| blob \| history
arch/x86/kernel/cpu/common.c		patch \| blob \| history
arch/x86/kernel/smpboot.c		patch \| blob \| history