Merge branch 'akpm' (patches from Andrew)
author     Linus Torvalds <torvalds@linux-foundation.org>
           Wed, 6 Mar 2019 18:31:36 +0000 (10:31 -0800)
committer  Linus Torvalds <torvalds@linux-foundation.org>
           Wed, 6 Mar 2019 18:31:36 +0000 (10:31 -0800)
Merge misc updates from Andrew Morton:

 - a few misc things

 - ocfs2 updates

 - most of MM

* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (159 commits)
  tools/testing/selftests/proc/proc-self-syscall.c: remove duplicate include
  proc: more robust bulk read test
  proc: test /proc/*/maps, smaps, smaps_rollup, statm
  proc: use seq_puts() everywhere
  proc: read kernel cpu stat pointer once
  proc: remove unused argument in proc_pid_lookup()
  fs/proc/thread_self.c: code cleanup for proc_setup_thread_self()
  fs/proc/self.c: code cleanup for proc_setup_self()
  proc: return exit code 4 for skipped tests
  mm,mremap: bail out earlier in mremap_to under map pressure
  mm/sparse: fix a bad comparison
  mm/memory.c: do_fault: avoid usage of stale vm_area_struct
  writeback: fix inode cgroup switching comment
  mm/huge_memory.c: fix "orig_pud" set but not used
  mm/hotplug: fix an imbalance with DEBUG_PAGEALLOC
  mm/memcontrol.c: fix bad line in comment
  mm/cma.c: cma_declare_contiguous: correct err handling
  mm/page_ext.c: fix an imbalance with kmemleak
  mm/compaction: pass pgdat to too_many_isolated() instead of zone
  mm: remove zone_lru_lock() function, access ->lru_lock directly
  ...

MAINTAINERS
fs/proc/stat.c
fs/proc/task_nommu.c
include/linux/sched.h
init/init_task.c
kernel/kthread.c
kernel/sched/core.c
kernel/sched/fair.c
kernel/sysctl.c

diff --combined MAINTAINERS
@@@ -1036,26 -1036,26 +1036,26 @@@ F:   drivers/net/appletalk
  F:    net/appletalk/
  
  APPLIED MICRO (APM) X-GENE DEVICE TREE SUPPORT
 -M:    Duc Dang <dhdang@apm.com>
 +M:    Khuong Dinh <khuong@os.amperecomputing.com>
  S:    Supported
  F:    arch/arm64/boot/dts/apm/
  
  APPLIED MICRO (APM) X-GENE SOC EDAC
 -M:    Loc Ho <lho@apm.com>
 +M:    Khuong Dinh <khuong@os.amperecomputing.com>
  S:    Supported
  F:    drivers/edac/xgene_edac.c
  F:    Documentation/devicetree/bindings/edac/apm-xgene-edac.txt
  
  APPLIED MICRO (APM) X-GENE SOC ETHERNET (V2) DRIVER
 -M:    Iyappan Subramanian <isubramanian@apm.com>
 -M:    Keyur Chudgar <kchudgar@apm.com>
 +M:    Iyappan Subramanian <iyappan@os.amperecomputing.com>
 +M:    Keyur Chudgar <keyur@os.amperecomputing.com>
  S:    Supported
  F:    drivers/net/ethernet/apm/xgene-v2/
  
  APPLIED MICRO (APM) X-GENE SOC ETHERNET DRIVER
 -M:    Iyappan Subramanian <isubramanian@apm.com>
 -M:    Keyur Chudgar <kchudgar@apm.com>
 -M:    Quan Nguyen <qnguyen@apm.com>
 +M:    Iyappan Subramanian <iyappan@os.amperecomputing.com>
 +M:    Keyur Chudgar <keyur@os.amperecomputing.com>
 +M:    Quan Nguyen <quan@os.amperecomputing.com>
  S:    Supported
  F:    drivers/net/ethernet/apm/xgene/
  F:    drivers/net/phy/mdio-xgene.c
@@@ -1063,7 -1063,7 +1063,7 @@@ F:      Documentation/devicetree/bindings/ne
  F:    Documentation/devicetree/bindings/net/apm-xgene-mdio.txt
  
  APPLIED MICRO (APM) X-GENE SOC PMU
 -M:    Tai Nguyen <ttnguyen@apm.com>
 +M:    Khuong Dinh <khuong@os.amperecomputing.com>
  S:    Supported
  F:    drivers/perf/xgene_pmu.c
  F:    Documentation/perf/xgene-pmu.txt
@@@ -1371,13 -1371,6 +1371,13 @@@ F:    arch/arm/mach-aspeed
  F:    arch/arm/boot/dts/aspeed-*
  N:    aspeed
  
 +ARM/BITMAIN ARCHITECTURE
 +M:    Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
 +L:    linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 +S:    Maintained
 +F:    arch/arm64/boot/dts/bitmain/
 +F:    Documentation/devicetree/bindings/arm/bitmain.yaml
 +
  ARM/CALXEDA HIGHBANK ARCHITECTURE
  M:    Rob Herring <robh@kernel.org>
  L:    linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
@@@ -1537,14 -1530,21 +1537,14 @@@ ARM/FREESCALE IMX / MXC ARM ARCHITECTUR
  M:    Shawn Guo <shawnguo@kernel.org>
  M:    Sascha Hauer <s.hauer@pengutronix.de>
  R:    Pengutronix Kernel Team <kernel@pengutronix.de>
 -R:    Fabio Estevam <fabio.estevam@nxp.com>
 +R:    Fabio Estevam <festevam@gmail.com>
  R:    NXP Linux Team <linux-imx@nxp.com>
  L:    linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
  S:    Maintained
  T:    git git://git.kernel.org/pub/scm/linux/kernel/git/shawnguo/linux.git
 -F:    arch/arm/mach-imx/
 -F:    arch/arm/mach-mxs/
 -F:    arch/arm/boot/dts/imx*
 -F:    arch/arm/configs/imx*_defconfig
 -F:    arch/arm64/boot/dts/freescale/imx*
 -F:    drivers/clk/imx/
 -F:    drivers/firmware/imx/
 -F:    drivers/soc/imx/
 -F:    include/linux/firmware/imx/
 -F:    include/soc/imx/
 +N:    imx
 +N:    mxs
 +X:    drivers/media/i2c/
  
  ARM/FREESCALE VYBRID ARM ARCHITECTURE
  M:    Shawn Guo <shawnguo@kernel.org>
@@@ -1947,37 -1947,19 +1947,37 @@@ M:   David Brown <david.brown@linaro.org
  L:    linux-arm-msm@vger.kernel.org
  S:    Maintained
  F:    Documentation/devicetree/bindings/soc/qcom/
 +F:    Documentation/devicetree/bindings/*/qcom*
  F:    arch/arm/boot/dts/qcom-*.dts
  F:    arch/arm/boot/dts/qcom-*.dtsi
  F:    arch/arm/mach-qcom/
 -F:    arch/arm64/boot/dts/qcom/*
 +F:    arch/arm64/boot/dts/qcom/
 +F:    drivers/*/qcom/
 +F:    drivers/*/qcom*
 +F:    drivers/*/*/qcom/
 +F:    drivers/*/*/qcom*
 +F:    drivers/*/pm8???-*
 +F:    drivers/bluetooth/btqcomsmd.c
 +F:    drivers/clocksource/timer-qcom.c
 +F:    drivers/extcon/extcon-qcom*
 +F:    drivers/iommu/msm*
  F:    drivers/i2c/busses/i2c-qup.c
 -F:    drivers/clk/qcom/
 -F:    drivers/dma/qcom/
 -F:    drivers/soc/qcom/
 +F:    drivers/i2c/busses/i2c-qcom-geni.c
 +F:    drivers/mfd/ssbi.c
 +F:    drivers/mmc/host/mmci_qcom*
 +F:    drivers/mmc/host/sdhci_msm.c
 +F:    drivers/pci/controller/dwc/pcie-qcom.c
 +F:    drivers/phy/qualcomm/
 +F:    drivers/power/*/msm*
 +F:    drivers/reset/reset-qcom-*
 +F:    drivers/scsi/ufs/ufs-qcom.*
  F:    drivers/spi/spi-qup.c
 +F:    drivers/spi/spi-geni-qcom.c
 +F:    drivers/spi/spi-qcom-qspi.c
  F:    drivers/tty/serial/msm_serial.c
 -F:    drivers/*/pm8???-*
 -F:    drivers/mfd/ssbi.c
 -F:    drivers/firmware/qcom_scm*
 +F:    drivers/usb/dwc3/dwc3-qcom.c
 +F:    include/dt-bindings/*/qcom*
 +F:    include/linux/*/qcom*
  T:    git git://git.kernel.org/pub/scm/linux/kernel/git/agross/linux.git
  
  ARM/RADISYS ENP2611 MACHINE SUPPORT
@@@ -2014,7 -1996,7 +2014,7 @@@ Q:      http://patchwork.kernel.org/project/
  T:    git git://git.kernel.org/pub/scm/linux/kernel/git/horms/renesas.git next
  S:    Supported
  F:    arch/arm64/boot/dts/renesas/
 -F:    Documentation/devicetree/bindings/arm/shmobile.txt
 +F:    Documentation/devicetree/bindings/arm/renesas.yaml
  F:    drivers/soc/renesas/
  F:    include/linux/soc/renesas/
  
@@@ -2126,8 -2108,6 +2126,8 @@@ Q:      http://patchwork.kernel.org/project/
  T:    git git://git.kernel.org/pub/scm/linux/kernel/git/horms/renesas.git next
  S:    Supported
  F:    arch/arm/boot/dts/emev2*
 +F:    arch/arm/boot/dts/gr-peach*
 +F:    arch/arm/boot/dts/iwg20d-q7*
  F:    arch/arm/boot/dts/r7s*
  F:    arch/arm/boot/dts/r8a*
  F:    arch/arm/boot/dts/r9a*
@@@ -2135,7 -2115,7 +2135,7 @@@ F:      arch/arm/boot/dts/sh
  F:    arch/arm/configs/shmobile_defconfig
  F:    arch/arm/include/debug/renesas-scif.S
  F:    arch/arm/mach-shmobile/
 -F:    Documentation/devicetree/bindings/arm/shmobile.txt
 +F:    Documentation/devicetree/bindings/arm/renesas.yaml
  F:    drivers/soc/renesas/
  F:    include/linux/soc/renesas/
  
@@@ -2628,7 -2608,6 +2628,7 @@@ L:      linux-kernel@vger.kernel.or
  S:    Maintained
  F:    arch/*/include/asm/atomic*.h
  F:    include/*/atomic*.h
 +F:    scripts/atomic/
  
  ATTO EXPRESSSAS SAS/SATA RAID SCSI DRIVER
  M:    Bradley Grove <linuxdrivers@attotech.com>
@@@ -9835,6 -9814,14 +9835,14 @@@ F:    kernel/sched/membarrier.
  F:    include/uapi/linux/membarrier.h
  F:    arch/powerpc/include/asm/membarrier.h
  
+ MEMBLOCK
+ M:    Mike Rapoport <rppt@linux.ibm.com>
+ L:    linux-mm@kvack.org
+ S:    Maintained
+ F:    include/linux/memblock.h
+ F:    mm/memblock.c
+ F:    Documentation/core-api/boot-time-mm.rst
+ 
  MEMORY MANAGEMENT
  L:    linux-mm@kvack.org
  W:    http://www.linux-mm.org
@@@ -9908,11 -9895,6 +9916,11 @@@ S:    Maintaine
  F:    drivers/mtd/nand/raw/meson_*
  F:    Documentation/devicetree/bindings/mtd/amlogic,meson-nand.txt
  
 +METHODE UDPU SUPPORT
 +M:    Vladimir Vid <vladimir.vid@sartura.hr>
 +S:    Maintained
 +F:    arch/arm64/boot/dts/marvell/armada-3720-uDPU.dts
 +
  MICROBLAZE ARCHITECTURE
  M:    Michal Simek <monstr@monstr.eu>
  W:    http://www.monstr.eu/fdt/
@@@ -10853,12 -10835,6 +10861,12 @@@ F: drivers/power/supply/bq27xxx_battery
  F:    drivers/power/supply/isp1704_charger.c
  F:    drivers/power/supply/rx51_battery.c
  
 +NOLIBC HEADER FILE
 +M:    Willy Tarreau <w@1wt.eu>
 +S:    Maintained
 +T:    git git://git.kernel.org/pub/scm/linux/kernel/git/wtarreau/nolibc.git
 +F:    tools/include/nolibc/
 +
  NTB AMD DRIVER
  M:    Shyam Sundar S K <Shyam-sundar.S-k@amd.com>
  L:    linux-ntb@googlegroups.com
@@@ -11339,11 -11315,6 +11347,11 @@@ M: Jens Wiklander <jens.wiklander@linar
  S:    Maintained
  F:    drivers/tee/optee/
  
 +OP-TEE RANDOM NUMBER GENERATOR (RNG) DRIVER
 +M:    Sumit Garg <sumit.garg@linaro.org>
 +S:    Maintained
 +F:    drivers/char/hw_random/optee-rng.c
 +
  OPA-VNIC DRIVER
  M:    Dennis Dalessandro <dennis.dalessandro@intel.com>
  M:    Niranjana Vishwanathapura <niranjana.vishwanathapura@intel.com>
@@@ -11643,7 -11614,7 +11651,7 @@@ F:   Documentation/devicetree/bindings/pc
  F:    drivers/pci/controller/pcie-altera.c
  
  PCI DRIVER FOR APPLIEDMICRO XGENE
 -M:    Tanmay Inamdar <tinamdar@apm.com>
 +M:    Toan Le <toan@os.amperecomputing.com>
  L:    linux-pci@vger.kernel.org
  L:    linux-arm-kernel@lists.infradead.org
  S:    Maintained
@@@ -11821,7 -11792,7 +11829,7 @@@ F:   Documentation/devicetree/bindings/pc
  F:    drivers/pci/controller/pcie-altera-msi.c
  
  PCI MSI DRIVER FOR APPLIEDMICRO XGENE
 -M:    Duc Dang <dhdang@apm.com>
 +M:    Toan Le <toan@os.amperecomputing.com>
  L:    linux-pci@vger.kernel.org
  L:    linux-arm-kernel@lists.infradead.org
  S:    Maintained
@@@ -12310,6 -12281,14 +12318,6 @@@ S:  Maintaine
  F:    drivers/net/ppp/pptp.c
  W:    http://sourceforge.net/projects/accel-pptp
  
 -PREEMPTIBLE KERNEL
 -M:    Robert Love <rml@tech9.net>
 -L:    kpreempt-tech@lists.sourceforge.net
 -W:    https://www.kernel.org/pub/linux/kernel/people/rml/preempt-kernel
 -S:    Supported
 -F:    Documentation/preempt-locking.txt
 -F:    include/linux/preempt.h
 -
  PRINTK
  M:    Petr Mladek <pmladek@suse.com>
  M:    Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
@@@ -13047,7 -13026,6 +13055,7 @@@ F:   drivers/reset
  F:    Documentation/devicetree/bindings/reset/
  F:    include/dt-bindings/reset/
  F:    include/linux/reset.h
 +F:    include/linux/reset/
  F:    include/linux/reset-controller.h
  
  RESTARTABLE SEQUENCES SUPPORT
@@@ -13548,7 -13526,6 +13556,7 @@@ F:   kernel/sched
  F:    include/linux/sched.h
  F:    include/uapi/linux/sched.h
  F:    include/linux/wait.h
 +F:    include/linux/preempt.h
  
  SCR24X CHIP CARD INTERFACE DRIVER
  M:    Lubomir Rintel <lkundrak@v3.sk>
@@@ -14790,7 -14767,7 +14798,7 @@@ S:   Maintaine
  F:    drivers/tty/serial/8250/8250_dw.c
  
  SYNOPSYS DESIGNWARE APB GPIO DRIVER
 -M:    Hoan Tran <hotran@apm.com>
 +M:    Hoan Tran <hoan@os.amperecomputing.com>
  L:    linux-gpio@vger.kernel.org
  S:    Maintained
  F:    drivers/gpio/gpio-dwapb.c
diff --combined fs/proc/stat.c
  
  #ifdef arch_idle_time
  
- static u64 get_idle_time(int cpu)
+ static u64 get_idle_time(struct kernel_cpustat *kcs, int cpu)
  {
        u64 idle;
  
-       idle = kcpustat_cpu(cpu).cpustat[CPUTIME_IDLE];
+       idle = kcs->cpustat[CPUTIME_IDLE];
        if (cpu_online(cpu) && !nr_iowait_cpu(cpu))
                idle += arch_idle_time(cpu);
        return idle;
  }
  
- static u64 get_iowait_time(int cpu)
+ static u64 get_iowait_time(struct kernel_cpustat *kcs, int cpu)
  {
        u64 iowait;
  
-       iowait = kcpustat_cpu(cpu).cpustat[CPUTIME_IOWAIT];
+       iowait = kcs->cpustat[CPUTIME_IOWAIT];
        if (cpu_online(cpu) && nr_iowait_cpu(cpu))
                iowait += arch_idle_time(cpu);
        return iowait;
@@@ -45,7 -45,7 +45,7 @@@
  
  #else
  
- static u64 get_idle_time(int cpu)
+ static u64 get_idle_time(struct kernel_cpustat *kcs, int cpu)
  {
        u64 idle, idle_usecs = -1ULL;
  
  
        if (idle_usecs == -1ULL)
                /* !NO_HZ or cpu offline so we can rely on cpustat.idle */
-               idle = kcpustat_cpu(cpu).cpustat[CPUTIME_IDLE];
+               idle = kcs->cpustat[CPUTIME_IDLE];
        else
                idle = idle_usecs * NSEC_PER_USEC;
  
        return idle;
  }
  
- static u64 get_iowait_time(int cpu)
+ static u64 get_iowait_time(struct kernel_cpustat *kcs, int cpu)
  {
        u64 iowait, iowait_usecs = -1ULL;
  
@@@ -70,7 -70,7 +70,7 @@@
  
        if (iowait_usecs == -1ULL)
                /* !NO_HZ or cpu offline so we can rely on cpustat.iowait */
-               iowait = kcpustat_cpu(cpu).cpustat[CPUTIME_IOWAIT];
+               iowait = kcs->cpustat[CPUTIME_IOWAIT];
        else
                iowait = iowait_usecs * NSEC_PER_USEC;
  
  
  #endif
  
 +static void show_irq_gap(struct seq_file *p, unsigned int gap)
 +{
 +      static const char zeros[] = " 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0";
 +
 +      while (gap > 0) {
 +              unsigned int inc;
 +
 +              inc = min_t(unsigned int, gap, ARRAY_SIZE(zeros) / 2);
 +              seq_write(p, zeros, 2 * inc);
 +              gap -= inc;
 +      }
 +}
 +
 +static void show_all_irqs(struct seq_file *p)
 +{
 +      unsigned int i, next = 0;
 +
 +      for_each_active_irq(i) {
 +              show_irq_gap(p, i - next);
 +              seq_put_decimal_ull(p, " ", kstat_irqs_usr(i));
 +              next = i + 1;
 +      }
 +      show_irq_gap(p, nr_irqs - next);
 +}
 +
  static int show_stat(struct seq_file *p, void *v)
  {
        int i, j;
        getboottime64(&boottime);
  
        for_each_possible_cpu(i) {
-               user += kcpustat_cpu(i).cpustat[CPUTIME_USER];
-               nice += kcpustat_cpu(i).cpustat[CPUTIME_NICE];
-               system += kcpustat_cpu(i).cpustat[CPUTIME_SYSTEM];
-               idle += get_idle_time(i);
-               iowait += get_iowait_time(i);
-               irq += kcpustat_cpu(i).cpustat[CPUTIME_IRQ];
-               softirq += kcpustat_cpu(i).cpustat[CPUTIME_SOFTIRQ];
-               steal += kcpustat_cpu(i).cpustat[CPUTIME_STEAL];
-               guest += kcpustat_cpu(i).cpustat[CPUTIME_GUEST];
-               guest_nice += kcpustat_cpu(i).cpustat[CPUTIME_GUEST_NICE];
+               struct kernel_cpustat *kcs = &kcpustat_cpu(i);
+               user += kcs->cpustat[CPUTIME_USER];
+               nice += kcs->cpustat[CPUTIME_NICE];
+               system += kcs->cpustat[CPUTIME_SYSTEM];
+               idle += get_idle_time(kcs, i);
+               iowait += get_iowait_time(kcs, i);
+               irq += kcs->cpustat[CPUTIME_IRQ];
+               softirq += kcs->cpustat[CPUTIME_SOFTIRQ];
+               steal += kcs->cpustat[CPUTIME_STEAL];
+               guest += kcs->cpustat[CPUTIME_GUEST];
+               guest_nice += kcs->cpustat[CPUTIME_GUEST_NICE];
                sum += kstat_cpu_irqs_sum(i);
                sum += arch_irq_stat_cpu(i);
  
        seq_putc(p, '\n');
  
        for_each_online_cpu(i) {
+               struct kernel_cpustat *kcs = &kcpustat_cpu(i);
                /* Copy values here to work around gcc-2.95.3, gcc-2.96 */
-               user = kcpustat_cpu(i).cpustat[CPUTIME_USER];
-               nice = kcpustat_cpu(i).cpustat[CPUTIME_NICE];
-               system = kcpustat_cpu(i).cpustat[CPUTIME_SYSTEM];
-               idle = get_idle_time(i);
-               iowait = get_iowait_time(i);
-               irq = kcpustat_cpu(i).cpustat[CPUTIME_IRQ];
-               softirq = kcpustat_cpu(i).cpustat[CPUTIME_SOFTIRQ];
-               steal = kcpustat_cpu(i).cpustat[CPUTIME_STEAL];
-               guest = kcpustat_cpu(i).cpustat[CPUTIME_GUEST];
-               guest_nice = kcpustat_cpu(i).cpustat[CPUTIME_GUEST_NICE];
+               user = kcs->cpustat[CPUTIME_USER];
+               nice = kcs->cpustat[CPUTIME_NICE];
+               system = kcs->cpustat[CPUTIME_SYSTEM];
+               idle = get_idle_time(kcs, i);
+               iowait = get_iowait_time(kcs, i);
+               irq = kcs->cpustat[CPUTIME_IRQ];
+               softirq = kcs->cpustat[CPUTIME_SOFTIRQ];
+               steal = kcs->cpustat[CPUTIME_STEAL];
+               guest = kcs->cpustat[CPUTIME_GUEST];
+               guest_nice = kcs->cpustat[CPUTIME_GUEST_NICE];
                seq_printf(p, "cpu%d", i);
                seq_put_decimal_ull(p, " ", nsec_to_clock_t(user));
                seq_put_decimal_ull(p, " ", nsec_to_clock_t(nice));
        }
        seq_put_decimal_ull(p, "intr ", (unsigned long long)sum);
  
 -      /* sum again ? it could be updated? */
 -      for_each_irq_nr(j)
 -              seq_put_decimal_ull(p, " ", kstat_irqs_usr(j));
 +      show_all_irqs(p);
  
        seq_printf(p,
                "\nctxt %llu\n"
diff --combined fs/proc/task_nommu.c
@@@ -64,7 -64,7 +64,7 @@@ void task_mem(struct seq_file *m, struc
        else
                bytes += kobjsize(current->files);
  
 -      if (current->sighand && atomic_read(&current->sighand->count) > 1)
 +      if (current->sighand && refcount_read(&current->sighand->count) > 1)
                sbytes += kobjsize(current->sighand);
        else
                bytes += kobjsize(current->sighand);
@@@ -178,7 -178,7 +178,7 @@@ static int nommu_vma_show(struct seq_fi
                seq_file_path(m, file, "");
        } else if (mm && is_stack(vma)) {
                seq_pad(m, ' ');
-               seq_printf(m, "[stack]");
+               seq_puts(m, "[stack]");
        }
  
        seq_putc(m, '\n');
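The seq_puts() change above follows the "proc: use seq_puts() everywhere" commit from the series: for a constant string there is nothing to format, so the cheaper call is preferred. The same reasoning in ordinary stdio terms, as a tiny standalone sketch (userspace analogy, not the seq_file API):

    #include <stdio.h>

    int main(void)
    {
            fputs("[stack]\n", stdout);            /* constant string: no format parsing */
            fprintf(stdout, "pid=%d\n", 1234);     /* formatting only where it is needed */
            return 0;
    }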
diff --combined include/linux/sched.h
@@@ -21,7 -21,6 +21,7 @@@
  #include <linux/seccomp.h>
  #include <linux/nodemask.h>
  #include <linux/rcupdate.h>
 +#include <linux/refcount.h>
  #include <linux/resource.h>
  #include <linux/latencytop.h>
  #include <linux/sched/prio.h>
@@@ -48,6 -47,7 +48,7 @@@ struct pid_namespace
  struct pipe_inode_info;
  struct rcu_node;
  struct reclaim_state;
+ struct capture_control;
  struct robust_list_head;
  struct sched_attr;
  struct sched_param;
@@@ -357,6 -357,12 +358,6 @@@ struct util_est 
   * For cfs_rq, it is the aggregated load_avg of all runnable and
   * blocked sched_entities.
   *
 - * load_avg may also take frequency scaling into account:
 - *
 - *   load_avg = runnable% * scale_load_down(load) * freq%
 - *
 - * where freq% is the CPU frequency normalized to the highest frequency.
 - *
   * [util_avg definition]
   *
   *   util_avg = running% * SCHED_CAPACITY_SCALE
   * a CPU. For cfs_rq, it is the aggregated util_avg of all runnable
   * and blocked sched_entities.
   *
 - * util_avg may also factor frequency scaling and CPU capacity scaling:
 +  * load_avg and util_avg don't directly factor in frequency scaling and CPU
 + * capacity scaling. The scaling is done through the rq_clock_pelt that
 + * is used for computing those signals (see update_rq_clock_pelt())
   *
 - *   util_avg = running% * SCHED_CAPACITY_SCALE * freq% * capacity%
 - *
 - * where freq% is the same as above, and capacity% is the CPU capacity
 - * normalized to the greatest capacity (due to uarch differences, etc).
 - *
 - * N.B., the above ratios (runnable%, running%, freq%, and capacity%)
 - * themselves are in the range of [0, 1]. To do fixed point arithmetics,
 - * we therefore scale them to as large a range as necessary. This is for
 - * example reflected by util_avg's SCHED_CAPACITY_SCALE.
 + * N.B., the above ratios (runnable% and running%) themselves are in the
 + * range of [0, 1]. To do fixed point arithmetics, we therefore scale them
 + * to as large a range as necessary. This is for example reflected by
 + * util_avg's SCHED_CAPACITY_SCALE.
   *
   * [Overflow issue]
   *
@@@ -599,7 -608,7 +600,7 @@@ struct task_struct 
        randomized_struct_fields_start
  
        void                            *stack;
 -      atomic_t                        usage;
 +      refcount_t                      usage;
        /* Per task flags (PF_*), defined further below: */
        unsigned int                    flags;
        unsigned int                    ptrace;
  
        struct io_context               *io_context;
  
+ #ifdef CONFIG_COMPACTION
+       struct capture_control          *capture_control;
+ #endif
        /* Ptrace state: */
        unsigned long                   ptrace_message;
        kernel_siginfo_t                *last_siginfo;
  #endif
  #ifdef CONFIG_THREAD_INFO_IN_TASK
        /* A live task holds one reference: */
 -      atomic_t                        stack_refcount;
 +      refcount_t                      stack_refcount;
  #endif
  #ifdef CONFIG_LIVEPATCH
        int patch_state;
@@@ -1395,6 -1407,8 +1399,7 @@@ extern struct pid *cad_pid
  #define PF_UMH                        0x02000000      /* I'm an Usermodehelper process */
  #define PF_NO_SETAFFINITY     0x04000000      /* Userland is not allowed to meddle with cpus_allowed */
  #define PF_MCE_EARLY          0x08000000      /* Early kill for mce process policy */
 -#define PF_MUTEX_TESTER               0x20000000      /* Thread belongs to the rt mutex tester */
+ #define PF_MEMALLOC_NOCMA     0x10000000      /* All allocation requests will have __GFP_MOVABLE cleared */
  #define PF_FREEZER_SKIP               0x40000000      /* Freezer should not count it as freezable */
  #define PF_SUSPEND_TASK               0x80000000      /* This thread called freeze_processes() and should not be frozen */
  
@@@ -1444,7 -1458,6 +1449,7 @@@ static inline bool is_percpu_thread(voi
  #define PFA_SPEC_SSB_FORCE_DISABLE    4       /* Speculative Store Bypass force disabled*/
  #define PFA_SPEC_IB_DISABLE           5       /* Indirect branch speculation restricted */
  #define PFA_SPEC_IB_FORCE_DISABLE     6       /* Indirect branch speculation permanently restricted */
 +#define PFA_SPEC_SSB_NOEXEC           7       /* Speculative Store Bypass clear on execve() */
  
  #define TASK_PFA_TEST(name, func)                                     \
        static inline bool task_##func(struct task_struct *p)           \
@@@ -1473,10 -1486,6 +1478,10 @@@ TASK_PFA_TEST(SPEC_SSB_DISABLE, spec_ss
  TASK_PFA_SET(SPEC_SSB_DISABLE, spec_ssb_disable)
  TASK_PFA_CLEAR(SPEC_SSB_DISABLE, spec_ssb_disable)
  
 +TASK_PFA_TEST(SPEC_SSB_NOEXEC, spec_ssb_noexec)
 +TASK_PFA_SET(SPEC_SSB_NOEXEC, spec_ssb_noexec)
 +TASK_PFA_CLEAR(SPEC_SSB_NOEXEC, spec_ssb_noexec)
 +
  TASK_PFA_TEST(SPEC_SSB_FORCE_DISABLE, spec_ssb_force_disable)
  TASK_PFA_SET(SPEC_SSB_FORCE_DISABLE, spec_ssb_force_disable)
  
@@@ -1744,9 -1753,9 +1749,9 @@@ static __always_inline bool need_resche
  static inline unsigned int task_cpu(const struct task_struct *p)
  {
  #ifdef CONFIG_THREAD_INFO_IN_TASK
 -      return p->cpu;
 +      return READ_ONCE(p->cpu);
  #else
 -      return task_thread_info(p)->cpu;
 +      return READ_ONCE(task_thread_info(p)->cpu);
  #endif
  }
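The last sched.h hunk wraps the task_cpu() load in READ_ONCE() so the compiler is forced to emit a single, untorn read of a field that a remote CPU may update concurrently, rather than merging or repeating the access. A rough userspace illustration of the idiom is below; the READ_ONCE() macro there is a simplified re-definition for the sketch, not the kernel's, and the struct is a stand-in.

    #include <stdio.h>

    /* simplified stand-in for the kernel macro: force one volatile load */
    #define READ_ONCE(x)  (*(volatile typeof(x) *)&(x))

    struct task { int cpu; };

    static int task_cpu(struct task *p)
    {
            /* a single load the compiler may not split, merge or re-issue */
            return READ_ONCE(p->cpu);
    }

    int main(void)
    {
            struct task t = { .cpu = 3 };

            printf("cpu=%d\n", task_cpu(&t));
            return 0;
    }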
  
diff --combined init/init_task.c
@@@ -10,6 -10,7 +10,7 @@@
  #include <linux/fs.h>
  #include <linux/mm.h>
  #include <linux/audit.h>
+ #include <linux/numa.h>
  
  #include <asm/pgtable.h>
  #include <linux/uaccess.h>
@@@ -44,7 -45,7 +45,7 @@@ static struct signal_struct init_signal
  };
  
  static struct sighand_struct init_sighand = {
 -      .count          = ATOMIC_INIT(1),
 +      .count          = REFCOUNT_INIT(1),
        .action         = { { { .sa_handler = SIG_DFL, } }, },
        .siglock        = __SPIN_LOCK_UNLOCKED(init_sighand.siglock),
        .signalfd_wqh   = __WAIT_QUEUE_HEAD_INITIALIZER(init_sighand.signalfd_wqh),
@@@ -61,11 -62,11 +62,11 @@@ struct task_struct init_tas
  = {
  #ifdef CONFIG_THREAD_INFO_IN_TASK
        .thread_info    = INIT_THREAD_INFO(init_task),
 -      .stack_refcount = ATOMIC_INIT(1),
 +      .stack_refcount = REFCOUNT_INIT(1),
  #endif
        .state          = 0,
        .stack          = init_stack,
 -      .usage          = ATOMIC_INIT(2),
 +      .usage          = REFCOUNT_INIT(2),
        .flags          = PF_KTHREAD,
        .prio           = MAX_PRIO - 20,
        .static_prio    = MAX_PRIO - 20,
        .vtime.state    = VTIME_SYS,
  #endif
  #ifdef CONFIG_NUMA_BALANCING
-       .numa_preferred_nid = -1,
+       .numa_preferred_nid = NUMA_NO_NODE,
        .numa_group     = NULL,
        .numa_faults    = NULL,
  #endif
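Besides the refcount_t initializers, init_task.c switches the preferred-node initializer from a bare -1 to NUMA_NO_NODE from the newly included <linux/numa.h>; kernel/kthread.c and kernel/sched/fair.c below make the same substitution. The constant has the same value, it just names the intent. A toy sketch, assuming nothing beyond standard C (the struct is a stand-in, not task_struct):

    #include <stdio.h>

    #define NUMA_NO_NODE  (-1)   /* same value as the kernel's, but named */

    struct task_stub { int numa_preferred_nid; };

    int main(void)
    {
            struct task_stub init_task = { .numa_preferred_nid = NUMA_NO_NODE };

            if (init_task.numa_preferred_nid == NUMA_NO_NODE)
                    printf("no preferred NUMA node yet\n");
            return 0;
    }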
diff --combined kernel/kthread.c
@@@ -20,6 -20,7 +20,7 @@@
  #include <linux/freezer.h>
  #include <linux/ptrace.h>
  #include <linux/uaccess.h>
+ #include <linux/numa.h>
  #include <trace/events/sched.h>
  
  static DEFINE_SPINLOCK(kthread_create_lock);
@@@ -101,12 -102,6 +102,12 @@@ bool kthread_should_stop(void
  }
  EXPORT_SYMBOL(kthread_should_stop);
  
 +bool __kthread_should_park(struct task_struct *k)
 +{
 +      return test_bit(KTHREAD_SHOULD_PARK, &to_kthread(k)->flags);
 +}
 +EXPORT_SYMBOL_GPL(__kthread_should_park);
 +
  /**
   * kthread_should_park - should this kthread park now?
   *
   */
  bool kthread_should_park(void)
  {
 -      return test_bit(KTHREAD_SHOULD_PARK, &to_kthread(current)->flags);
 +      return __kthread_should_park(current);
  }
  EXPORT_SYMBOL_GPL(kthread_should_park);
  
@@@ -605,7 -600,7 +606,7 @@@ void __kthread_init_worker(struct kthre
                                struct lock_class_key *key)
  {
        memset(worker, 0, sizeof(struct kthread_worker));
 -      spin_lock_init(&worker->lock);
 +      raw_spin_lock_init(&worker->lock);
        lockdep_set_class_and_name(&worker->lock, key, name);
        INIT_LIST_HEAD(&worker->work_list);
        INIT_LIST_HEAD(&worker->delayed_work_list);
@@@ -647,21 -642,21 +648,21 @@@ repeat
  
        if (kthread_should_stop()) {
                __set_current_state(TASK_RUNNING);
 -              spin_lock_irq(&worker->lock);
 +              raw_spin_lock_irq(&worker->lock);
                worker->task = NULL;
 -              spin_unlock_irq(&worker->lock);
 +              raw_spin_unlock_irq(&worker->lock);
                return 0;
        }
  
        work = NULL;
 -      spin_lock_irq(&worker->lock);
 +      raw_spin_lock_irq(&worker->lock);
        if (!list_empty(&worker->work_list)) {
                work = list_first_entry(&worker->work_list,
                                        struct kthread_work, node);
                list_del_init(&work->node);
        }
        worker->current_work = work;
 -      spin_unlock_irq(&worker->lock);
 +      raw_spin_unlock_irq(&worker->lock);
  
        if (work) {
                __set_current_state(TASK_RUNNING);
@@@ -681,7 -676,7 +682,7 @@@ __kthread_create_worker(int cpu, unsign
  {
        struct kthread_worker *worker;
        struct task_struct *task;
-       int node = -1;
+       int node = NUMA_NO_NODE;
  
        worker = kzalloc(sizeof(*worker), GFP_KERNEL);
        if (!worker)
@@@ -818,12 -813,12 +819,12 @@@ bool kthread_queue_work(struct kthread_
        bool ret = false;
        unsigned long flags;
  
 -      spin_lock_irqsave(&worker->lock, flags);
 +      raw_spin_lock_irqsave(&worker->lock, flags);
        if (!queuing_blocked(worker, work)) {
                kthread_insert_work(worker, work, &worker->work_list);
                ret = true;
        }
 -      spin_unlock_irqrestore(&worker->lock, flags);
 +      raw_spin_unlock_irqrestore(&worker->lock, flags);
        return ret;
  }
  EXPORT_SYMBOL_GPL(kthread_queue_work);
@@@ -841,7 -836,6 +842,7 @@@ void kthread_delayed_work_timer_fn(stru
        struct kthread_delayed_work *dwork = from_timer(dwork, t, timer);
        struct kthread_work *work = &dwork->work;
        struct kthread_worker *worker = work->worker;
 +      unsigned long flags;
  
        /*
         * This might happen when a pending work is reinitialized.
        if (WARN_ON_ONCE(!worker))
                return;
  
 -      spin_lock(&worker->lock);
 +      raw_spin_lock_irqsave(&worker->lock, flags);
        /* Work must not be used with >1 worker, see kthread_queue_work(). */
        WARN_ON_ONCE(work->worker != worker);
  
        list_del_init(&work->node);
        kthread_insert_work(worker, work, &worker->work_list);
  
 -      spin_unlock(&worker->lock);
 +      raw_spin_unlock_irqrestore(&worker->lock, flags);
  }
  EXPORT_SYMBOL(kthread_delayed_work_timer_fn);
  
@@@ -915,14 -909,14 +916,14 @@@ bool kthread_queue_delayed_work(struct 
        unsigned long flags;
        bool ret = false;
  
 -      spin_lock_irqsave(&worker->lock, flags);
 +      raw_spin_lock_irqsave(&worker->lock, flags);
  
        if (!queuing_blocked(worker, work)) {
                __kthread_queue_delayed_work(worker, dwork, delay);
                ret = true;
        }
  
 -      spin_unlock_irqrestore(&worker->lock, flags);
 +      raw_spin_unlock_irqrestore(&worker->lock, flags);
        return ret;
  }
  EXPORT_SYMBOL_GPL(kthread_queue_delayed_work);
@@@ -958,7 -952,7 +959,7 @@@ void kthread_flush_work(struct kthread_
        if (!worker)
                return;
  
 -      spin_lock_irq(&worker->lock);
 +      raw_spin_lock_irq(&worker->lock);
        /* Work must not be used with >1 worker, see kthread_queue_work(). */
        WARN_ON_ONCE(work->worker != worker);
  
        else
                noop = true;
  
 -      spin_unlock_irq(&worker->lock);
 +      raw_spin_unlock_irq(&worker->lock);
  
        if (!noop)
                wait_for_completion(&fwork.done);
@@@ -1003,9 -997,9 +1004,9 @@@ static bool __kthread_cancel_work(struc
                 * any queuing is blocked by setting the canceling counter.
                 */
                work->canceling++;
 -              spin_unlock_irqrestore(&worker->lock, *flags);
 +              raw_spin_unlock_irqrestore(&worker->lock, *flags);
                del_timer_sync(&dwork->timer);
 -              spin_lock_irqsave(&worker->lock, *flags);
 +              raw_spin_lock_irqsave(&worker->lock, *flags);
                work->canceling--;
        }
  
@@@ -1052,7 -1046,7 +1053,7 @@@ bool kthread_mod_delayed_work(struct kt
        unsigned long flags;
        int ret = false;
  
 -      spin_lock_irqsave(&worker->lock, flags);
 +      raw_spin_lock_irqsave(&worker->lock, flags);
  
        /* Do not bother with canceling when never queued. */
        if (!work->worker)
  fast_queue:
        __kthread_queue_delayed_work(worker, dwork, delay);
  out:
 -      spin_unlock_irqrestore(&worker->lock, flags);
 +      raw_spin_unlock_irqrestore(&worker->lock, flags);
        return ret;
  }
  EXPORT_SYMBOL_GPL(kthread_mod_delayed_work);
@@@ -1083,7 -1077,7 +1084,7 @@@ static bool __kthread_cancel_work_sync(
        if (!worker)
                goto out;
  
 -      spin_lock_irqsave(&worker->lock, flags);
 +      raw_spin_lock_irqsave(&worker->lock, flags);
        /* Work must not be used with >1 worker, see kthread_queue_work(). */
        WARN_ON_ONCE(work->worker != worker);
  
         * In the meantime, block any queuing by setting the canceling counter.
         */
        work->canceling++;
 -      spin_unlock_irqrestore(&worker->lock, flags);
 +      raw_spin_unlock_irqrestore(&worker->lock, flags);
        kthread_flush_work(work);
 -      spin_lock_irqsave(&worker->lock, flags);
 +      raw_spin_lock_irqsave(&worker->lock, flags);
        work->canceling--;
  
  out_fast:
 -      spin_unlock_irqrestore(&worker->lock, flags);
 +      raw_spin_unlock_irqrestore(&worker->lock, flags);
  out:
        return ret;
  }
diff --combined kernel/sched/core.c
@@@ -107,12 -107,11 +107,12 @@@ struct rq *task_rq_lock(struct task_str
                 *                                      [L] ->on_rq
                 *      RELEASE (rq->lock)
                 *
 -               * If we observe the old CPU in task_rq_lock, the acquire of
 +               * If we observe the old CPU in task_rq_lock(), the acquire of
                 * the old rq->lock will fully serialize against the stores.
                 *
 -               * If we observe the new CPU in task_rq_lock, the acquire will
 -               * pair with the WMB to ensure we must then also see migrating.
 +               * If we observe the new CPU in task_rq_lock(), the address
 +               * dependency headed by '[L] rq = task_rq()' and the acquire
 +               * will pair with the WMB to ensure we then also see migrating.
                 */
                if (likely(rq == task_rq(p) && !task_on_rq_migrating(p))) {
                        rq_pin_lock(rq, rf);
@@@ -181,7 -180,6 +181,7 @@@ static void update_rq_clock_task(struc
        if ((irq_delta + steal) && sched_feat(NONTASK_CAPACITY))
                update_irq_load_avg(rq, irq_delta + steal);
  #endif
 +      update_rq_clock_pelt(rq, delta);
  }
  
  void update_rq_clock(struct rq *rq)
@@@ -398,7 -396,19 +398,7 @@@ static bool set_nr_if_polling(struct ta
  #endif
  #endif
  
 -/**
 - * wake_q_add() - queue a wakeup for 'later' waking.
 - * @head: the wake_q_head to add @task to
 - * @task: the task to queue for 'later' wakeup
 - *
 - * Queue a task for later wakeup, most likely by the wake_up_q() call in the
 - * same context, _HOWEVER_ this is not guaranteed, the wakeup can come
 - * instantly.
 - *
 - * This function must be used as-if it were wake_up_process(); IOW the task
 - * must be ready to be woken at this location.
 - */
 -void wake_q_add(struct wake_q_head *head, struct task_struct *task)
 +static bool __wake_q_add(struct wake_q_head *head, struct task_struct *task)
  {
        struct wake_q_node *node = &task->wake_q;
  
         * state, even in the failed case, an explicit smp_mb() must be used.
         */
        smp_mb__before_atomic();
 -      if (cmpxchg_relaxed(&node->next, NULL, WAKE_Q_TAIL))
 -              return;
 -
 -      get_task_struct(task);
 +      if (unlikely(cmpxchg_relaxed(&node->next, NULL, WAKE_Q_TAIL)))
 +              return false;
  
        /*
         * The head is context local, there can be no concurrency.
         */
        *head->lastp = node;
        head->lastp = &node->next;
 +      return true;
 +}
 +
 +/**
 + * wake_q_add() - queue a wakeup for 'later' waking.
 + * @head: the wake_q_head to add @task to
 + * @task: the task to queue for 'later' wakeup
 + *
 + * Queue a task for later wakeup, most likely by the wake_up_q() call in the
 + * same context, _HOWEVER_ this is not guaranteed, the wakeup can come
 + * instantly.
 + *
 + * This function must be used as-if it were wake_up_process(); IOW the task
 + * must be ready to be woken at this location.
 + */
 +void wake_q_add(struct wake_q_head *head, struct task_struct *task)
 +{
 +      if (__wake_q_add(head, task))
 +              get_task_struct(task);
 +}
 +
 +/**
 + * wake_q_add_safe() - safely queue a wakeup for 'later' waking.
 + * @head: the wake_q_head to add @task to
 + * @task: the task to queue for 'later' wakeup
 + *
 + * Queue a task for later wakeup, most likely by the wake_up_q() call in the
 + * same context, _HOWEVER_ this is not guaranteed, the wakeup can come
 + * instantly.
 + *
 + * This function must be used as-if it were wake_up_process(); IOW the task
 + * must be ready to be woken at this location.
 + *
 + * This function is essentially a task-safe equivalent to wake_q_add(). Callers
 + * that already hold reference to @task can call the 'safe' version and trust
 + * wake_q to do the right thing depending whether or not the @task is already
 + * queued for wakeup.
 + */
 +void wake_q_add_safe(struct wake_q_head *head, struct task_struct *task)
 +{
 +      if (!__wake_q_add(head, task))
 +              put_task_struct(task);
  }
  
  void wake_up_q(struct wake_q_head *head)
@@@ -958,7 -928,7 +958,7 @@@ static struct rq *move_queued_task(stru
  {
        lockdep_assert_held(&rq->lock);
  
 -      p->on_rq = TASK_ON_RQ_MIGRATING;
 +      WRITE_ONCE(p->on_rq, TASK_ON_RQ_MIGRATING);
        dequeue_task(rq, p, DEQUEUE_NOCLOCK);
        set_task_cpu(p, new_cpu);
        rq_unlock(rq, rf);
@@@ -2220,6 -2190,9 +2220,9 @@@ static void __sched_fork(unsigned long 
        INIT_HLIST_HEAD(&p->preempt_notifiers);
  #endif
  
+ #ifdef CONFIG_COMPACTION
+       p->capture_control = NULL;
+ #endif
        init_numa_balancing(clone_flags, p);
  }
  
@@@ -2461,7 -2434,7 +2464,7 @@@ void wake_up_new_task(struct task_struc
  #endif
        rq = __task_rq_lock(p, &rf);
        update_rq_clock(rq);
 -      post_init_entity_util_avg(&p->se);
 +      post_init_entity_util_avg(p);
  
        activate_task(rq, p, ENQUEUE_NOCLOCK);
        p->on_rq = TASK_ON_RQ_QUEUED;
@@@ -5295,8 -5268,9 +5298,8 @@@ SYSCALL_DEFINE2(sched_rr_get_interval, 
  }
  
  #ifdef CONFIG_COMPAT_32BIT_TIME
 -COMPAT_SYSCALL_DEFINE2(sched_rr_get_interval,
 -                     compat_pid_t, pid,
 -                     struct old_timespec32 __user *, interval)
 +SYSCALL_DEFINE2(sched_rr_get_interval_time32, pid_t, pid,
 +              struct old_timespec32 __user *, interval)
  {
        struct timespec64 t;
        int retval = sched_rr_get_interval(pid, &t);
@@@ -5896,11 -5870,14 +5899,11 @@@ void __init sched_init_smp(void
        /*
         * There's no userspace yet to cause hotplug operations; hence all the
         * CPU masks are stable and all blatant races in the below code cannot
 -       * happen. The hotplug lock is nevertheless taken to satisfy lockdep,
 -       * but there won't be any contention on it.
 +       * happen.
         */
 -      cpus_read_lock();
        mutex_lock(&sched_domains_mutex);
        sched_init_domains(cpu_active_mask);
        mutex_unlock(&sched_domains_mutex);
 -      cpus_read_unlock();
  
        /* Move init over to a non-isolated CPU */
        if (set_cpus_allowed_ptr(current, housekeeping_cpumask(HK_FLAG_DOMAIN)) < 0)
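The wake_q changes above split the enqueue into a __wake_q_add() helper that reports whether the task was actually queued. wake_q_add() then takes a task reference only on success, while the new wake_q_add_safe() assumes the caller already handed over a reference and drops it when the cmpxchg loses the race. A small userspace analogue of that reference-balancing split follows; the queue flag and refcount are stand-ins, not the kernel API, and no concurrency is modelled.

    #include <stdbool.h>
    #include <stdio.h>

    struct task_stub { int refs; bool queued; };

    /* report whether we queued it; someone else may have queued it already */
    static bool __queue_add(struct task_stub *t)
    {
            if (t->queued)
                    return false;
            t->queued = true;
            return true;
    }

    /* caller holds no reference for the queue: take one only if we queued */
    static void queue_add(struct task_stub *t)
    {
            if (__queue_add(t))
                    t->refs++;
    }

    /* caller already handed us a reference: drop it if queueing was a no-op */
    static void queue_add_safe(struct task_stub *t)
    {
            if (!__queue_add(t))
                    t->refs--;
    }

    int main(void)
    {
            struct task_stub t = { .refs = 1 };

            queue_add(&t);          /* queued: a reference is taken, refs 1 -> 2    */
            queue_add_safe(&t);     /* already queued: the handed-in ref is dropped */
            printf("refs=%d queued=%d\n", t.refs, (int)t.queued);
            return 0;
    }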
diff --combined kernel/sched/fair.c
@@@ -248,6 -248,13 +248,6 @@@ const struct sched_class fair_sched_cla
   */
  
  #ifdef CONFIG_FAIR_GROUP_SCHED
 -
 -/* cpu runqueue to which this cfs_rq is attached */
 -static inline struct rq *rq_of(struct cfs_rq *cfs_rq)
 -{
 -      return cfs_rq->rq;
 -}
 -
  static inline struct task_struct *task_of(struct sched_entity *se)
  {
        SCHED_WARN_ON(!entity_is_task(se));
@@@ -275,103 -282,79 +275,103 @@@ static inline struct cfs_rq *group_cfs_
        return grp->my_q;
  }
  
 -static inline void list_add_leaf_cfs_rq(struct cfs_rq *cfs_rq)
 +static inline bool list_add_leaf_cfs_rq(struct cfs_rq *cfs_rq)
  {
 -      if (!cfs_rq->on_list) {
 -              struct rq *rq = rq_of(cfs_rq);
 -              int cpu = cpu_of(rq);
 +      struct rq *rq = rq_of(cfs_rq);
 +      int cpu = cpu_of(rq);
 +
 +      if (cfs_rq->on_list)
 +              return rq->tmp_alone_branch == &rq->leaf_cfs_rq_list;
 +
 +      cfs_rq->on_list = 1;
 +
 +      /*
 +       * Ensure we either appear before our parent (if already
 +       * enqueued) or force our parent to appear after us when it is
 +       * enqueued. The fact that we always enqueue bottom-up
 +       * reduces this to two cases and a special case for the root
 +       * cfs_rq. Furthermore, it also means that we will always reset
 +       * tmp_alone_branch either when the branch is connected
 +       * to a tree or when we reach the top of the tree
 +       */
 +      if (cfs_rq->tg->parent &&
 +          cfs_rq->tg->parent->cfs_rq[cpu]->on_list) {
                /*
 -               * Ensure we either appear before our parent (if already
 -               * enqueued) or force our parent to appear after us when it is
 -               * enqueued. The fact that we always enqueue bottom-up
 -               * reduces this to two cases and a special case for the root
 -               * cfs_rq. Furthermore, it also means that we will always reset
 -               * tmp_alone_branch either when the branch is connected
 -               * to a tree or when we reach the beg of the tree
 +               * If parent is already on the list, we add the child
 +               * just before. Thanks to circular linked property of
 +               * the list, this means to put the child at the tail
 +               * of the list that starts by parent.
                 */
 -              if (cfs_rq->tg->parent &&
 -                  cfs_rq->tg->parent->cfs_rq[cpu]->on_list) {
 -                      /*
 -                       * If parent is already on the list, we add the child
 -                       * just before. Thanks to circular linked property of
 -                       * the list, this means to put the child at the tail
 -                       * of the list that starts by parent.
 -                       */
 -                      list_add_tail_rcu(&cfs_rq->leaf_cfs_rq_list,
 -                              &(cfs_rq->tg->parent->cfs_rq[cpu]->leaf_cfs_rq_list));
 -                      /*
 -                       * The branch is now connected to its tree so we can
 -                       * reset tmp_alone_branch to the beginning of the
 -                       * list.
 -                       */
 -                      rq->tmp_alone_branch = &rq->leaf_cfs_rq_list;
 -              } else if (!cfs_rq->tg->parent) {
 -                      /*
 -                       * cfs rq without parent should be put
 -                       * at the tail of the list.
 -                       */
 -                      list_add_tail_rcu(&cfs_rq->leaf_cfs_rq_list,
 -                              &rq->leaf_cfs_rq_list);
 -                      /*
 -                       * We have reach the beg of a tree so we can reset
 -                       * tmp_alone_branch to the beginning of the list.
 -                       */
 -                      rq->tmp_alone_branch = &rq->leaf_cfs_rq_list;
 -              } else {
 -                      /*
 -                       * The parent has not already been added so we want to
 -                       * make sure that it will be put after us.
 -                       * tmp_alone_branch points to the beg of the branch
 -                       * where we will add parent.
 -                       */
 -                      list_add_rcu(&cfs_rq->leaf_cfs_rq_list,
 -                              rq->tmp_alone_branch);
 -                      /*
 -                       * update tmp_alone_branch to points to the new beg
 -                       * of the branch
 -                       */
 -                      rq->tmp_alone_branch = &cfs_rq->leaf_cfs_rq_list;
 -              }
 +              list_add_tail_rcu(&cfs_rq->leaf_cfs_rq_list,
 +                      &(cfs_rq->tg->parent->cfs_rq[cpu]->leaf_cfs_rq_list));
 +              /*
 +               * The branch is now connected to its tree so we can
 +               * reset tmp_alone_branch to the beginning of the
 +               * list.
 +               */
 +              rq->tmp_alone_branch = &rq->leaf_cfs_rq_list;
 +              return true;
 +      }
  
 -              cfs_rq->on_list = 1;
 +      if (!cfs_rq->tg->parent) {
 +              /*
 +               * cfs rq without parent should be put
 +               * at the tail of the list.
 +               */
 +              list_add_tail_rcu(&cfs_rq->leaf_cfs_rq_list,
 +                      &rq->leaf_cfs_rq_list);
 +              /*
 +               * We have reached the top of a tree so we can reset
 +               * tmp_alone_branch to the beginning of the list.
 +               */
 +              rq->tmp_alone_branch = &rq->leaf_cfs_rq_list;
 +              return true;
        }
 +
 +      /*
 +       * The parent has not already been added so we want to
 +       * make sure that it will be put after us.
 +       * tmp_alone_branch points to the beginning of the branch
 +       * where we will add parent.
 +       */
 +      list_add_rcu(&cfs_rq->leaf_cfs_rq_list, rq->tmp_alone_branch);
 +      /*
 +       * update tmp_alone_branch to point to the new beginning
 +       * of the branch
 +       */
 +      rq->tmp_alone_branch = &cfs_rq->leaf_cfs_rq_list;
 +      return false;
  }
  
  static inline void list_del_leaf_cfs_rq(struct cfs_rq *cfs_rq)
  {
        if (cfs_rq->on_list) {
 +              struct rq *rq = rq_of(cfs_rq);
 +
 +              /*
 +               * With cfs_rq being unthrottled/throttled during an enqueue,
 +               * it can happen that tmp_alone_branch points to a leaf that
 +               * we finally want to delete. In this case, tmp_alone_branch moves
 +               * to the prev element but it will point to rq->leaf_cfs_rq_list
 +               * at the end of the enqueue.
 +               */
 +              if (rq->tmp_alone_branch == &cfs_rq->leaf_cfs_rq_list)
 +                      rq->tmp_alone_branch = cfs_rq->leaf_cfs_rq_list.prev;
 +
                list_del_rcu(&cfs_rq->leaf_cfs_rq_list);
                cfs_rq->on_list = 0;
        }
  }
  
 -/* Iterate through all leaf cfs_rq's on a runqueue: */
 -#define for_each_leaf_cfs_rq(rq, cfs_rq) \
 -      list_for_each_entry_rcu(cfs_rq, &rq->leaf_cfs_rq_list, leaf_cfs_rq_list)
 +static inline void assert_list_leaf_cfs_rq(struct rq *rq)
 +{
 +      SCHED_WARN_ON(rq->tmp_alone_branch != &rq->leaf_cfs_rq_list);
 +}
 +
 +/* Iterate through all leaf cfs_rq's on a runqueue */
 +#define for_each_leaf_cfs_rq_safe(rq, cfs_rq, pos)                    \
 +      list_for_each_entry_safe(cfs_rq, pos, &rq->leaf_cfs_rq_list,    \
 +                               leaf_cfs_rq_list)
  
  /* Do the two (enqueued) entities belong to the same group ? */
  static inline struct cfs_rq *
@@@ -427,6 -410,12 +427,6 @@@ static inline struct task_struct *task_
        return container_of(se, struct task_struct, se);
  }
  
 -static inline struct rq *rq_of(struct cfs_rq *cfs_rq)
 -{
 -      return container_of(cfs_rq, struct rq, cfs);
 -}
 -
 -
  #define for_each_sched_entity(se) \
                for (; se; se = NULL)
  
@@@ -449,21 -438,16 +449,21 @@@ static inline struct cfs_rq *group_cfs_
        return NULL;
  }
  
 -static inline void list_add_leaf_cfs_rq(struct cfs_rq *cfs_rq)
 +static inline bool list_add_leaf_cfs_rq(struct cfs_rq *cfs_rq)
  {
 +      return true;
  }
  
  static inline void list_del_leaf_cfs_rq(struct cfs_rq *cfs_rq)
  {
  }
  
 -#define for_each_leaf_cfs_rq(rq, cfs_rq)      \
 -              for (cfs_rq = &rq->cfs; cfs_rq; cfs_rq = NULL)
 +static inline void assert_list_leaf_cfs_rq(struct rq *rq)
 +{
 +}
 +
 +#define for_each_leaf_cfs_rq_safe(rq, cfs_rq, pos)    \
 +              for (cfs_rq = &rq->cfs, pos = NULL; cfs_rq; cfs_rq = pos)
  
  static inline struct sched_entity *parent_entity(struct sched_entity *se)
  {
@@@ -702,8 -686,9 +702,8 @@@ static u64 sched_vslice(struct cfs_rq *
        return calc_delta_fair(sched_slice(cfs_rq, se), se);
  }
  
 -#ifdef CONFIG_SMP
  #include "pelt.h"
 -#include "sched-pelt.h"
 +#ifdef CONFIG_SMP
  
  static int select_idle_sibling(struct task_struct *p, int prev_cpu, int cpu);
  static unsigned long task_h_load(struct task_struct *p);
@@@ -759,9 -744,8 +759,9 @@@ static void attach_entity_cfs_rq(struc
   * Finally, that extrapolated util_avg is clamped to the cap (util_avg_cap)
   * if util_avg > util_avg_cap.
   */
 -void post_init_entity_util_avg(struct sched_entity *se)
 +void post_init_entity_util_avg(struct task_struct *p)
  {
 +      struct sched_entity *se = &p->se;
        struct cfs_rq *cfs_rq = cfs_rq_of(se);
        struct sched_avg *sa = &se->avg;
        long cpu_scale = arch_scale_cpu_capacity(NULL, cpu_of(rq_of(cfs_rq)));
                }
        }
  
 -      if (entity_is_task(se)) {
 -              struct task_struct *p = task_of(se);
 -              if (p->sched_class != &fair_sched_class) {
 -                      /*
 -                       * For !fair tasks do:
 -                       *
 -                      update_cfs_rq_load_avg(now, cfs_rq);
 -                      attach_entity_load_avg(cfs_rq, se, 0);
 -                      switched_from_fair(rq, p);
 -                       *
 -                       * such that the next switched_to_fair() has the
 -                       * expected state.
 -                       */
 -                      se->avg.last_update_time = cfs_rq_clock_task(cfs_rq);
 -                      return;
 -              }
 +      if (p->sched_class != &fair_sched_class) {
 +              /*
 +               * For !fair tasks do:
 +               *
 +              update_cfs_rq_load_avg(now, cfs_rq);
 +              attach_entity_load_avg(cfs_rq, se, 0);
 +              switched_from_fair(rq, p);
 +               *
 +               * such that the next switched_to_fair() has the
 +               * expected state.
 +               */
 +              se->avg.last_update_time = cfs_rq_clock_pelt(cfs_rq);
 +              return;
        }
  
        attach_entity_cfs_rq(se);
  void init_entity_runnable_average(struct sched_entity *se)
  {
  }
 -void post_init_entity_util_avg(struct sched_entity *se)
 +void post_init_entity_util_avg(struct task_struct *p)
  {
  }
  static void update_tg_load_avg(struct cfs_rq *cfs_rq, int force)
@@@ -1048,7 -1035,7 +1048,7 @@@ unsigned int sysctl_numa_balancing_scan
  unsigned int sysctl_numa_balancing_scan_delay = 1000;
  
  struct numa_group {
 -      atomic_t refcount;
 +      refcount_t refcount;
  
        spinlock_t lock; /* nr_tasks, tasks */
        int nr_tasks;
@@@ -1117,7 -1104,7 +1117,7 @@@ static unsigned int task_scan_start(str
                unsigned long shared = group_faults_shared(ng);
                unsigned long private = group_faults_priv(ng);
  
 -              period *= atomic_read(&ng->refcount);
 +              period *= refcount_read(&ng->refcount);
                period *= shared + 1;
                period /= private + shared + 1;
        }
@@@ -1140,7 -1127,7 +1140,7 @@@ static unsigned int task_scan_max(struc
                unsigned long private = group_faults_priv(ng);
                unsigned long period = smax;
  
 -              period *= atomic_read(&ng->refcount);
 +              period *= refcount_read(&ng->refcount);
                period *= shared + 1;
                period /= private + shared + 1;
  
@@@ -1173,7 -1160,7 +1173,7 @@@ void init_numa_balancing(unsigned long 
  
        /* New address space, reset the preferred nid */
        if (!(clone_flags & CLONE_VM)) {
-               p->numa_preferred_nid = -1;
+               p->numa_preferred_nid = NUMA_NO_NODE;
                return;
        }
  
  
  static void account_numa_enqueue(struct rq *rq, struct task_struct *p)
  {
-       rq->nr_numa_running += (p->numa_preferred_nid != -1);
+       rq->nr_numa_running += (p->numa_preferred_nid != NUMA_NO_NODE);
        rq->nr_preferred_running += (p->numa_preferred_nid == task_node(p));
  }
  
  static void account_numa_dequeue(struct rq *rq, struct task_struct *p)
  {
-       rq->nr_numa_running -= (p->numa_preferred_nid != -1);
+       rq->nr_numa_running -= (p->numa_preferred_nid != NUMA_NO_NODE);
        rq->nr_preferred_running -= (p->numa_preferred_nid == task_node(p));
  }
  
@@@ -1413,7 -1400,7 +1413,7 @@@ bool should_numa_migrate_memory(struct 
         * two full passes of the "multi-stage node selection" test that is
         * executed below.
         */
-       if ((p->numa_preferred_nid == -1 || p->numa_scan_seq <= 4) &&
+       if ((p->numa_preferred_nid == NUMA_NO_NODE || p->numa_scan_seq <= 4) &&
            (cpupid_pid_unset(last_cpupid) || cpupid_match_pid(p, last_cpupid)))
                return true;
  
@@@ -1861,7 -1848,7 +1861,7 @@@ static void numa_migrate_preferred(stru
        unsigned long interval = HZ;
  
        /* This task has no NUMA fault statistics yet */
-       if (unlikely(p->numa_preferred_nid == -1 || !p->numa_faults))
+       if (unlikely(p->numa_preferred_nid == NUMA_NO_NODE || !p->numa_faults))
                return;
  
        /* Periodically retry migrating the task to the preferred node */
@@@ -2108,7 -2095,7 +2108,7 @@@ static int preferred_group_nid(struct t
  
  static void task_numa_placement(struct task_struct *p)
  {
-       int seq, nid, max_nid = -1;
+       int seq, nid, max_nid = NUMA_NO_NODE;
        unsigned long max_faults = 0;
        unsigned long fault_types[2] = { 0, 0 };
        unsigned long total_faults;
  
  static inline int get_numa_group(struct numa_group *grp)
  {
 -      return atomic_inc_not_zero(&grp->refcount);
 +      return refcount_inc_not_zero(&grp->refcount);
  }
  
  static inline void put_numa_group(struct numa_group *grp)
  {
 -      if (atomic_dec_and_test(&grp->refcount))
 +      if (refcount_dec_and_test(&grp->refcount))
                kfree_rcu(grp, rcu);
  }
  
@@@ -2242,7 -2229,7 +2242,7 @@@ static void task_numa_group(struct task
                if (!grp)
                        return;
  
 -              atomic_set(&grp->refcount, 1);
 +              refcount_set(&grp->refcount, 1);
                grp->active_nodes = 1;
                grp->max_faults_cpu = 0;
                spin_lock_init(&grp->lock);
@@@ -2651,7 -2638,8 +2651,8 @@@ static void update_scan_period(struct t
                 * the preferred node.
                 */
                if (dst_nid == p->numa_preferred_nid ||
-                   (p->numa_preferred_nid != -1 && src_nid != p->numa_preferred_nid))
+                   (p->numa_preferred_nid != NUMA_NO_NODE &&
+                       src_nid != p->numa_preferred_nid))
                        return;
        }
  
@@@ -3135,7 -3123,7 +3136,7 @@@ void set_task_rq_fair(struct sched_enti
        p_last_update_time = prev->avg.last_update_time;
        n_last_update_time = next->avg.last_update_time;
  #endif
 -      __update_load_avg_blocked_se(p_last_update_time, cpu_of(rq_of(prev)), se);
 +      __update_load_avg_blocked_se(p_last_update_time, se);
        se->avg.last_update_time = n_last_update_time;
  }
  
@@@ -3270,11 -3258,11 +3271,11 @@@ update_tg_cfs_runnable(struct cfs_rq *c
  
        /*
         * runnable_sum can't be lower than running_sum
 -       * As running sum is scale with CPU capacity wehreas the runnable sum
 -       * is not we rescale running_sum 1st
 +       * Rescale running sum to be in the same range as runnable sum
 +       * running_sum is in [0 : LOAD_AVG_MAX <<  SCHED_CAPACITY_SHIFT]
 +       * runnable_sum is in [0 : LOAD_AVG_MAX]
         */
 -      running_sum = se->avg.util_sum /
 -              arch_scale_cpu_capacity(NULL, cpu_of(rq_of(cfs_rq)));
 +      running_sum = se->avg.util_sum >> SCHED_CAPACITY_SHIFT;
        runnable_sum = max(runnable_sum, running_sum);
  
        load_sum = (s64)se_weight(se) * runnable_sum;
@@@ -3377,7 -3365,7 +3378,7 @@@ static inline void add_tg_cfs_propagate
  
  /**
   * update_cfs_rq_load_avg - update the cfs_rq's load/util averages
 - * @now: current time, as per cfs_rq_clock_task()
 + * @now: current time, as per cfs_rq_clock_pelt()
   * @cfs_rq: cfs_rq to update
   *
   * The cfs_rq avg is the direct sum of all its entities (blocked and runnable)
@@@ -3422,7 -3410,7 +3423,7 @@@ update_cfs_rq_load_avg(u64 now, struct 
                decayed = 1;
        }
  
 -      decayed |= __update_load_avg_cfs_rq(now, cpu_of(rq_of(cfs_rq)), cfs_rq);
 +      decayed |= __update_load_avg_cfs_rq(now, cfs_rq);
  
  #ifndef CONFIG_64BIT
        smp_wmb();
@@@ -3512,7 -3500,9 +3513,7 @@@ static void detach_entity_load_avg(stru
  /* Update task and its cfs_rq load average */
  static inline void update_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
  {
 -      u64 now = cfs_rq_clock_task(cfs_rq);
 -      struct rq *rq = rq_of(cfs_rq);
 -      int cpu = cpu_of(rq);
 +      u64 now = cfs_rq_clock_pelt(cfs_rq);
        int decayed;
  
        /*
         * track group sched_entity load average for task_h_load calc in migration
         */
        if (se->avg.last_update_time && !(flags & SKIP_AGE_LOAD))
 -              __update_load_avg_se(now, cpu, cfs_rq, se);
 +              __update_load_avg_se(now, cfs_rq, se);
  
        decayed  = update_cfs_rq_load_avg(now, cfs_rq);
        decayed |= propagate_entity_load_avg(se);
@@@ -3572,7 -3562,7 +3573,7 @@@ void sync_entity_load_avg(struct sched_
        u64 last_update_time;
  
        last_update_time = cfs_rq_last_update_time(cfs_rq);
 -      __update_load_avg_blocked_se(last_update_time, cpu_of(rq_of(cfs_rq)), se);
 +      __update_load_avg_blocked_se(last_update_time, se);
  }
  
  /*
@@@ -3588,6 -3578,10 +3589,6 @@@ void remove_entity_load_avg(struct sche
         * tasks cannot exit without having gone through wake_up_new_task() ->
         * post_init_entity_util_avg() which will have added things to the
         * cfs_rq, so we can remove unconditionally.
 -       *
 -       * Similarly for groups, they will have passed through
 -       * post_init_entity_util_avg() before unregister_sched_fair_group()
 -       * calls this.
         */
  
        sync_entity_load_avg(se);
@@@ -3661,7 -3655,6 +3662,7 @@@ util_est_dequeue(struct cfs_rq *cfs_rq
  {
        long last_ewma_diff;
        struct util_est ue;
 +      int cpu;
  
        if (!sched_feat(UTIL_EST))
                return;
                return;
  
        /*
 +       * To avoid overestimating actual task utilization, skip updates if
 +       * we cannot guarantee there is idle time on this CPU.
 +       */
 +      cpu = cpu_of(rq_of(cfs_rq));
 +      if (task_util(p) > capacity_orig_of(cpu))
 +              return;
 +
 +      /*
         * Update Task's estimated utilization
         *
         * When *p completes an activation we can consolidate another sample
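
For illustration, a small userspace sketch of the guard added above (the capacity and utilization numbers are invented, and task_util()/capacity_orig_of() are modelled as plain parameters rather than the real kernel helpers): the estimated-utilization update is skipped whenever the task's utilization already exceeds the CPU's original capacity, since there can then be no idle time left and the sample would only overestimate.

/* Userspace sketch, not kernel code. */
#include <stdbool.h>
#include <stdio.h>

static bool util_est_update_allowed(unsigned long task_util,
				    unsigned long capacity_orig)
{
	/*
	 * Skip the update when the task alone exceeds the CPU's original
	 * capacity: with no idle time on the CPU, the sample would only
	 * overestimate the task's utilization.
	 */
	return task_util <= capacity_orig;
}

int main(void)
{
	printf("%d\n", util_est_update_allowed(300, 1024));  /* 1: update allowed */
	printf("%d\n", util_est_update_allowed(1200, 1024)); /* 0: update skipped */
	return 0;
}
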
@@@ -4445,10 -4430,6 +4446,10 @@@ static int tg_unthrottle_up(struct task
                /* adjust cfs_rq_clock_task() */
                cfs_rq->throttled_clock_task_time += rq_clock_task(rq) -
                                             cfs_rq->throttled_clock_task;
 +
 +              /* Put a cfs_rq with an already running entity back on the leaf list */
 +              if (cfs_rq->nr_running >= 1)
 +                      list_add_leaf_cfs_rq(cfs_rq);
        }
  
        return 0;
@@@ -4460,10 -4441,8 +4461,10 @@@ static int tg_throttle_down(struct task
        struct cfs_rq *cfs_rq = tg->cfs_rq[cpu_of(rq)];
  
        /* group is entering throttled state, stop time */
 -      if (!cfs_rq->throttle_count)
 +      if (!cfs_rq->throttle_count) {
                cfs_rq->throttled_clock_task = rq_clock_task(rq);
 +              list_del_leaf_cfs_rq(cfs_rq);
 +      }
        cfs_rq->throttle_count++;
  
        return 0;
@@@ -4566,8 -4545,6 +4567,8 @@@ void unthrottle_cfs_rq(struct cfs_rq *c
                        break;
        }
  
 +      assert_list_leaf_cfs_rq(rq);
 +
        if (!se)
                add_nr_running(rq, task_delta);
  
@@@ -4589,7 -4566,7 +4590,7 @@@ static u64 distribute_cfs_runtime(struc
                struct rq *rq = rq_of(cfs_rq);
                struct rq_flags rf;
  
 -              rq_lock(rq, &rf);
 +              rq_lock_irqsave(rq, &rf);
                if (!cfs_rq_throttled(cfs_rq))
                        goto next;
  
                        unthrottle_cfs_rq(cfs_rq);
  
  next:
 -              rq_unlock(rq, &rf);
 +              rq_unlock_irqrestore(rq, &rf);
  
                if (!remaining)
                        break;
   * period the timer is deactivated until scheduling resumes; cfs_b->idle is
   * used to track this state.
   */
 -static int do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b, int overrun)
 +static int do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b, int overrun, unsigned long flags)
  {
        u64 runtime, runtime_expires;
        int throttled;
        while (throttled && cfs_b->runtime > 0 && !cfs_b->distribute_running) {
                runtime = cfs_b->runtime;
                cfs_b->distribute_running = 1;
 -              raw_spin_unlock(&cfs_b->lock);
 +              raw_spin_unlock_irqrestore(&cfs_b->lock, flags);
                /* we can't nest cfs_b->lock while distributing bandwidth */
                runtime = distribute_cfs_runtime(cfs_b, runtime,
                                                 runtime_expires);
 -              raw_spin_lock(&cfs_b->lock);
 +              raw_spin_lock_irqsave(&cfs_b->lock, flags);
  
                cfs_b->distribute_running = 0;
                throttled = !list_empty(&cfs_b->throttled_cfs_rq);
@@@ -4777,18 -4754,17 +4778,18 @@@ static __always_inline void return_cfs_
  static void do_sched_cfs_slack_timer(struct cfs_bandwidth *cfs_b)
  {
        u64 runtime = 0, slice = sched_cfs_bandwidth_slice();
 +      unsigned long flags;
        u64 expires;
  
        /* confirm we're still not at a refresh boundary */
 -      raw_spin_lock(&cfs_b->lock);
 +      raw_spin_lock_irqsave(&cfs_b->lock, flags);
        if (cfs_b->distribute_running) {
 -              raw_spin_unlock(&cfs_b->lock);
 +              raw_spin_unlock_irqrestore(&cfs_b->lock, flags);
                return;
        }
  
        if (runtime_refresh_within(cfs_b, min_bandwidth_expiration)) {
 -              raw_spin_unlock(&cfs_b->lock);
 +              raw_spin_unlock_irqrestore(&cfs_b->lock, flags);
                return;
        }
  
        if (runtime)
                cfs_b->distribute_running = 1;
  
 -      raw_spin_unlock(&cfs_b->lock);
 +      raw_spin_unlock_irqrestore(&cfs_b->lock, flags);
  
        if (!runtime)
                return;
  
        runtime = distribute_cfs_runtime(cfs_b, runtime, expires);
  
 -      raw_spin_lock(&cfs_b->lock);
 +      raw_spin_lock_irqsave(&cfs_b->lock, flags);
        if (expires == cfs_b->runtime_expires)
                lsub_positive(&cfs_b->runtime, runtime);
        cfs_b->distribute_running = 0;
 -      raw_spin_unlock(&cfs_b->lock);
 +      raw_spin_unlock_irqrestore(&cfs_b->lock, flags);
  }
  
  /*
@@@ -4888,21 -4864,20 +4889,21 @@@ static enum hrtimer_restart sched_cfs_p
  {
        struct cfs_bandwidth *cfs_b =
                container_of(timer, struct cfs_bandwidth, period_timer);
 +      unsigned long flags;
        int overrun;
        int idle = 0;
  
 -      raw_spin_lock(&cfs_b->lock);
 +      raw_spin_lock_irqsave(&cfs_b->lock, flags);
        for (;;) {
                overrun = hrtimer_forward_now(timer, cfs_b->period);
                if (!overrun)
                        break;
  
 -              idle = do_sched_cfs_period_timer(cfs_b, overrun);
 +              idle = do_sched_cfs_period_timer(cfs_b, overrun, flags);
        }
        if (idle)
                cfs_b->period_active = 0;
 -      raw_spin_unlock(&cfs_b->lock);
 +      raw_spin_unlock_irqrestore(&cfs_b->lock, flags);
  
        return idle ? HRTIMER_NORESTART : HRTIMER_RESTART;
  }
@@@ -5012,12 -4987,6 +5013,12 @@@ static void __maybe_unused unthrottle_o
  }
  
  #else /* CONFIG_CFS_BANDWIDTH */
 +
 +static inline bool cfs_bandwidth_used(void)
 +{
 +      return false;
 +}
 +
  static inline u64 cfs_rq_clock_task(struct cfs_rq *cfs_rq)
  {
        return rq_clock_task(rq_of(cfs_rq));
@@@ -5209,23 -5178,6 +5210,23 @@@ enqueue_task_fair(struct rq *rq, struc
  
        }
  
 +      if (cfs_bandwidth_used()) {
 +              /*
 +               * When bandwidth control is enabled, the cfs_rq_throttled()
 +               * breaks in the above iteration can leave the leaf list
 +               * incompletely maintained, which would trigger the assertion
 +               * below.
 +               */
 +              for_each_sched_entity(se) {
 +                      cfs_rq = cfs_rq_of(se);
 +
 +                      if (list_add_leaf_cfs_rq(cfs_rq))
 +                              break;
 +              }
 +      }
 +
 +      assert_list_leaf_cfs_rq(rq);
 +
        hrtick_update(rq);
  }
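
A rough userspace model of the enqueue fixup above (the toy struct and helpers are invented stand-ins for cfs_rq, list_add_leaf_cfs_rq() and for_each_sched_entity(), and the stop condition is a simplified reading): walk the entity's ancestors bottom-up, link each cfs_rq that is missing from the leaf list, and stop once an already-linked ancestor is reached.

/*
 * Userspace sketch only; not the kernel data structures.
 */
#include <stdbool.h>
#include <stdio.h>

struct toy_cfs_rq {
	const char *name;
	bool on_leaf_list;
	struct toy_cfs_rq *parent;	/* NULL at the root */
};

/* Link this cfs_rq; report true if it was already linked, so the walk can stop. */
static bool toy_list_add_leaf(struct toy_cfs_rq *cfs_rq)
{
	if (cfs_rq->on_leaf_list)
		return true;
	cfs_rq->on_leaf_list = true;
	printf("linked %s\n", cfs_rq->name);
	return false;
}

int main(void)
{
	struct toy_cfs_rq root = { "root", true,  NULL  };
	struct toy_cfs_rq mid  = { "mid",  false, &root };
	struct toy_cfs_rq leaf = { "leaf", false, &mid  };

	/* Bottom-up fixup, mirroring the shape of the loop in the hunk above. */
	for (struct toy_cfs_rq *rq = &leaf; rq; rq = rq->parent)
		if (toy_list_add_leaf(rq))
			break;
	return 0;
}
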
  
@@@ -5605,6 -5557,11 +5606,6 @@@ static unsigned long capacity_of(int cp
        return cpu_rq(cpu)->cpu_capacity;
  }
  
 -static unsigned long capacity_orig_of(int cpu)
 -{
 -      return cpu_rq(cpu)->cpu_capacity_orig;
 -}
 -
  static unsigned long cpu_avg_load_per_task(int cpu)
  {
        struct rq *rq = cpu_rq(cpu);
@@@ -6097,7 -6054,7 +6098,7 @@@ static int select_idle_core(struct task
                bool idle = true;
  
                for_each_cpu(cpu, cpu_smt_mask(core)) {
 -                      cpumask_clear_cpu(cpu, cpus);
 +                      __cpumask_clear_cpu(cpu, cpus);
                        if (!available_idle_cpu(cpu))
                                idle = false;
                }
  /*
   * Scan the local SMT mask for idle CPUs.
   */
 -static int select_idle_smt(struct task_struct *p, struct sched_domain *sd, int target)
 +static int select_idle_smt(struct task_struct *p, int target)
  {
        int cpu;
  
@@@ -6141,7 -6098,7 +6142,7 @@@ static inline int select_idle_core(stru
        return -1;
  }
  
 -static inline int select_idle_smt(struct task_struct *p, struct sched_domain *sd, int target)
 +static inline int select_idle_smt(struct task_struct *p, int target)
  {
        return -1;
  }
@@@ -6246,7 -6203,7 +6247,7 @@@ static int select_idle_sibling(struct t
        if ((unsigned)i < nr_cpumask_bits)
                return i;
  
 -      i = select_idle_smt(p, sd, target);
 +      i = select_idle_smt(p, target);
        if ((unsigned)i < nr_cpumask_bits)
                return i;
  
@@@ -6652,7 -6609,7 +6653,7 @@@ select_task_rq_fair(struct task_struct 
        if (sd_flag & SD_BALANCE_WAKE) {
                record_wakee(p);
  
 -              if (static_branch_unlikely(&sched_energy_present)) {
 +              if (sched_energy_enabled()) {
                        new_cpu = find_energy_efficient_cpu(p, prev_cpu);
                        if (new_cpu >= 0)
                                return new_cpu;
@@@ -7071,12 -7028,6 +7072,12 @@@ idle
        if (new_tasks > 0)
                goto again;
  
 +      /*
 +       * rq is about to be idle, check if we need to update the
 +       * lost_idle_time of clock_pelt
 +       */
 +      update_idle_rq_clock_pelt(rq);
 +
        return NULL;
  }
  
@@@ -7697,27 -7648,10 +7698,27 @@@ static inline bool others_have_blocked(
  
  #ifdef CONFIG_FAIR_GROUP_SCHED
  
 +static inline bool cfs_rq_is_decayed(struct cfs_rq *cfs_rq)
 +{
 +      if (cfs_rq->load.weight)
 +              return false;
 +
 +      if (cfs_rq->avg.load_sum)
 +              return false;
 +
 +      if (cfs_rq->avg.util_sum)
 +              return false;
 +
 +      if (cfs_rq->avg.runnable_load_sum)
 +              return false;
 +
 +      return true;
 +}
 +
  static void update_blocked_averages(int cpu)
  {
        struct rq *rq = cpu_rq(cpu);
 -      struct cfs_rq *cfs_rq;
 +      struct cfs_rq *cfs_rq, *pos;
        const struct sched_class *curr_class;
        struct rq_flags rf;
        bool done = true;
         * Iterates the task_group tree in a bottom up fashion, see
         * list_add_leaf_cfs_rq() for details.
         */
 -      for_each_leaf_cfs_rq(rq, cfs_rq) {
 +      for_each_leaf_cfs_rq_safe(rq, cfs_rq, pos) {
                struct sched_entity *se;
  
 -              /* throttled entities do not contribute to load */
 -              if (throttled_hierarchy(cfs_rq))
 -                      continue;
 -
 -              if (update_cfs_rq_load_avg(cfs_rq_clock_task(cfs_rq), cfs_rq))
 +              if (update_cfs_rq_load_avg(cfs_rq_clock_pelt(cfs_rq), cfs_rq))
                        update_tg_load_avg(cfs_rq, 0);
  
                /* Propagate pending load changes to the parent, if any: */
                if (se && !skip_blocked_update(se))
                        update_load_avg(cfs_rq_of(se), se, 0);
  
 +              /*
 +               * There can be a lot of idle CPU cgroups.  Don't let fully
 +               * decayed cfs_rqs linger on the list.
 +               */
 +              if (cfs_rq_is_decayed(cfs_rq))
 +                      list_del_leaf_cfs_rq(cfs_rq);
 +
                /* Don't need periodic decay once load/util_avg are null */
                if (cfs_rq_has_blocked(cfs_rq))
                        done = false;
        }
  
        curr_class = rq->curr->sched_class;
 -      update_rt_rq_load_avg(rq_clock_task(rq), rq, curr_class == &rt_sched_class);
 -      update_dl_rq_load_avg(rq_clock_task(rq), rq, curr_class == &dl_sched_class);
 +      update_rt_rq_load_avg(rq_clock_pelt(rq), rq, curr_class == &rt_sched_class);
 +      update_dl_rq_load_avg(rq_clock_pelt(rq), rq, curr_class == &dl_sched_class);
        update_irq_load_avg(rq, 0);
        /* Don't need periodic decay once load/util_avg are null */
        if (others_have_blocked(rq))
@@@ -7824,11 -7755,11 +7825,11 @@@ static inline void update_blocked_avera
  
        rq_lock_irqsave(rq, &rf);
        update_rq_clock(rq);
 -      update_cfs_rq_load_avg(cfs_rq_clock_task(cfs_rq), cfs_rq);
 +      update_cfs_rq_load_avg(cfs_rq_clock_pelt(cfs_rq), cfs_rq);
  
        curr_class = rq->curr->sched_class;
 -      update_rt_rq_load_avg(rq_clock_task(rq), rq, curr_class == &rt_sched_class);
 -      update_dl_rq_load_avg(rq_clock_task(rq), rq, curr_class == &dl_sched_class);
 +      update_rt_rq_load_avg(rq_clock_pelt(rq), rq, curr_class == &rt_sched_class);
 +      update_dl_rq_load_avg(rq_clock_pelt(rq), rq, curr_class == &dl_sched_class);
        update_irq_load_avg(rq, 0);
  #ifdef CONFIG_NO_HZ_COMMON
        rq->last_blocked_load_update_tick = jiffies;
@@@ -8522,7 -8453,9 +8523,7 @@@ static int check_asym_packing(struct lb
        if (sched_asym_prefer(busiest_cpu, env->dst_cpu))
                return 0;
  
 -      env->imbalance = DIV_ROUND_CLOSEST(
 -              sds->busiest_stat.avg_load * sds->busiest_stat.group_capacity,
 -              SCHED_CAPACITY_SCALE);
 +      env->imbalance = sds->busiest_stat.group_load;
  
        return 1;
  }
@@@ -8704,7 -8637,7 +8705,7 @@@ static struct sched_group *find_busiest
         */
        update_sd_lb_stats(env, &sds);
  
 -      if (static_branch_unlikely(&sched_energy_present)) {
 +      if (sched_energy_enabled()) {
                struct root_domain *rd = env->dst_rq->rd;
  
                if (rcu_dereference(rd->pd) && !READ_ONCE(rd->overutilized))
@@@ -8895,25 -8828,21 +8896,25 @@@ static struct rq *find_busiest_queue(st
   */
  #define MAX_PINNED_INTERVAL   512
  
 -static int need_active_balance(struct lb_env *env)
 +static inline bool
 +asym_active_balance(struct lb_env *env)
  {
 -      struct sched_domain *sd = env->sd;
 +      /*
 +       * ASYM_PACKING needs to force migrate tasks from busy but
 +       * lower priority CPUs in order to pack all tasks in the
 +       * highest priority CPUs.
 +       */
 +      return env->idle != CPU_NOT_IDLE && (env->sd->flags & SD_ASYM_PACKING) &&
 +             sched_asym_prefer(env->dst_cpu, env->src_cpu);
 +}
  
 -      if (env->idle == CPU_NEWLY_IDLE) {
 +static inline bool
 +voluntary_active_balance(struct lb_env *env)
 +{
 +      struct sched_domain *sd = env->sd;
  
 -              /*
 -               * ASYM_PACKING needs to force migrate tasks from busy but
 -               * lower priority CPUs in order to pack all tasks in the
 -               * highest priority CPUs.
 -               */
 -              if ((sd->flags & SD_ASYM_PACKING) &&
 -                  sched_asym_prefer(env->dst_cpu, env->src_cpu))
 -                      return 1;
 -      }
 +      if (asym_active_balance(env))
 +              return 1;
  
        /*
         * The dst_cpu is idle and the src_cpu CPU has only 1 CFS task.
        if (env->src_grp_type == group_misfit_task)
                return 1;
  
 +      return 0;
 +}
 +
 +static int need_active_balance(struct lb_env *env)
 +{
 +      struct sched_domain *sd = env->sd;
 +
 +      if (voluntary_active_balance(env))
 +              return 1;
 +
        return unlikely(sd->nr_balance_failed > sd->cache_nice_tries+2);
  }
  
@@@ -9105,7 -9024,7 +9106,7 @@@ more_balance
                if ((env.flags & LBF_DST_PINNED) && env.imbalance > 0) {
  
                        /* Prevent to re-select dst_cpu via env's CPUs */
 -                      cpumask_clear_cpu(env.dst_cpu, env.cpus);
 +                      __cpumask_clear_cpu(env.dst_cpu, env.cpus);
  
                        env.dst_rq       = cpu_rq(env.new_dst_cpu);
                        env.dst_cpu      = env.new_dst_cpu;
  
                /* All tasks on this runqueue were pinned by CPU affinity */
                if (unlikely(env.flags & LBF_ALL_PINNED)) {
 -                      cpumask_clear_cpu(cpu_of(busiest), cpus);
 +                      __cpumask_clear_cpu(cpu_of(busiest), cpus);
                        /*
                         * Attempting to continue load balancing at the current
                         * sched_domain level only makes sense if there are
        } else
                sd->nr_balance_failed = 0;
  
 -      if (likely(!active_balance)) {
 +      if (likely(!active_balance) || voluntary_active_balance(&env)) {
                /* We were unbalanced, so reset the balancing interval */
                sd->balance_interval = sd->min_interval;
        } else {
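
A small userspace sketch (not kernel code; struct toy_sd, the backoff step and the sample numbers are assumptions, since the else branch is not shown in the hunk) of the policy change at the end of the hunk above: the balancing interval is now also reset to its minimum when the active balance was "voluntary" (asym packing, reduced capacity, misfit task), and presumably only backs off when active balance was forced by repeated failures.

/* Userspace sketch only; the condition mirrors the hunk, the rest is assumed. */
#include <stdbool.h>
#include <stdio.h>

struct toy_sd {
	unsigned int balance_interval;
	unsigned int min_interval;
	unsigned int max_interval;
};

static void toy_update_interval(struct toy_sd *sd, bool active_balance,
				bool voluntary)
{
	if (!active_balance || voluntary) {
		/* Treated like a regular balance: reset the interval. */
		sd->balance_interval = sd->min_interval;
	} else if (sd->balance_interval < sd->max_interval) {
		/* Assumed backoff for forced active balance (not shown above). */
		sd->balance_interval *= 2;
	}
}

int main(void)
{
	struct toy_sd sd = { .balance_interval = 64, .min_interval = 8, .max_interval = 512 };

	toy_update_interval(&sd, true, true);			/* voluntary active balance */
	printf("after voluntary: %u\n", sd.balance_interval);	/* 8  */

	toy_update_interval(&sd, true, false);			/* forced active balance */
	printf("after forced:    %u\n", sd.balance_interval);	/* 16 */
	return 0;
}
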
@@@ -9551,8 -9470,15 +9552,8 @@@ static void kick_ilb(unsigned int flags
  }
  
  /*
 - * Current heuristic for kicking the idle load balancer in the presence
 - * of an idle cpu in the system.
 - *   - This rq has more than one task.
 - *   - This rq has at least one CFS task and the capacity of the CPU is
 - *     significantly reduced because of RT tasks or IRQs.
 - *   - At parent of LLC scheduler domain level, this cpu's scheduler group has
 - *     multiple busy cpu.
 - *   - For SD_ASYM_PACKING, if the lower numbered cpu's in the scheduler
 - *     domain span are idle.
 + * Current decision point for kicking the idle load balancer in the presence
 + * of idle CPUs in the system.
   */
  static void nohz_balancer_kick(struct rq *rq)
  {
        sds = rcu_dereference(per_cpu(sd_llc_shared, cpu));
        if (sds) {
                /*
 -               * XXX: write a coherent comment on why we do this.
 -               * See also: http://lkml.kernel.org/r/20111202010832.602203411@sbsiddha-desk.sc.intel.com
 +               * If there is an imbalance between LLC domains (IOW we could
 +               * increase the overall cache use), we need some less-loaded LLC
 +               * domain to pull some load. Likewise, we may need to spread
 +               * load within the current LLC domain (e.g. packed SMT cores but
 +               * other CPUs are idle). We can't really know from here how busy
 +               * the others are - so just get a nohz balance going if it looks
 +               * like this LLC domain has tasks we could move.
                 */
                nr_busy = atomic_read(&sds->nr_busy_cpus);
                if (nr_busy > 1) {
        sd = rcu_dereference(rq->sd);
        if (sd) {
                if ((rq->cfs.h_nr_running >= 1) &&
 -                              check_cpu_capacity(rq, sd)) {
 +                  check_cpu_capacity(rq, sd)) {
                        flags = NOHZ_KICK_MASK;
                        goto unlock;
                }
  
        sd = rcu_dereference(per_cpu(sd_asym_packing, cpu));
        if (sd) {
 -              for_each_cpu(i, sched_domain_span(sd)) {
 -                      if (i == cpu ||
 -                          !cpumask_test_cpu(i, nohz.idle_cpus_mask))
 -                              continue;
 -
 +              for_each_cpu_and(i, sched_domain_span(sd), nohz.idle_cpus_mask) {
                        if (sched_asym_prefer(i, cpu)) {
                                flags = NOHZ_KICK_MASK;
                                goto unlock;
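
For illustration, a userspace sketch of the iterator change above (plain unsigned long bitmasks stand in for struct cpumask and the values are made up): for_each_cpu_and() walks only the CPUs present in both the scheduling-domain span and nohz.idle_cpus_mask, instead of scanning the whole span and filtering out non-idle CPUs inside the loop body.

/* Userspace sketch only; not the kernel cpumask API. */
#include <stdio.h>

int main(void)
{
	unsigned long span = 0xffUL;	/* CPUs 0-7 in the domain span (made up) */
	unsigned long idle = 0x95UL;	/* idle CPUs: 0, 2, 4, 7 (made up)       */
	unsigned long both = span & idle;

	for (int cpu = 0; cpu < 8; cpu++) {
		if (!(both & (1UL << cpu)))
			continue;
		/* Only CPUs present in both masks are ever looked at here. */
		printf("candidate idle CPU %d\n", cpu);
	}
	return 0;
}
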
@@@ -10622,10 -10547,10 +10623,10 @@@ const struct sched_class fair_sched_cla
  #ifdef CONFIG_SCHED_DEBUG
  void print_cfs_stats(struct seq_file *m, int cpu)
  {
 -      struct cfs_rq *cfs_rq;
 +      struct cfs_rq *cfs_rq, *pos;
  
        rcu_read_lock();
 -      for_each_leaf_cfs_rq(cpu_rq(cpu), cfs_rq)
 +      for_each_leaf_cfs_rq_safe(cpu_rq(cpu), cfs_rq, pos)
                print_cfs_rq(m, cpu, cfs_rq);
        rcu_read_unlock();
  }
diff --combined kernel/sysctl.c
@@@ -472,17 -472,6 +472,17 @@@ static struct ctl_table kern_table[] = 
                .extra1         = &one,
        },
  #endif
 +#if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL)
 +      {
 +              .procname       = "sched_energy_aware",
 +              .data           = &sysctl_sched_energy_aware,
 +              .maxlen         = sizeof(unsigned int),
 +              .mode           = 0644,
 +              .proc_handler   = sched_energy_aware_handler,
 +              .extra1         = &zero,
 +              .extra2         = &one,
 +      },
 +#endif
  #ifdef CONFIG_PROVE_LOCKING
        {
                .procname       = "prove_locking",
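
Assuming the new entry above is registered under kern_table as usual, the knob should surface as /proc/sys/kernel/sched_energy_aware on ENERGY_MODEL + CPU_FREQ_GOV_SCHEDUTIL builds, clamped to 0 or 1 by the extra1/extra2 bounds. A minimal userspace C sketch for reading (and, if uncommented, writing) it; the path is inferred from the procname, not stated in the hunk.

/* Userspace sketch; error handling kept minimal on purpose. */
#include <stdio.h>

int main(void)
{
	const char *path = "/proc/sys/kernel/sched_energy_aware"; /* inferred path */
	FILE *f = fopen(path, "r+");

	if (!f) {
		perror(path);	/* kernel built without the knob, or no permission */
		return 1;
	}

	int val = -1;
	if (fscanf(f, "%d", &val) == 1)
		printf("sched_energy_aware = %d\n", val);

	/* Writing "0" presumably disables energy-aware scheduling; "1" re-enables it. */
	/* fprintf(f, "0\n"); */

	fclose(f);
	return 0;
}
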
@@@ -1471,7 -1460,7 +1471,7 @@@ static struct ctl_table vm_table[] = 
                .data           = &sysctl_extfrag_threshold,
                .maxlen         = sizeof(int),
                .mode           = 0644,
-               .proc_handler   = sysctl_extfrag_handler,
+               .proc_handler   = proc_dointvec_minmax,
                .extra1         = &min_extfrag_threshold,
                .extra2         = &max_extfrag_threshold,
        },