patches-5.15.92-rt57

author Thomas Gleixner <tglx@linutronix.de>

Tue, 21 Sep 2021 21:12:50 +0000 (23:12 +0200)

committer Hoegeun Kwon <hoegeun.kwon@samsung.com>

Thu, 3 Aug 2023 08:55:09 +0000 (17:55 +0900)
author Thomas Gleixner <tglx@linutronix.de>
Tue, 21 Sep 2021 21:12:50 +0000 (23:12 +0200)
committer Hoegeun Kwon <hoegeun.kwon@samsung.com>
Thu, 3 Aug 2023 08:55:09 +0000 (17:55 +0900)
diff --git a/Documentation/admin-guide/cgroup-v1/memory.rst b/Documentation/admin-guide/cgroup-v1/memory.rst

index 41191b5..c45291a 100644 (file)
--- a/Documentation/admin-guide/cgroup-v1/memory.rst
+++ b/Documentation/admin-guide/cgroup-v1/memory.rst
@@ -64,6 +64,7 @@ Brief summary of control files.
                                      threads
   cgroup.procs                       show list of processes
   cgroup.event_control               an interface for event_fd()
+                                    This knob is not available on CONFIG_PREEMPT_RT systems.
   memory.usage_in_bytes              show current usage for memory
                                      (See 5.5 for details)
   memory.memsw.usage_in_bytes        show current usage for memory+Swap
@@ -75,6 +76,7 @@ Brief summary of control files.
   memory.max_usage_in_bytes          show max memory usage recorded
   memory.memsw.max_usage_in_bytes     show max memory+Swap usage recorded
   memory.soft_limit_in_bytes         set/show soft limit of memory usage
+                                    This knob is not available on CONFIG_PREEMPT_RT systems.
   memory.stat                        show various statistics
   memory.use_hierarchy               set/show hierarchical account enabled
                                       This knob is deprecated and shouldn't be
diff --git a/Documentation/dev-tools/kcov.rst b/Documentation/dev-tools/kcov.rst

index d2c4c27..d83c9ab 100644 (file)
--- a/Documentation/dev-tools/kcov.rst
+++ b/Documentation/dev-tools/kcov.rst
@@ -50,6 +50,7 @@ program using kcov:
      #include <sys/mman.h>
      #include <unistd.h>
      #include <fcntl.h>
+    #include <linux/types.h>
  
      #define KCOV_INIT_TRACE                    _IOR('c', 1, unsigned long)
      #define KCOV_ENABLE                        _IO('c', 100)
@@ -177,6 +178,8 @@ Comparison operands collection is similar to coverage collection:
         /* Read number of comparisons collected. */
         n = __atomic_load_n(&cover[0], __ATOMIC_RELAXED);
         for (i = 0; i < n; i++) {
+               uint64_t ip;
+
                 type = cover[i * KCOV_WORDS_PER_CMP + 1];
                 /* arg1 and arg2 - operands of the comparison. */
                 arg1 = cover[i * KCOV_WORDS_PER_CMP + 2];
@@ -251,6 +254,8 @@ selectively from different subsystems.
  
  .. code-block:: c
  
+    /* Same includes and defines as above. */
+
      struct kcov_remote_arg {
         __u32           trace_mode;
         __u32           area_size;
diff --git a/arch/alpha/include/asm/spinlock_types.h b/arch/alpha/include/asm/spinlock_types.h

index 1d5716b..2526fd3 100644 (file)
--- a/arch/alpha/include/asm/spinlock_types.h
+++ b/arch/alpha/include/asm/spinlock_types.h
@@ -2,7 +2,7 @@
  #ifndef _ALPHA_SPINLOCK_TYPES_H
  #define _ALPHA_SPINLOCK_TYPES_H
  
-#ifndef __LINUX_SPINLOCK_TYPES_H
+#ifndef __LINUX_SPINLOCK_TYPES_RAW_H
  # error "please don't include this file directly"
  #endif
  
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig

index a8ae17f..0e8631b 100644 (file)
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -32,6 +32,7 @@ config ARM
         select ARCH_OPTIONAL_KERNEL_RWX_DEFAULT if CPU_V7
         select ARCH_SUPPORTS_ATOMIC_RMW
         select ARCH_SUPPORTS_HUGETLBFS if ARM_LPAE
+       select ARCH_SUPPORTS_RT if HAVE_POSIX_CPU_TIMERS_TASK_WORK
         select ARCH_USE_BUILTIN_BSWAP
         select ARCH_USE_CMPXCHG_LOCKREF
         select ARCH_USE_MEMTEST
@@ -68,7 +69,7 @@ config ARM
         select HARDIRQS_SW_RESEND
         select HAVE_ARCH_AUDITSYSCALL if AEABI && !OABI_COMPAT
         select HAVE_ARCH_BITREVERSE if (CPU_32v7M || CPU_32v7) && !CPU_32v6
-       select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL && !CPU_ENDIAN_BE32 && MMU
+       select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL && !CPU_ENDIAN_BE32 && MMU && !PREEMPT_RT
         select HAVE_ARCH_KGDB if !CPU_ENDIAN_BE32 && MMU
         select HAVE_ARCH_KASAN if MMU && !XIP_KERNEL
         select HAVE_ARCH_MMAP_RND_BITS if MMU
@@ -109,6 +110,7 @@ config ARM
         select HAVE_PERF_EVENTS
         select HAVE_PERF_REGS
         select HAVE_PERF_USER_STACK_DUMP
+       select HAVE_PREEMPT_LAZY
         select MMU_GATHER_RCU_TABLE_FREE if SMP && ARM_LPAE
         select HAVE_REGS_AND_STACK_ACCESS_API
         select HAVE_RSEQ
@@ -124,6 +126,7 @@ config ARM
         select OLD_SIGSUSPEND3
         select PCI_SYSCALL if PCI
         select PERF_USE_VMALLOC
+       select HAVE_POSIX_CPU_TIMERS_TASK_WORK if !KVM
         select RTC_LIB
         select SYS_SUPPORTS_APM_EMULATION
         select TRACE_IRQFLAGS_SUPPORT if !CPU_V7M
diff --git a/arch/arm/include/asm/spinlock_types.h b/arch/arm/include/asm/spinlock_types.h

index 5976958..0c14b36 100644 (file)
--- a/arch/arm/include/asm/spinlock_types.h
+++ b/arch/arm/include/asm/spinlock_types.h
@@ -2,7 +2,7 @@
  #ifndef __ASM_SPINLOCK_TYPES_H
  #define __ASM_SPINLOCK_TYPES_H
  
-#ifndef __LINUX_SPINLOCK_TYPES_H
+#ifndef __LINUX_SPINLOCK_TYPES_RAW_H
  # error "please don't include this file directly"
  #endif
  
diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h

index b682189..e5e2ceb 100644 (file)
--- a/arch/arm/include/asm/thread_info.h
+++ b/arch/arm/include/asm/thread_info.h
@@ -52,6 +52,7 @@ struct cpu_context_save {
  struct thread_info {
         unsigned long           flags;          /* low level flags */
         int                     preempt_count;  /* 0 => preemptable, <0 => bug */
+       int                     preempt_lazy_count; /* 0 => preemptable, <0 => bug */
         struct task_struct      *task;          /* main task structure */
         __u32                   cpu;            /* cpu */
         __u32                   cpu_domain;     /* cpu domain */
@@ -130,6 +131,7 @@ extern int vfp_restore_user_hwstate(struct user_vfp *,
  #define TIF_NOTIFY_RESUME      2       /* callback before returning to user */
  #define TIF_UPROBE             3       /* breakpointed or singlestepping */
  #define TIF_NOTIFY_SIGNAL      4       /* signal notifications exist */
+#define TIF_NEED_RESCHED_LAZY  9
  
  #define TIF_USING_IWMMXT       17
  #define TIF_MEMDIE             18      /* is terminating due to OOM killer */
@@ -149,6 +151,7 @@ extern int vfp_restore_user_hwstate(struct user_vfp *,
  #define _TIF_SYSCALL_TRACEPOINT        (1 << TIF_SYSCALL_TRACEPOINT)
  #define _TIF_SECCOMP           (1 << TIF_SECCOMP)
  #define _TIF_NOTIFY_SIGNAL     (1 << TIF_NOTIFY_SIGNAL)
+#define _TIF_NEED_RESCHED_LAZY (1 << TIF_NEED_RESCHED_LAZY)
  #define _TIF_USING_IWMMXT      (1 << TIF_USING_IWMMXT)
  
  /* Checks for any syscall work in entry-common.S */
@@ -158,7 +161,8 @@ extern int vfp_restore_user_hwstate(struct user_vfp *,
  /*
   * Change these and you break ASM code in entry-common.S
   */
-#define _TIF_WORK_MASK         (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \
+#define _TIF_WORK_MASK         (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY | \
+                                _TIF_SIGPENDING | \
                                  _TIF_NOTIFY_RESUME | _TIF_UPROBE | \
                                  _TIF_NOTIFY_SIGNAL)
  
diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c

index a646a3f..beb09d7 100644 (file)
--- a/arch/arm/kernel/asm-offsets.c
+++ b/arch/arm/kernel/asm-offsets.c
@@ -43,6 +43,7 @@ int main(void)
    BLANK();
    DEFINE(TI_FLAGS,             offsetof(struct thread_info, flags));
    DEFINE(TI_PREEMPT,           offsetof(struct thread_info, preempt_count));
+  DEFINE(TI_PREEMPT_LAZY,      offsetof(struct thread_info, preempt_lazy_count));
    DEFINE(TI_TASK,              offsetof(struct thread_info, task));
    DEFINE(TI_CPU,               offsetof(struct thread_info, cpu));
    DEFINE(TI_CPU_DOMAIN,                offsetof(struct thread_info, cpu_domain));
diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S

index 68261a8..fa7d110 100644 (file)
--- a/arch/arm/kernel/entry-armv.S
+++ b/arch/arm/kernel/entry-armv.S
@@ -206,11 +206,18 @@ __irq_svc:
  
  #ifdef CONFIG_PREEMPTION
         ldr     r8, [tsk, #TI_PREEMPT]          @ get preempt count
-       ldr     r0, [tsk, #TI_FLAGS]            @ get flags
         teq     r8, #0                          @ if preempt count != 0
+       bne     1f                              @ return from exception
+       ldr     r0, [tsk, #TI_FLAGS]            @ get flags
+       tst     r0, #_TIF_NEED_RESCHED          @ if NEED_RESCHED is set
+       blne    svc_preempt                     @ preempt!
+
+       ldr     r8, [tsk, #TI_PREEMPT_LAZY]     @ get preempt lazy count
+       teq     r8, #0                          @ if preempt lazy count != 0
         movne   r0, #0                          @ force flags to 0
-       tst     r0, #_TIF_NEED_RESCHED
+       tst     r0, #_TIF_NEED_RESCHED_LAZY
         blne    svc_preempt
+1:
  #endif
  
         svc_exit r5, irq = 1                    @ return from exception
@@ -225,8 +232,14 @@ svc_preempt:
  1:     bl      preempt_schedule_irq            @ irq en/disable is done inside
         ldr     r0, [tsk, #TI_FLAGS]            @ get new tasks TI_FLAGS
         tst     r0, #_TIF_NEED_RESCHED
+       bne     1b
+       tst     r0, #_TIF_NEED_RESCHED_LAZY
         reteq   r8                              @ go again
-       b       1b
+       ldr     r0, [tsk, #TI_PREEMPT_LAZY]     @ get preempt lazy count
+       teq     r0, #0                          @ if preempt lazy count == 0
+       beq     1b                              @ preempt again
+       ret     r8                              @ lazy count != 0: return
+
  #endif
  
  __und_fault:
diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c

index 539897a..4655f04 100644 (file)
--- a/arch/arm/kernel/signal.c
+++ b/arch/arm/kernel/signal.c
@@ -607,7 +607,8 @@ do_work_pending(struct pt_regs *regs, unsigned int thread_flags, int syscall)
          */
         trace_hardirqs_off();
         do {
-               if (likely(thread_flags & _TIF_NEED_RESCHED)) {
+               if (likely(thread_flags & (_TIF_NEED_RESCHED |
+                                          _TIF_NEED_RESCHED_LAZY))) {
                         schedule();
                 } else {
                         if (unlikely(!user_mode(regs)))
diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c

index af51778..1de0160 100644 (file)
--- a/arch/arm/mm/fault.c
+++ b/arch/arm/mm/fault.c
@@ -400,6 +400,9 @@ do_translation_fault(unsigned long addr, unsigned int fsr,
         if (addr < TASK_SIZE)
                 return do_page_fault(addr, fsr, regs);
  
+       if (interrupts_enabled(regs))
+               local_irq_enable();
+
         if (user_mode(regs))
                 goto bad_area;
  
@@ -470,6 +473,9 @@ do_translation_fault(unsigned long addr, unsigned int fsr,
  static int
  do_sect_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
  {
+       if (interrupts_enabled(regs))
+               local_irq_enable();
+
         do_bad_area(addr, fsr, regs);
         return 0;
  }
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig

index 9d3cbe7..c86b845 100644 (file)
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -88,6 +88,7 @@ config ARM64
         select ARCH_SUPPORTS_ATOMIC_RMW
         select ARCH_SUPPORTS_INT128 if CC_HAS_INT128
         select ARCH_SUPPORTS_NUMA_BALANCING
+       select ARCH_SUPPORTS_RT if HAVE_POSIX_CPU_TIMERS_TASK_WORK
         select ARCH_WANT_COMPAT_IPC_PARSE_VERSION if COMPAT
         select ARCH_WANT_DEFAULT_BPF_JIT
         select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT
@@ -191,6 +192,7 @@ config ARM64
         select HAVE_PERF_REGS
         select HAVE_PERF_USER_STACK_DUMP
         select HAVE_REGS_AND_STACK_ACCESS_API
+       select HAVE_PREEMPT_LAZY
         select HAVE_FUNCTION_ARG_ACCESS_API
         select HAVE_FUTEX_CMPXCHG if FUTEX
         select MMU_GATHER_RCU_TABLE_FREE
@@ -212,6 +214,7 @@ config ARM64
         select PCI_DOMAINS_GENERIC if PCI
         select PCI_ECAM if (ACPI && PCI)
         select PCI_SYSCALL if PCI
+       select HAVE_POSIX_CPU_TIMERS_TASK_WORK if !KVM
         select POWER_RESET
         select POWER_SUPPLY
         select SPARSE_IRQ
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h

index ed57717..63b3922 100644 (file)
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -1001,7 +1001,7 @@ static inline void update_mmu_cache(struct vm_area_struct *vma,
   */
  static inline bool arch_faults_on_old_pte(void)
  {
-       WARN_ON(preemptible());
+       WARN_ON(is_migratable());
  
         return !cpu_has_hw_af();
  }
diff --git a/arch/arm64/include/asm/preempt.h b/arch/arm64/include/asm/preempt.h

index e83f098..2545c17 100644 (file)
--- a/arch/arm64/include/asm/preempt.h
+++ b/arch/arm64/include/asm/preempt.h
@@ -70,13 +70,36 @@ static inline bool __preempt_count_dec_and_test(void)
          * interrupt occurring between the non-atomic READ_ONCE/WRITE_ONCE
          * pair.
          */
-       return !pc || !READ_ONCE(ti->preempt_count);
+       if (!pc || !READ_ONCE(ti->preempt_count))
+               return true;
+#ifdef CONFIG_PREEMPT_LAZY
+       if ((pc & ~PREEMPT_NEED_RESCHED))
+               return false;
+       if (current_thread_info()->preempt_lazy_count)
+               return false;
+       return test_thread_flag(TIF_NEED_RESCHED_LAZY);
+#else
+       return false;
+#endif
  }
  
  static inline bool should_resched(int preempt_offset)
  {
+#ifdef CONFIG_PREEMPT_LAZY
+       u64 pc = READ_ONCE(current_thread_info()->preempt_count);
+       if (pc == preempt_offset)
+               return true;
+
+       if ((pc & ~PREEMPT_NEED_RESCHED) != preempt_offset)
+               return false;
+
+       if (current_thread_info()->preempt_lazy_count)
+               return false;
+       return test_thread_flag(TIF_NEED_RESCHED_LAZY);
+#else
         u64 pc = READ_ONCE(current_thread_info()->preempt_count);
         return pc == preempt_offset;
+#endif
  }
  
  #ifdef CONFIG_PREEMPTION
diff --git a/arch/arm64/include/asm/signal.h b/arch/arm64/include/asm/signal.h

index ef449f5..5e535c3 100644 (file)
--- a/arch/arm64/include/asm/signal.h
+++ b/arch/arm64/include/asm/signal.h
@@ -22,4 +22,8 @@ static inline void __user *arch_untagged_si_addr(void __user *addr,
  }
  #define arch_untagged_si_addr arch_untagged_si_addr
  
+#if defined(CONFIG_PREEMPT_RT)
+#define ARCH_RT_DELAYS_SIGNAL_SEND
+#endif
+
  #endif
diff --git a/arch/arm64/include/asm/spinlock_types.h b/arch/arm64/include/asm/spinlock_types.h

index 18782f0..11ab1c0 100644 (file)
--- a/arch/arm64/include/asm/spinlock_types.h
+++ b/arch/arm64/include/asm/spinlock_types.h
@@ -5,7 +5,7 @@
  #ifndef __ASM_SPINLOCK_TYPES_H
  #define __ASM_SPINLOCK_TYPES_H
  
-#if !defined(__LINUX_SPINLOCK_TYPES_H) && !defined(__ASM_SPINLOCK_H)
+#if !defined(__LINUX_SPINLOCK_TYPES_RAW_H) && !defined(__ASM_SPINLOCK_H)
  # error "please don't include this file directly"
  #endif
  
diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h

index 6623c99..c55ccec 100644 (file)
--- a/arch/arm64/include/asm/thread_info.h
+++ b/arch/arm64/include/asm/thread_info.h
@@ -26,6 +26,7 @@ struct thread_info {
  #ifdef CONFIG_ARM64_SW_TTBR0_PAN
         u64                     ttbr0;          /* saved TTBR0_EL1 */
  #endif
+       int                     preempt_lazy_count;     /* 0 => preemptable, <0 => bug */
         union {
                 u64             preempt_count;  /* 0 => preemptible, <0 => bug */
                 struct {
@@ -67,6 +68,7 @@ int arch_dup_task_struct(struct task_struct *dst,
  #define TIF_UPROBE             4       /* uprobe breakpoint or singlestep */
  #define TIF_MTE_ASYNC_FAULT    5       /* MTE Asynchronous Tag Check Fault */
  #define TIF_NOTIFY_SIGNAL      6       /* signal notifications exist */
+#define TIF_NEED_RESCHED_LAZY  7
  #define TIF_SYSCALL_TRACE      8       /* syscall trace active */
  #define TIF_SYSCALL_AUDIT      9       /* syscall auditing */
  #define TIF_SYSCALL_TRACEPOINT 10      /* syscall tracepoint for ftrace */
@@ -97,8 +99,10 @@ int arch_dup_task_struct(struct task_struct *dst,
  #define _TIF_SVE               (1 << TIF_SVE)
  #define _TIF_MTE_ASYNC_FAULT   (1 << TIF_MTE_ASYNC_FAULT)
  #define _TIF_NOTIFY_SIGNAL     (1 << TIF_NOTIFY_SIGNAL)
+#define _TIF_NEED_RESCHED_LAZY (1 << TIF_NEED_RESCHED_LAZY)
  
-#define _TIF_WORK_MASK         (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \
+#define _TIF_WORK_MASK         (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY | \
+                                _TIF_SIGPENDING | \
                                  _TIF_NOTIFY_RESUME | _TIF_FOREIGN_FPSTATE | \
                                  _TIF_UPROBE | _TIF_MTE_ASYNC_FAULT | \
                                  _TIF_NOTIFY_SIGNAL)
@@ -107,6 +111,8 @@ int arch_dup_task_struct(struct task_struct *dst,
                                  _TIF_SYSCALL_TRACEPOINT | _TIF_SECCOMP | \
                                  _TIF_SYSCALL_EMU)
  
+#define _TIF_NEED_RESCHED_MASK (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY)
+
  #ifdef CONFIG_SHADOW_CALL_STACK
  #define INIT_SCS                                                       \
         .scs_base       = init_shadow_call_stack,                       \
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c

index 551427a..96a4f6c 100644 (file)
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -31,6 +31,7 @@ int main(void)
    BLANK();
    DEFINE(TSK_TI_FLAGS,         offsetof(struct task_struct, thread_info.flags));
    DEFINE(TSK_TI_PREEMPT,       offsetof(struct task_struct, thread_info.preempt_count));
+  DEFINE(TSK_TI_PREEMPT_LAZY,  offsetof(struct task_struct, thread_info.preempt_lazy_count));
  #ifdef CONFIG_ARM64_SW_TTBR0_PAN
    DEFINE(TSK_TI_TTBR0,         offsetof(struct task_struct, thread_info.ttbr0));
  #endif
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c

index 7a3fcf2..5689d2b 100644 (file)
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -179,10 +179,19 @@ static void __get_cpu_fpsimd_context(void)
   *
   * The double-underscore version must only be called if you know the task
   * can't be preempted.
+ *
+ * On RT kernels local_bh_disable() is not sufficient because it only
+ * serializes soft interrupt related sections via a local lock, but stays
+ * preemptible. Disabling preemption is the right choice here as bottom
+ * half processing is always in thread context on RT kernels so it
+ * implicitly prevents bottom half processing as well.
   */
  static void get_cpu_fpsimd_context(void)
  {
-       local_bh_disable();
+       if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+               local_bh_disable();
+       else
+               preempt_disable();
         __get_cpu_fpsimd_context();
  }
  
@@ -203,7 +212,10 @@ static void __put_cpu_fpsimd_context(void)
  static void put_cpu_fpsimd_context(void)
  {
         __put_cpu_fpsimd_context();
-       local_bh_enable();
+       if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+               local_bh_enable();
+       else
+               preempt_enable();
  }
  
  static bool have_cpu_fpsimd_context(void)
@@ -1033,6 +1045,7 @@ void fpsimd_thread_switch(struct task_struct *next)
  void fpsimd_flush_thread(void)
  {
         int vl, supported_vl;
+       void *sve_state = NULL;
  
         if (!system_supports_fpsimd())
                 return;
@@ -1045,7 +1058,10 @@ void fpsimd_flush_thread(void)
  
         if (system_supports_sve()) {
                 clear_thread_flag(TIF_SVE);
-               sve_free(current);
+
+               /* Defer kfree() while in atomic context */
+               sve_state = current->thread.sve_state;
+               current->thread.sve_state = NULL;
  
                 /*
                  * Reset the task vector length as required.
@@ -1079,6 +1095,7 @@ void fpsimd_flush_thread(void)
         }
  
         put_cpu_fpsimd_context();
+       kfree(sve_state);
  }
  
  /*
diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c

index b3e1bec..0318356 100644 (file)
--- a/arch/arm64/kernel/signal.c
+++ b/arch/arm64/kernel/signal.c
@@ -922,7 +922,7 @@ static void do_signal(struct pt_regs *regs)
  void do_notify_resume(struct pt_regs *regs, unsigned long thread_flags)
  {
         do {
-               if (thread_flags & _TIF_NEED_RESCHED) {
+               if (thread_flags & _TIF_NEED_RESCHED_MASK) {
                         /* Unmask Debug and SError for the next task */
                         local_daif_restore(DAIF_PROCCTX_NOIRQ);
  
@@ -930,6 +930,14 @@ void do_notify_resume(struct pt_regs *regs, unsigned long thread_flags)
                 } else {
                         local_daif_restore(DAIF_PROCCTX);
  
+#ifdef ARCH_RT_DELAYS_SIGNAL_SEND
+                       if (unlikely(current->forced_info.si_signo)) {
+                               struct task_struct *t = current;
+                               force_sig_info(&t->forced_info);
+                               t->forced_info.si_signo = 0;
+                       }
+#endif
+
                         if (thread_flags & _TIF_UPROBE)
                                 uprobe_notify_resume(regs);
  
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c

index 3fe816c..ba8c69c 100644 (file)
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -828,7 +828,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
                  * involves poking the GIC, which must be done in a
                  * non-preemptible context.
                  */
-               preempt_disable();
+               migrate_disable();
  
                 kvm_pmu_flush_hwstate(vcpu);
  
@@ -852,7 +852,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
                                 kvm_timer_sync_user(vcpu);
                         kvm_vgic_sync_hwstate(vcpu);
                         local_irq_enable();
-                       preempt_enable();
+                       migrate_enable();
                         continue;
                 }
  
@@ -921,7 +921,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
                 /* Exit types that need handling before we can be preempted */
                 handle_exit_early(vcpu, ret);
  
-               preempt_enable();
+               migrate_enable();
  
                 /*
                  * The ARMv8 architecture doesn't give the hypervisor
diff --git a/arch/csky/include/asm/spinlock_types.h b/arch/csky/include/asm/spinlock_types.h

index 8ff0f6f..db87a12 100644 (file)
--- a/arch/csky/include/asm/spinlock_types.h
+++ b/arch/csky/include/asm/spinlock_types.h
@@ -3,7 +3,7 @@
  #ifndef __ASM_CSKY_SPINLOCK_TYPES_H
  #define __ASM_CSKY_SPINLOCK_TYPES_H
  
-#ifndef __LINUX_SPINLOCK_TYPES_H
+#ifndef __LINUX_SPINLOCK_TYPES_RAW_H
  # error "please don't include this file directly"
  #endif
  
diff --git a/arch/hexagon/include/asm/spinlock_types.h b/arch/hexagon/include/asm/spinlock_types.h

index 19d2334..d5f6649 100644 (file)
--- a/arch/hexagon/include/asm/spinlock_types.h
+++ b/arch/hexagon/include/asm/spinlock_types.h
@@ -8,7 +8,7 @@
  #ifndef _ASM_SPINLOCK_TYPES_H
  #define _ASM_SPINLOCK_TYPES_H
  
-#ifndef __LINUX_SPINLOCK_TYPES_H
+#ifndef __LINUX_SPINLOCK_TYPES_RAW_H
  # error "please don't include this file directly"
  #endif
  
diff --git a/arch/ia64/include/asm/spinlock_types.h b/arch/ia64/include/asm/spinlock_types.h

index 6e345fe..14b8a16 100644 (file)
--- a/arch/ia64/include/asm/spinlock_types.h
+++ b/arch/ia64/include/asm/spinlock_types.h
@@ -2,7 +2,7 @@
  #ifndef _ASM_IA64_SPINLOCK_TYPES_H
  #define _ASM_IA64_SPINLOCK_TYPES_H
  
-#ifndef __LINUX_SPINLOCK_TYPES_H
+#ifndef __LINUX_SPINLOCK_TYPES_RAW_H
  # error "please don't include this file directly"
  #endif
  
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig

index 27222b7..5495225 100644 (file)
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -151,6 +151,7 @@ config PPC
         select ARCH_STACKWALK
         select ARCH_SUPPORTS_ATOMIC_RMW
         select ARCH_SUPPORTS_DEBUG_PAGEALLOC    if PPC_BOOK3S || PPC_8xx || 40x
+       select ARCH_SUPPORTS_RT                 if HAVE_POSIX_CPU_TIMERS_TASK_WORK
         select ARCH_USE_BUILTIN_BSWAP
         select ARCH_USE_CMPXCHG_LOCKREF         if PPC64
         select ARCH_USE_MEMTEST
@@ -218,6 +219,7 @@ config PPC
         select HAVE_HW_BREAKPOINT               if PERF_EVENTS && (PPC_BOOK3S || PPC_8xx)
         select HAVE_IOREMAP_PROT
         select HAVE_IRQ_TIME_ACCOUNTING
+       select HAVE_POSIX_CPU_TIMERS_TASK_WORK  if !KVM
         select HAVE_KERNEL_GZIP
         select HAVE_KERNEL_LZMA                 if DEFAULT_UIMAGE
         select HAVE_KERNEL_LZO                  if DEFAULT_UIMAGE
@@ -234,6 +236,7 @@ config PPC
         select HAVE_PERF_EVENTS_NMI             if PPC64
         select HAVE_PERF_REGS
         select HAVE_PERF_USER_STACK_DUMP
+       select HAVE_PREEMPT_LAZY
         select HAVE_REGS_AND_STACK_ACCESS_API
         select HAVE_RELIABLE_STACKTRACE
         select HAVE_RSEQ
diff --git a/arch/powerpc/include/asm/simple_spinlock_types.h b/arch/powerpc/include/asm/simple_spinlock_types.h

index 0f3cdd8..0824333 100644 (file)
--- a/arch/powerpc/include/asm/simple_spinlock_types.h
+++ b/arch/powerpc/include/asm/simple_spinlock_types.h
@@ -2,7 +2,7 @@
  #ifndef _ASM_POWERPC_SIMPLE_SPINLOCK_TYPES_H
  #define _ASM_POWERPC_SIMPLE_SPINLOCK_TYPES_H
  
-#ifndef __LINUX_SPINLOCK_TYPES_H
+#ifndef __LINUX_SPINLOCK_TYPES_RAW_H
  # error "please don't include this file directly"
  #endif
  
diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h

index 7ef1cd8..f9e63ca 100644 (file)
--- a/arch/powerpc/include/asm/smp.h
+++ b/arch/powerpc/include/asm/smp.h
@@ -62,6 +62,7 @@ struct smp_ops_t {
  
  extern int smp_send_nmi_ipi(int cpu, void (*fn)(struct pt_regs *), u64 delay_us);
  extern int smp_send_safe_nmi_ipi(int cpu, void (*fn)(struct pt_regs *), u64 delay_us);
+extern void smp_send_debugger_break_cpu(unsigned int cpu);
  extern void smp_send_debugger_break(void);
  extern void start_secondary_resume(void);
  extern void smp_generic_give_timebase(void);
diff --git a/arch/powerpc/include/asm/spinlock_types.h b/arch/powerpc/include/asm/spinlock_types.h

index c5d742f..d5f8a74 100644 (file)
--- a/arch/powerpc/include/asm/spinlock_types.h
+++ b/arch/powerpc/include/asm/spinlock_types.h
@@ -2,7 +2,7 @@
  #ifndef _ASM_POWERPC_SPINLOCK_TYPES_H
  #define _ASM_POWERPC_SPINLOCK_TYPES_H
  
-#ifndef __LINUX_SPINLOCK_TYPES_H
+#ifndef __LINUX_SPINLOCK_TYPES_RAW_H
  # error "please don't include this file directly"
  #endif
  
diff --git a/arch/powerpc/include/asm/stackprotector.h b/arch/powerpc/include/asm/stackprotector.h

index 1c8460e..b1653c1 100644 (file)
--- a/arch/powerpc/include/asm/stackprotector.h
+++ b/arch/powerpc/include/asm/stackprotector.h
@@ -24,7 +24,11 @@ static __always_inline void boot_init_stack_canary(void)
         unsigned long canary;
  
         /* Try to get a semi random initial value. */
+#ifdef CONFIG_PREEMPT_RT
+       canary = (unsigned long)&canary;
+#else
         canary = get_random_canary();
+#endif
         canary ^= mftb();
         canary ^= LINUX_VERSION_CODE;
         canary &= CANARY_MASK;
diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h

index 87013ac..2920ed3 100644 (file)
--- a/arch/powerpc/include/asm/thread_info.h
+++ b/arch/powerpc/include/asm/thread_info.h
@@ -53,6 +53,8 @@
  struct thread_info {
         int             preempt_count;          /* 0 => preemptable,
                                                    <0 => BUG */
+       int             preempt_lazy_count;     /* 0 => preemptable,
+                                                  <0 => BUG */
         unsigned long   local_flags;            /* private flags for thread */
  #ifdef CONFIG_LIVEPATCH
         unsigned long *livepatch_sp;
@@ -99,6 +101,7 @@ void arch_setup_new_exec(void);
  #define TIF_PATCH_PENDING      6       /* pending live patching update */
  #define TIF_SYSCALL_AUDIT      7       /* syscall auditing active */
  #define TIF_SINGLESTEP         8       /* singlestepping active */
+#define TIF_NEED_RESCHED_LAZY  9       /* lazy rescheduling necessary */
  #define TIF_SECCOMP            10      /* secure computing */
  #define TIF_RESTOREALL         11      /* Restore all regs (implies NOERROR) */
  #define TIF_NOERROR            12      /* Force successful syscall return */
@@ -114,6 +117,7 @@ void arch_setup_new_exec(void);
  #define TIF_POLLING_NRFLAG     19      /* true if poll_idle() is polling TIF_NEED_RESCHED */
  #define TIF_32BIT              20      /* 32 bit binary */
  
+
  /* as above, but as bit values */
  #define _TIF_SYSCALL_TRACE     (1<<TIF_SYSCALL_TRACE)
  #define _TIF_SIGPENDING                (1<<TIF_SIGPENDING)
@@ -125,6 +129,7 @@ void arch_setup_new_exec(void);
  #define _TIF_PATCH_PENDING     (1<<TIF_PATCH_PENDING)
  #define _TIF_SYSCALL_AUDIT     (1<<TIF_SYSCALL_AUDIT)
  #define _TIF_SINGLESTEP                (1<<TIF_SINGLESTEP)
+#define _TIF_NEED_RESCHED_LAZY (1<<TIF_NEED_RESCHED_LAZY)
  #define _TIF_SECCOMP           (1<<TIF_SECCOMP)
  #define _TIF_RESTOREALL                (1<<TIF_RESTOREALL)
  #define _TIF_NOERROR           (1<<TIF_NOERROR)
@@ -138,10 +143,12 @@ void arch_setup_new_exec(void);
                                  _TIF_SYSCALL_EMU)
  
  #define _TIF_USER_WORK_MASK    (_TIF_SIGPENDING | _TIF_NEED_RESCHED | \
+                                _TIF_NEED_RESCHED_LAZY | \
                                  _TIF_NOTIFY_RESUME | _TIF_UPROBE | \
                                  _TIF_RESTORE_TM | _TIF_PATCH_PENDING | \
                                  _TIF_NOTIFY_SIGNAL)
  #define _TIF_PERSYSCALL_MASK   (_TIF_RESTOREALL|_TIF_NOERROR)
+#define _TIF_NEED_RESCHED_MASK (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY)
  
  /* Bits in local_flags */
  /* Don't move TLF_NAPPING without adjusting the code in entry_32.S */
diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c

index df048e3..a81225f 100644 (file)
--- a/arch/powerpc/kernel/interrupt.c
+++ b/arch/powerpc/kernel/interrupt.c
@@ -346,7 +346,7 @@ again:
         ti_flags = READ_ONCE(current_thread_info()->flags);
         while (unlikely(ti_flags & (_TIF_USER_WORK_MASK & ~_TIF_RESTORE_TM))) {
                 local_irq_enable();
-               if (ti_flags & _TIF_NEED_RESCHED) {
+               if (ti_flags & _TIF_NEED_RESCHED_MASK) {
                         schedule();
                 } else {
                         /*
@@ -552,11 +552,15 @@ notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs)
                 /* Returning to a kernel context with local irqs enabled. */
                 WARN_ON_ONCE(!(regs->msr & MSR_EE));
  again:
-               if (IS_ENABLED(CONFIG_PREEMPT)) {
+               if (IS_ENABLED(CONFIG_PREEMPTION)) {
                         /* Return to preemptible kernel context */
                         if (unlikely(current_thread_info()->flags & _TIF_NEED_RESCHED)) {
                                 if (preempt_count() == 0)
                                         preempt_schedule_irq();
+                       } else if (unlikely(current_thread_info()->flags & _TIF_NEED_RESCHED_LAZY)) {
+                               if ((preempt_count() == 0) &&
+                                   (current_thread_info()->preempt_lazy_count == 0))
+                                       preempt_schedule_irq();
                         }
                 }
  
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c

index c4f1d6b..02e17a5 100644 (file)
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -690,6 +690,7 @@ static inline void check_stack_overflow(void)
         }
  }
  
+#ifndef CONFIG_PREEMPT_RT
  static __always_inline void call_do_softirq(const void *sp)
  {
         /* Temporarily switch r1 to sp, call __do_softirq() then restore r1. */
@@ -708,6 +709,7 @@ static __always_inline void call_do_softirq(const void *sp)
                    "r11", "r12"
         );
  }
+#endif
  
  static __always_inline void call_do_irq(struct pt_regs *regs, void *sp)
  {
@@ -820,10 +822,12 @@ void *mcheckirq_ctx[NR_CPUS] __read_mostly;
  void *softirq_ctx[NR_CPUS] __read_mostly;
  void *hardirq_ctx[NR_CPUS] __read_mostly;
  
+#ifndef CONFIG_PREEMPT_RT
  void do_softirq_own_stack(void)
  {
         call_do_softirq(softirq_ctx[smp_processor_id()]);
  }
+#endif
  
  irq_hw_number_t virq_to_hw(unsigned int virq)
  {
diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c

index bdee726..d57d374 100644 (file)
--- a/arch/powerpc/kernel/kgdb.c
+++ b/arch/powerpc/kernel/kgdb.c
@@ -120,11 +120,19 @@ int kgdb_skipexception(int exception, struct pt_regs *regs)
  
  static int kgdb_debugger_ipi(struct pt_regs *regs)
  {
-       kgdb_nmicallback(raw_smp_processor_id(), regs);
+       int cpu = raw_smp_processor_id();
+
+       if (!kgdb_roundup_delay(cpu))
+               kgdb_nmicallback(cpu, regs);
         return 0;
  }
  
  #ifdef CONFIG_SMP
+void kgdb_roundup_cpu(unsigned int cpu)
+{
+       smp_send_debugger_break_cpu(cpu);
+}
+
  void kgdb_roundup_cpus(void)
  {
         smp_send_debugger_break();
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c

index fb95f92..308765f 100644 (file)
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -590,6 +590,11 @@ static void debugger_ipi_callback(struct pt_regs *regs)
         debugger_ipi(regs);
  }
  
+void smp_send_debugger_break_cpu(unsigned int cpu)
+{
+       smp_send_nmi_ipi(cpu, debugger_ipi_callback, 1000000);
+}
+
  void smp_send_debugger_break(void)
  {
         smp_send_nmi_ipi(NMI_IPI_ALL_OTHERS, debugger_ipi_callback, 1000000);
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c

index a08bb7c..ae34f68 100644 (file)
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -260,12 +260,17 @@ static char *get_mmu_str(void)
  
  static int __die(const char *str, struct pt_regs *regs, long err)
  {
+       const char *pr = "";
+
         printk("Oops: %s, sig: %ld [#%d]\n", str, err, ++die_counter);
  
+       if (IS_ENABLED(CONFIG_PREEMPTION))
+               pr = IS_ENABLED(CONFIG_PREEMPT_RT) ? " PREEMPT_RT" : " PREEMPT";
+
         printk("%s PAGE_SIZE=%luK%s%s%s%s%s%s %s\n",
                IS_ENABLED(CONFIG_CPU_LITTLE_ENDIAN) ? "LE" : "BE",
                PAGE_SIZE / 1024, get_mmu_str(),
-              IS_ENABLED(CONFIG_PREEMPT) ? " PREEMPT" : "",
+              pr,
                IS_ENABLED(CONFIG_SMP) ? " SMP" : "",
                IS_ENABLED(CONFIG_SMP) ? (" NR_CPUS=" __stringify(NR_CPUS)) : "",
                debug_pagealloc_enabled() ? " DEBUG_PAGEALLOC" : "",
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig

index ff581d7..e5c84d5 100644 (file)
--- a/arch/powerpc/kvm/Kconfig
+++ b/arch/powerpc/kvm/Kconfig
@@ -178,6 +178,7 @@ config KVM_E500MC
  config KVM_MPIC
         bool "KVM in-kernel MPIC emulation"
         depends on KVM && E500
+       depends on !PREEMPT_RT
         select HAVE_KVM_IRQCHIP
         select HAVE_KVM_IRQFD
         select HAVE_KVM_IRQ_ROUTING
diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c

index 8322ca8..f524145 100644 (file)
--- a/arch/powerpc/platforms/pseries/iommu.c
+++ b/arch/powerpc/platforms/pseries/iommu.c
@@ -24,6 +24,7 @@
  #include <linux/of.h>
  #include <linux/iommu.h>
  #include <linux/rculist.h>
+#include <linux/local_lock.h>
  #include <asm/io.h>
  #include <asm/prom.h>
  #include <asm/rtas.h>
@@ -195,7 +196,13 @@ static int tce_build_pSeriesLP(unsigned long liobn, long tcenum, long tceshift,
         return ret;
  }
  
-static DEFINE_PER_CPU(__be64 *, tce_page);
+struct tce_page {
+       __be64 * page;
+       local_lock_t lock;
+};
+static DEFINE_PER_CPU(struct tce_page, tce_page) = {
+       .lock = INIT_LOCAL_LOCK(lock),
+};
  
  static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
                                      long npages, unsigned long uaddr,
@@ -218,9 +225,10 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
                                            direction, attrs);
         }
  
-       local_irq_save(flags);  /* to protect tcep and the page behind it */
+       /* to protect tcep and the page behind it */
+       local_lock_irqsave(&tce_page.lock, flags);
  
-       tcep = __this_cpu_read(tce_page);
+       tcep = __this_cpu_read(tce_page.page);
  
         /* This is safe to do since interrupts are off when we're called
          * from iommu_alloc{,_sg}()
@@ -229,12 +237,12 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
                 tcep = (__be64 *)__get_free_page(GFP_ATOMIC);
                 /* If allocation fails, fall back to the loop implementation */
                 if (!tcep) {
-                       local_irq_restore(flags);
+                       local_unlock_irqrestore(&tce_page.lock, flags);
                         return tce_build_pSeriesLP(tbl->it_index, tcenum,
                                         tceshift,
                                         npages, uaddr, direction, attrs);
                 }
-               __this_cpu_write(tce_page, tcep);
+               __this_cpu_write(tce_page.page, tcep);
         }
  
         rpn = __pa(uaddr) >> tceshift;
@@ -264,7 +272,7 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
                 tcenum += limit;
         } while (npages > 0 && !rc);
  
-       local_irq_restore(flags);
+       local_unlock_irqrestore(&tce_page.lock, flags);
  
         if (unlikely(rc == H_NOT_ENOUGH_RESOURCES)) {
                 ret = (int)rc;
@@ -440,16 +448,17 @@ static int tce_setrange_multi_pSeriesLP(unsigned long start_pfn,
                                 DMA_BIDIRECTIONAL, 0);
         }
  
-       local_irq_disable();    /* to protect tcep and the page behind it */
-       tcep = __this_cpu_read(tce_page);
+       /* to protect tcep and the page behind it */
+       local_lock_irq(&tce_page.lock);
+       tcep = __this_cpu_read(tce_page.page);
  
         if (!tcep) {
                 tcep = (__be64 *)__get_free_page(GFP_ATOMIC);
                 if (!tcep) {
-                       local_irq_enable();
+                       local_unlock_irq(&tce_page.lock);
                         return -ENOMEM;
                 }
-               __this_cpu_write(tce_page, tcep);
+               __this_cpu_write(tce_page.page, tcep);
         }
  
         proto_tce = TCE_PCI_READ | TCE_PCI_WRITE;
@@ -492,7 +501,7 @@ static int tce_setrange_multi_pSeriesLP(unsigned long start_pfn,
  
         /* error cleanup: caller will clear whole range */
  
-       local_irq_enable();
+       local_unlock_irq(&tce_page.lock);
         return rc;
  }
  
diff --git a/arch/riscv/include/asm/spinlock_types.h b/arch/riscv/include/asm/spinlock_types.h

index f398e76..5a35a49 100644 (file)
--- a/arch/riscv/include/asm/spinlock_types.h
+++ b/arch/riscv/include/asm/spinlock_types.h
@@ -6,7 +6,7 @@
  #ifndef _ASM_RISCV_SPINLOCK_TYPES_H
  #define _ASM_RISCV_SPINLOCK_TYPES_H
  
-#ifndef __LINUX_SPINLOCK_TYPES_H
+#ifndef __LINUX_SPINLOCK_TYPES_RAW_H
  # error "please don't include this file directly"
  #endif
  
diff --git a/arch/s390/include/asm/spinlock_types.h b/arch/s390/include/asm/spinlock_types.h

index a2bbfd7..b69695e 100644 (file)
--- a/arch/s390/include/asm/spinlock_types.h
+++ b/arch/s390/include/asm/spinlock_types.h
@@ -2,7 +2,7 @@
  #ifndef __ASM_SPINLOCK_TYPES_H
  #define __ASM_SPINLOCK_TYPES_H
  
-#ifndef __LINUX_SPINLOCK_TYPES_H
+#ifndef __LINUX_SPINLOCK_TYPES_RAW_H
  # error "please don't include this file directly"
  #endif
  
diff --git a/arch/sh/include/asm/spinlock_types.h b/arch/sh/include/asm/spinlock_types.h

index e82369f..907bda4 100644 (file)
--- a/arch/sh/include/asm/spinlock_types.h
+++ b/arch/sh/include/asm/spinlock_types.h
@@ -2,7 +2,7 @@
  #ifndef __ASM_SH_SPINLOCK_TYPES_H
  #define __ASM_SH_SPINLOCK_TYPES_H
  
-#ifndef __LINUX_SPINLOCK_TYPES_H
+#ifndef __LINUX_SPINLOCK_TYPES_RAW_H
  # error "please don't include this file directly"
  #endif
  
diff --git a/arch/sh/kernel/irq.c b/arch/sh/kernel/irq.c

index ef0f082..2d3eca8 100644 (file)
--- a/arch/sh/kernel/irq.c
+++ b/arch/sh/kernel/irq.c
@@ -149,6 +149,7 @@ void irq_ctx_exit(int cpu)
         hardirq_ctx[cpu] = NULL;
  }
  
+#ifndef CONFIG_PREEMPT_RT
  void do_softirq_own_stack(void)
  {
         struct thread_info *curctx;
@@ -176,6 +177,7 @@ void do_softirq_own_stack(void)
                   "r5", "r6", "r7", "r8", "r9", "r15", "t", "pr"
         );
  }
+#endif
  #else
  static inline void handle_one_irq(unsigned int irq)
  {
diff --git a/arch/sparc/kernel/irq_64.c b/arch/sparc/kernel/irq_64.c

index c8848bb..41fa1be 100644 (file)
--- a/arch/sparc/kernel/irq_64.c
+++ b/arch/sparc/kernel/irq_64.c
@@ -855,6 +855,7 @@ void __irq_entry handler_irq(int pil, struct pt_regs *regs)
         set_irq_regs(old_regs);
  }
  
+#ifndef CONFIG_PREEMPT_RT
  void do_softirq_own_stack(void)
  {
         void *orig_sp, *sp = softirq_stack[smp_processor_id()];
@@ -869,6 +870,7 @@ void do_softirq_own_stack(void)
         __asm__ __volatile__("mov %0, %%sp"
                              : : "r" (orig_sp));
  }
+#endif
  
  #ifdef CONFIG_HOTPLUG_CPU
  void fixup_irqs(void)
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig

index 0f2234c..3411149 100644 (file)
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -107,6 +107,7 @@ config X86
         select ARCH_SUPPORTS_KMAP_LOCAL_FORCE_MAP       if NR_CPUS <= 4096
         select ARCH_SUPPORTS_LTO_CLANG
         select ARCH_SUPPORTS_LTO_CLANG_THIN
+       select ARCH_SUPPORTS_RT
         select ARCH_USE_BUILTIN_BSWAP
         select ARCH_USE_MEMTEST
         select ARCH_USE_QUEUED_RWLOCKS
@@ -230,6 +231,7 @@ config X86
         select HAVE_PCI
         select HAVE_PERF_REGS
         select HAVE_PERF_USER_STACK_DUMP
+       select HAVE_PREEMPT_LAZY
         select MMU_GATHER_RCU_TABLE_FREE                if PARAVIRT
         select HAVE_POSIX_CPU_TIMERS_TASK_WORK
         select HAVE_REGS_AND_STACK_ACCESS_API
diff --git a/arch/x86/include/asm/irq_stack.h b/arch/x86/include/asm/irq_stack.h

index e087cd7..96cc92f 100644 (file)
--- a/arch/x86/include/asm/irq_stack.h
+++ b/arch/x86/include/asm/irq_stack.h
@@ -202,6 +202,7 @@
                               IRQ_CONSTRAINTS, regs, vector);           \
  }
  
+#ifndef CONFIG_PREEMPT_RT
  /*
   * Macro to invoke __do_softirq on the irq stack. This is only called from
   * task context when bottom halves are about to be reenabled and soft
@@ -215,6 +216,8 @@
         __this_cpu_write(hardirq_stack_inuse, false);                   \
  }
  
+#endif
+
  #else /* CONFIG_X86_64 */
  /* System vector handlers always run on the stack they interrupted. */
  #define run_sysvec_on_irqstack_cond(func, regs)                                \
diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h

index fe5efbc..ab8cb5f 100644 (file)
--- a/arch/x86/include/asm/preempt.h
+++ b/arch/x86/include/asm/preempt.h
@@ -90,17 +90,48 @@ static __always_inline void __preempt_count_sub(int val)
   * a decrement which hits zero means we have no preempt_count and should
   * reschedule.
   */
-static __always_inline bool __preempt_count_dec_and_test(void)
+static __always_inline bool ____preempt_count_dec_and_test(void)
  {
         return GEN_UNARY_RMWcc("decl", __preempt_count, e, __percpu_arg([var]));
  }
  
+static __always_inline bool __preempt_count_dec_and_test(void)
+{
+       if (____preempt_count_dec_and_test())
+               return true;
+#ifdef CONFIG_PREEMPT_LAZY
+       if (preempt_count())
+               return false;
+       if (current_thread_info()->preempt_lazy_count)
+               return false;
+       return test_thread_flag(TIF_NEED_RESCHED_LAZY);
+#else
+       return false;
+#endif
+}
+
  /*
   * Returns true when we need to resched and can (barring IRQ state).
   */
  static __always_inline bool should_resched(int preempt_offset)
  {
+#ifdef CONFIG_PREEMPT_LAZY
+       u32 tmp;
+       tmp = raw_cpu_read_4(__preempt_count);
+       if (tmp == preempt_offset)
+               return true;
+
+       /* preempt count == 0 ? */
+       tmp &= ~PREEMPT_NEED_RESCHED;
+       if (tmp != preempt_offset)
+               return false;
+       /* XXX PREEMPT_LOCK_OFFSET */
+       if (current_thread_info()->preempt_lazy_count)
+               return false;
+       return test_thread_flag(TIF_NEED_RESCHED_LAZY);
+#else
         return unlikely(raw_cpu_read_4(__preempt_count) == preempt_offset);
+#endif
  }
  
  #ifdef CONFIG_PREEMPTION
diff --git a/arch/x86/include/asm/signal.h b/arch/x86/include/asm/signal.h

index 2dfb5fe..fc03f4f 100644 (file)
--- a/arch/x86/include/asm/signal.h
+++ b/arch/x86/include/asm/signal.h
@@ -28,6 +28,19 @@ typedef struct {
  #define SA_IA32_ABI    0x02000000u
  #define SA_X32_ABI     0x01000000u
  
+/*
+ * Because some traps use the IST stack, we must keep preemption
+ * disabled while calling do_trap(), but do_trap() may call
+ * force_sig_info() which will grab the signal spin_locks for the
+ * task, which in PREEMPT_RT are mutexes.  With
+ * ARCH_RT_DELAYS_SIGNAL_SEND defined, force_sig_info() will set
+ * TIF_NOTIFY_RESUME and set up the signal to be sent on exit of the
+ * trap.
+ */
+#if defined(CONFIG_PREEMPT_RT)
+#define ARCH_RT_DELAYS_SIGNAL_SEND
+#endif
+
  #ifndef CONFIG_COMPAT
  #define compat_sigset_t compat_sigset_t
  typedef sigset_t compat_sigset_t;
diff --git a/arch/x86/include/asm/stackprotector.h b/arch/x86/include/asm/stackprotector.h

index 24a8d6c..2fc22c2 100644 (file)
--- a/arch/x86/include/asm/stackprotector.h
+++ b/arch/x86/include/asm/stackprotector.h
@@ -50,7 +50,7 @@
   */
  static __always_inline void boot_init_stack_canary(void)
  {
-       u64 canary;
+       u64 canary = 0;
         u64 tsc;
  
  #ifdef CONFIG_X86_64
@@ -61,8 +61,14 @@ static __always_inline void boot_init_stack_canary(void)
          * of randomness. The TSC only matters for very early init,
          * there it already has some randomness on most systems. Later
          * on during the bootup the random pool has true entropy too.
+        * For preempt-rt we need to weaken the randomness a bit, as
+        * we can't call into the random generator from atomic context
+        * due to locking constraints. We just leave canary
+        * zero-initialized and use the TSC based randomness on top of it.
          */
+#ifndef CONFIG_PREEMPT_RT
         get_random_bytes(&canary, sizeof(canary));
+#endif
         tsc = rdtsc();
         canary += tsc + (tsc << 32UL);
         canary &= CANARY_MASK;
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h

index cf13266..75dc786 100644 (file)
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -57,11 +57,14 @@ struct thread_info {
         unsigned long           flags;          /* low level flags */
         unsigned long           syscall_work;   /* SYSCALL_WORK_ flags */
         u32                     status;         /* thread synchronous flags */
+       int                     preempt_lazy_count;     /* 0 => lazy preemptable
+                                                          <0 => BUG */
  };
  
  #define INIT_THREAD_INFO(tsk)                  \
  {                                              \
         .flags          = 0,                    \
+       .preempt_lazy_count     = 0,            \
  }
  
  #else /* !__ASSEMBLY__ */
@@ -90,6 +93,7 @@ struct thread_info {
  #define TIF_NOTSC              16      /* TSC is not accessible in userland */
  #define TIF_NOTIFY_SIGNAL      17      /* signal notifications exist */
  #define TIF_SLD                        18      /* Restore split lock detection on context switch */
+#define TIF_NEED_RESCHED_LAZY  19      /* lazy rescheduling necessary */
  #define TIF_MEMDIE             20      /* is terminating due to OOM killer */
  #define TIF_POLLING_NRFLAG     21      /* idle is polling for TIF_NEED_RESCHED */
  #define TIF_IO_BITMAP          22      /* uses I/O bitmap */
@@ -114,6 +118,7 @@ struct thread_info {
  #define _TIF_NOTSC             (1 << TIF_NOTSC)
  #define _TIF_NOTIFY_SIGNAL     (1 << TIF_NOTIFY_SIGNAL)
  #define _TIF_SLD               (1 << TIF_SLD)
+#define _TIF_NEED_RESCHED_LAZY (1 << TIF_NEED_RESCHED_LAZY)
  #define _TIF_POLLING_NRFLAG    (1 << TIF_POLLING_NRFLAG)
  #define _TIF_IO_BITMAP         (1 << TIF_IO_BITMAP)
  #define _TIF_SPEC_FORCE_UPDATE (1 << TIF_SPEC_FORCE_UPDATE)
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c

index 044902d..e5dd6da 100644 (file)
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -132,6 +132,7 @@ int irq_init_percpu_irqstack(unsigned int cpu)
         return 0;
  }
  
+#ifndef CONFIG_PREEMPT_RT
  void do_softirq_own_stack(void)
  {
         struct irq_stack *irqstk;
@@ -148,6 +149,7 @@ void do_softirq_own_stack(void)
  
         call_on_stack(__do_softirq, isp);
  }
+#endif
  
  void __handle_irq(struct irq_desc *desc, struct pt_regs *regs)
  {
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c

index 3a43a2d..37bd37c 100644 (file)
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c
@@ -502,9 +502,12 @@ static int kgdb_nmi_handler(unsigned int cmd, struct pt_regs *regs)
                 if (atomic_read(&kgdb_active) != -1) {
                         /* KGDB CPU roundup */
                         cpu = raw_smp_processor_id();
-                       kgdb_nmicallback(cpu, regs);
-                       set_bit(cpu, was_in_debug_nmi);
-                       touch_nmi_watchdog();
+
+                       if (!kgdb_roundup_delay(cpu)) {
+                               kgdb_nmicallback(cpu, regs);
+                               set_bit(cpu, was_in_debug_nmi);
+                               touch_nmi_watchdog();
+                       }
  
                         return NMI_HANDLED;
                 }
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c

index fcfa3fe..9e70abe 100644 (file)
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -8665,6 +8665,14 @@ int kvm_arch_init(void *opaque)
                 goto out;
         }
  
+#ifdef CONFIG_PREEMPT_RT
+       if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
+               pr_err("RT requires X86_FEATURE_CONSTANT_TSC\n");
+               r = -EOPNOTSUPP;
+               goto out;
+       }
+#endif
+
         r = -ENOMEM;
         x86_fpu_cache = kmem_cache_create("x86_fpu", sizeof(struct fpu),
                                           __alignof__(struct fpu), SLAB_ACCOUNT,
diff --git a/arch/xtensa/include/asm/spinlock_types.h b/arch/xtensa/include/asm/spinlock_types.h

index 64c9389..797aed7 100644 (file)
--- a/arch/xtensa/include/asm/spinlock_types.h
+++ b/arch/xtensa/include/asm/spinlock_types.h
@@ -2,7 +2,7 @@
  #ifndef __ASM_SPINLOCK_TYPES_H
  #define __ASM_SPINLOCK_TYPES_H
  
-#if !defined(__LINUX_SPINLOCK_TYPES_H) && !defined(__ASM_SPINLOCK_H)
+#if !defined(__LINUX_SPINLOCK_TYPES_RAW_H) && !defined(__ASM_SPINLOCK_H)
  # error "please don't include this file directly"
  #endif
  
diff --git a/block/blk-mq.c b/block/blk-mq.c

index 9f53b4c..87575f5 100644 (file)
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1566,14 +1566,14 @@ static void __blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async,
                 return;
  
         if (!async && !(hctx->flags & BLK_MQ_F_BLOCKING)) {
-               int cpu = get_cpu();
+               int cpu = get_cpu_light();
                 if (cpumask_test_cpu(cpu, hctx->cpumask)) {
                         __blk_mq_run_hw_queue(hctx);
-                       put_cpu();
+                       put_cpu_light();
                         return;
                 }
  
-               put_cpu();
+               put_cpu_light();
         }
  
         kblockd_mod_delayed_work_on(blk_mq_hctx_next_cpu(hctx), &hctx->run_work,
diff --git a/crypto/testmgr.c b/crypto/testmgr.c

index 163a128..444183f 100644 (file)
--- a/crypto/testmgr.c
+++ b/crypto/testmgr.c
@@ -1061,14 +1061,14 @@ static void generate_random_testvec_config(struct testvec_config *cfg,
  
  static void crypto_disable_simd_for_test(void)
  {
-       preempt_disable();
+       migrate_disable();
         __this_cpu_write(crypto_simd_disabled_for_test, true);
  }
  
  static void crypto_reenable_simd_for_test(void)
  {
         __this_cpu_write(crypto_simd_disabled_for_test, false);
-       preempt_enable();
+       migrate_enable();
  }
  
  /*
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c

index 6383c81..abb695f 100644 (file)
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -59,6 +59,40 @@ static void zram_free_page(struct zram *zram, size_t index);
  static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec,
                                 u32 index, int offset, struct bio *bio);
  
+#ifdef CONFIG_PREEMPT_RT
+static void zram_meta_init_table_locks(struct zram *zram, size_t num_pages)
+{
+       size_t index;
+
+       for (index = 0; index < num_pages; index++)
+               spin_lock_init(&zram->table[index].lock);
+}
+
+static int zram_slot_trylock(struct zram *zram, u32 index)
+{
+       int ret;
+
+       ret = spin_trylock(&zram->table[index].lock);
+       if (ret)
+               __set_bit(ZRAM_LOCK, &zram->table[index].flags);
+       return ret;
+}
+
+static void zram_slot_lock(struct zram *zram, u32 index)
+{
+       spin_lock(&zram->table[index].lock);
+       __set_bit(ZRAM_LOCK, &zram->table[index].flags);
+}
+
+static void zram_slot_unlock(struct zram *zram, u32 index)
+{
+       __clear_bit(ZRAM_LOCK, &zram->table[index].flags);
+       spin_unlock(&zram->table[index].lock);
+}
+
+#else
+
+static void zram_meta_init_table_locks(struct zram *zram, size_t num_pages) { }
  
  static int zram_slot_trylock(struct zram *zram, u32 index)
  {
@@ -74,6 +108,7 @@ static void zram_slot_unlock(struct zram *zram, u32 index)
  {
         bit_spin_unlock(ZRAM_LOCK, &zram->table[index].flags);
  }
+#endif
  
  static inline bool init_done(struct zram *zram)
  {
@@ -1169,6 +1204,7 @@ static bool zram_meta_alloc(struct zram *zram, u64 disksize)
  
         if (!huge_class_size)
                 huge_class_size = zs_huge_class_size(zram->mem_pool);
+       zram_meta_init_table_locks(zram, num_pages);
         return true;
  }
  
diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h

index 80c3b43..d8f6d88 100644 (file)
--- a/drivers/block/zram/zram_drv.h
+++ b/drivers/block/zram/zram_drv.h
@@ -63,6 +63,7 @@ struct zram_table_entry {
                 unsigned long element;
         };
         unsigned long flags;
+       spinlock_t lock;
  #ifdef CONFIG_ZRAM_MEMORY_TRACKING
         ktime_t ac_time;
  #endif
diff --git a/drivers/char/tpm/tpm_tis.c b/drivers/char/tpm/tpm_tis.c

index e53164c..2ff3e10 100644 (file)
--- a/drivers/char/tpm/tpm_tis.c
+++ b/drivers/char/tpm/tpm_tis.c
@@ -50,6 +50,31 @@ static inline struct tpm_tis_tcg_phy *to_tpm_tis_tcg_phy(struct tpm_tis_data *da
         return container_of(data, struct tpm_tis_tcg_phy, priv);
  }
  
+#ifdef CONFIG_PREEMPT_RT
+/*
+ * Flushes previous write operations to the chip so that subsequent
+ * ioread*()s won't stall a CPU.
+ */
+static inline void tpm_tis_flush(void __iomem *iobase)
+{
+       ioread8(iobase + TPM_ACCESS(0));
+}
+#else
+#define tpm_tis_flush(iobase) do { } while (0)
+#endif
+
+static inline void tpm_tis_iowrite8(u8 b, void __iomem *iobase, u32 addr)
+{
+       iowrite8(b, iobase + addr);
+       tpm_tis_flush(iobase);
+}
+
+static inline void tpm_tis_iowrite32(u32 b, void __iomem *iobase, u32 addr)
+{
+       iowrite32(b, iobase + addr);
+       tpm_tis_flush(iobase);
+}
+
  static int interrupts = -1;
  module_param(interrupts, int, 0444);
  MODULE_PARM_DESC(interrupts, "Enable interrupts");
@@ -170,7 +195,7 @@ static int tpm_tcg_write_bytes(struct tpm_tis_data *data, u32 addr, u16 len,
         struct tpm_tis_tcg_phy *phy = to_tpm_tis_tcg_phy(data);
  
         while (len--)
-               iowrite8(*value++, phy->iobase + addr);
+               tpm_tis_iowrite8(*value++, phy->iobase, addr);
  
         return 0;
  }
@@ -197,7 +222,7 @@ static int tpm_tcg_write32(struct tpm_tis_data *data, u32 addr, u32 value)
  {
         struct tpm_tis_tcg_phy *phy = to_tpm_tis_tcg_phy(data);
  
-       iowrite32(value, phy->iobase + addr);
+       tpm_tis_iowrite32(value, phy->iobase, addr);
  
         return 0;
  }
diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c

index a2765d6..c801111 100644 (file)
--- a/drivers/firmware/efi/efi.c
+++ b/drivers/firmware/efi/efi.c
@@ -66,7 +66,7 @@ struct mm_struct efi_mm = {
  
  struct workqueue_struct *efi_rts_wq;
  
-static bool disable_runtime;
+static bool disable_runtime = IS_ENABLED(CONFIG_PREEMPT_RT);
  static int __init setup_noefi(char *arg)
  {
         disable_runtime = true;
@@ -97,6 +97,9 @@ static int __init parse_efi_cmdline(char *str)
         if (parse_option_str(str, "noruntime"))
                 disable_runtime = true;
  
+       if (parse_option_str(str, "runtime"))
+               disable_runtime = false;
+
         if (parse_option_str(str, "nosoftreserve"))
                 set_bit(EFI_MEM_NO_SOFT_RESERVE, &efi.flags);
  
diff --git a/drivers/gpu/drm/i915/display/intel_crtc.c b/drivers/gpu/drm/i915/display/intel_crtc.c

index 254e671..7a39029 100644 (file)
--- a/drivers/gpu/drm/i915/display/intel_crtc.c
+++ b/drivers/gpu/drm/i915/display/intel_crtc.c
@@ -425,7 +425,8 @@ void intel_pipe_update_start(const struct intel_crtc_state *new_crtc_state)
          */
         intel_psr_wait_for_idle(new_crtc_state);
  
-       local_irq_disable();
+       if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+               local_irq_disable();
  
         crtc->debug.min_vbl = min;
         crtc->debug.max_vbl = max;
@@ -450,11 +451,13 @@ void intel_pipe_update_start(const struct intel_crtc_state *new_crtc_state)
                         break;
                 }
  
-               local_irq_enable();
+               if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+                       local_irq_enable();
  
                 timeout = schedule_timeout(timeout);
  
-               local_irq_disable();
+               if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+                       local_irq_disable();
         }
  
         finish_wait(wq, &wait);
@@ -487,7 +490,8 @@ void intel_pipe_update_start(const struct intel_crtc_state *new_crtc_state)
         return;
  
  irq_disable:
-       local_irq_disable();
+       if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+               local_irq_disable();
  }
  
  #if IS_ENABLED(CONFIG_DRM_I915_DEBUG_VBLANK_EVADE)
@@ -566,7 +570,8 @@ void intel_pipe_update_end(struct intel_crtc_state *new_crtc_state)
                 new_crtc_state->uapi.event = NULL;
         }
  
-       local_irq_enable();
+       if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+               local_irq_enable();
  
         /* Send VRR Push to terminate Vblank */
         intel_vrr_send_push(new_crtc_state);
diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c

index 209cf26..6e1b906 100644 (file)
--- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
@@ -311,10 +311,9 @@ void __intel_breadcrumbs_park(struct intel_breadcrumbs *b)
         /* Kick the work once more to drain the signalers, and disarm the irq */
         irq_work_sync(&b->irq_work);
         while (READ_ONCE(b->irq_armed) && !atomic_read(&b->active)) {
-               local_irq_disable();
-               signal_irq_work(&b->irq_work);
-               local_irq_enable();
+               irq_work_queue(&b->irq_work);
                 cond_resched();
+               irq_work_sync(&b->irq_work);
         }
  }
  
diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h

index c410989..601274b 100644 (file)
--- a/drivers/gpu/drm/i915/gt/intel_context.h
+++ b/drivers/gpu/drm/i915/gt/intel_context.h
@@ -163,7 +163,8 @@ static inline void intel_context_enter(struct intel_context *ce)
  
  static inline void intel_context_mark_active(struct intel_context *ce)
  {
-       lockdep_assert_held(&ce->timeline->mutex);
+       lockdep_assert(lockdep_is_held(&ce->timeline->mutex) ||
+                      test_bit(CONTEXT_IS_PARKED, &ce->flags));
         ++ce->active_count;
  }
  
diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h

index a63631e..314457f 100644 (file)
--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
@@ -112,6 +112,7 @@ struct intel_context {
  #define CONTEXT_FORCE_SINGLE_SUBMISSION        7
  #define CONTEXT_NOPREEMPT              8
  #define CONTEXT_LRCA_DIRTY             9
+#define CONTEXT_IS_PARKED              10
  
         struct {
                 u64 timeout_us;
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c

index dacd627..73e96ca 100644 (file)
--- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
@@ -80,39 +80,6 @@ static int __engine_unpark(struct intel_wakeref *wf)
         return 0;
  }
  
-#if IS_ENABLED(CONFIG_LOCKDEP)
-
-static unsigned long __timeline_mark_lock(struct intel_context *ce)
-{
-       unsigned long flags;
-
-       local_irq_save(flags);
-       mutex_acquire(&ce->timeline->mutex.dep_map, 2, 0, _THIS_IP_);
-
-       return flags;
-}
-
-static void __timeline_mark_unlock(struct intel_context *ce,
-                                  unsigned long flags)
-{
-       mutex_release(&ce->timeline->mutex.dep_map, _THIS_IP_);
-       local_irq_restore(flags);
-}
-
-#else
-
-static unsigned long __timeline_mark_lock(struct intel_context *ce)
-{
-       return 0;
-}
-
-static void __timeline_mark_unlock(struct intel_context *ce,
-                                  unsigned long flags)
-{
-}
-
-#endif /* !IS_ENABLED(CONFIG_LOCKDEP) */
-
  static void duration(struct dma_fence *fence, struct dma_fence_cb *cb)
  {
         struct i915_request *rq = to_request(fence);
@@ -159,7 +126,6 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine)
  {
         struct intel_context *ce = engine->kernel_context;
         struct i915_request *rq;
-       unsigned long flags;
         bool result = true;
  
         /* GPU is pointing to the void, as good as in the kernel context. */
@@ -201,7 +167,7 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine)
          * engine->wakeref.count, we may see the request completion and retire
          * it causing an underflow of the engine->wakeref.
          */
-       flags = __timeline_mark_lock(ce);
+       set_bit(CONTEXT_IS_PARKED, &ce->flags);
         GEM_BUG_ON(atomic_read(&ce->timeline->active_count) < 0);
  
         rq = __i915_request_create(ce, GFP_NOWAIT);
@@ -233,7 +199,7 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine)
  
         result = false;
  out_unlock:
-       __timeline_mark_unlock(ce, flags);
+       clear_bit(CONTEXT_IS_PARKED, &ce->flags);
         return result;
  }
  
diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c

index 773ff51..f330457 100644 (file)
--- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
@@ -1286,7 +1286,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
          * and context switches) submission.
          */
  
-       spin_lock(&sched_engine->lock);
+       spin_lock_irq(&sched_engine->lock);
  
         /*
          * If the queue is higher priority than the last
@@ -1386,7 +1386,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
                                  * Even if ELSP[1] is occupied and not worthy
                                  * of timeslices, our queue might be.
                                  */
-                               spin_unlock(&sched_engine->lock);
+                               spin_unlock_irq(&sched_engine->lock);
                                 return;
                         }
                 }
@@ -1412,7 +1412,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
  
                 if (last && !can_merge_rq(last, rq)) {
                         spin_unlock(&ve->base.sched_engine->lock);
-                       spin_unlock(&engine->sched_engine->lock);
+                       spin_unlock_irq(&engine->sched_engine->lock);
                         return; /* leave this for another sibling */
                 }
  
@@ -1574,7 +1574,7 @@ done:
          */
         sched_engine->queue_priority_hint = queue_prio(sched_engine);
         i915_sched_engine_reset_on_empty(sched_engine);
-       spin_unlock(&sched_engine->lock);
+       spin_unlock_irq(&sched_engine->lock);
  
         /*
          * We can skip poking the HW if we ended up with exactly the same set
@@ -1600,13 +1600,6 @@ done:
         }
  }
  
-static void execlists_dequeue_irq(struct intel_engine_cs *engine)
-{
-       local_irq_disable(); /* Suspend interrupts across request submission */
-       execlists_dequeue(engine);
-       local_irq_enable(); /* flush irq_work (e.g. breadcrumb enabling) */
-}
-
  static void clear_ports(struct i915_request **ports, int count)
  {
         memset_p((void **)ports, NULL, count);
@@ -2442,7 +2435,7 @@ static void execlists_submission_tasklet(struct tasklet_struct *t)
         }
  
         if (!engine->execlists.pending[0]) {
-               execlists_dequeue_irq(engine);
+               execlists_dequeue(engine);
                 start_timeslice(engine);
         }
  
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c

index 9bc4f4a..5473472 100644 (file)
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -886,7 +886,8 @@ static bool i915_get_crtc_scanoutpos(struct drm_crtc *_crtc,
          */
         spin_lock_irqsave(&dev_priv->uncore.lock, irqflags);
  
-       /* preempt_disable_rt() should go right here in PREEMPT_RT patchset. */
+       if (IS_ENABLED(CONFIG_PREEMPT_RT))
+               preempt_disable();
  
         /* Get optional system timestamp before query. */
         if (stime)
@@ -950,7 +951,8 @@ static bool i915_get_crtc_scanoutpos(struct drm_crtc *_crtc,
         if (etime)
                 *etime = ktime_get();
  
-       /* preempt_enable_rt() should go right here in PREEMPT_RT patchset. */
+       if (IS_ENABLED(CONFIG_PREEMPT_RT))
+               preempt_enable();
  
         spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags);
  
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c

index 79da5ec..b9dd610 100644 (file)
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -559,7 +559,6 @@ bool __i915_request_submit(struct i915_request *request)
  
         RQ_TRACE(request, "\n");
  
-       GEM_BUG_ON(!irqs_disabled());
         lockdep_assert_held(&engine->sched_engine->lock);
  
         /*
@@ -668,7 +667,6 @@ void __i915_request_unsubmit(struct i915_request *request)
          */
         RQ_TRACE(request, "\n");
  
-       GEM_BUG_ON(!irqs_disabled());
         lockdep_assert_held(&engine->sched_engine->lock);
  
         /*
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h

index 1bc1349..a2f713b 100644 (file)
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -609,7 +609,8 @@ i915_request_timeline(const struct i915_request *rq)
  {
         /* Valid only while the request is being constructed (or retired). */
         return rcu_dereference_protected(rq->timeline,
-                                        lockdep_is_held(&rcu_access_pointer(rq->timeline)->mutex));
+                                        lockdep_is_held(&rcu_access_pointer(rq->timeline)->mutex) ||
+                                        test_bit(CONTEXT_IS_PARKED, &rq->context->flags));
  }
  
  static inline struct i915_gem_context *
diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h

index 63fec1c..f345a0f 100644 (file)
--- a/drivers/gpu/drm/i915/i915_trace.h
+++ b/drivers/gpu/drm/i915/i915_trace.h
@@ -2,6 +2,10 @@
  #if !defined(_I915_TRACE_H_) || defined(TRACE_HEADER_MULTI_READ)
  #define _I915_TRACE_H_
  
+#ifdef CONFIG_PREEMPT_RT
+#define NOTRACE
+#endif
+
  #include <linux/stringify.h>
  #include <linux/types.h>
  #include <linux/tracepoint.h>
@@ -819,7 +823,7 @@ DEFINE_EVENT(i915_request, i915_request_add,
              TP_ARGS(rq)
  );
  
-#if defined(CONFIG_DRM_I915_LOW_LEVEL_TRACEPOINTS)
+#if defined(CONFIG_DRM_I915_LOW_LEVEL_TRACEPOINTS) && !defined(NOTRACE)
  DEFINE_EVENT(i915_request, i915_request_guc_submit,
              TP_PROTO(struct i915_request *rq),
              TP_ARGS(rq)
diff --git a/drivers/gpu/drm/i915/i915_utils.h b/drivers/gpu/drm/i915/i915_utils.h

index 5259eda..b36b27c 100644 (file)
--- a/drivers/gpu/drm/i915/i915_utils.h
+++ b/drivers/gpu/drm/i915/i915_utils.h
@@ -343,7 +343,7 @@ wait_remaining_ms_from_jiffies(unsigned long timestamp_jiffies, int to_wait_ms)
  #define wait_for(COND, MS)             _wait_for((COND), (MS) * 1000, 10, 1000)
  
  /* If CONFIG_PREEMPT_COUNT is disabled, in_atomic() always reports false. */
-#if defined(CONFIG_DRM_I915_DEBUG) && defined(CONFIG_PREEMPT_COUNT)
+#if defined(CONFIG_DRM_I915_DEBUG) && defined(CONFIG_PREEMPT_COUNT) && !defined(CONFIG_PREEMPT_RT)
  # define _WAIT_FOR_ATOMIC_CHECK(ATOMIC) WARN_ON_ONCE((ATOMIC) && !in_atomic())
  #else
  # define _WAIT_FOR_ATOMIC_CHECK(ATOMIC) do { } while (0)
diff --git a/drivers/i2c/busses/i2c-cht-wc.c b/drivers/i2c/busses/i2c-cht-wc.c

index 1cf68f8..8ccf0c9 100644 (file)
--- a/drivers/i2c/busses/i2c-cht-wc.c
+++ b/drivers/i2c/busses/i2c-cht-wc.c
@@ -99,15 +99,8 @@ static irqreturn_t cht_wc_i2c_adap_thread_handler(int id, void *data)
          * interrupt handler as well, so running the client irq handler from
          * this thread will cause things to lock up.
          */
-       if (reg & CHT_WC_EXTCHGRIRQ_CLIENT_IRQ) {
-               /*
-                * generic_handle_irq expects local IRQs to be disabled
-                * as normally it is called from interrupt context.
-                */
-               local_irq_disable();
-               generic_handle_irq(adap->client_irq);
-               local_irq_enable();
-       }
+       if (reg & CHT_WC_EXTCHGRIRQ_CLIENT_IRQ)
+               generic_handle_irq_safe(adap->client_irq);
  
         return IRQ_HANDLED;
  }
diff --git a/drivers/i2c/i2c-core-base.c b/drivers/i2c/i2c-core-base.c

index 8fb065c..c232535 100644 (file)
--- a/drivers/i2c/i2c-core-base.c
+++ b/drivers/i2c/i2c-core-base.c
@@ -1422,7 +1422,7 @@ int i2c_handle_smbus_host_notify(struct i2c_adapter *adap, unsigned short addr)
         if (irq <= 0)
                 return -ENXIO;
  
-       generic_handle_irq(irq);
+       generic_handle_irq_safe(irq);
  
         return 0;
  }
diff --git a/drivers/leds/trigger/Kconfig b/drivers/leds/trigger/Kconfig

index 9fbdf5f..59043d0 100644 (file)
--- a/drivers/leds/trigger/Kconfig
+++ b/drivers/leds/trigger/Kconfig
@@ -64,6 +64,7 @@ config LEDS_TRIGGER_BACKLIGHT
  
  config LEDS_TRIGGER_CPU
         bool "LED CPU Trigger"
+       depends on !PREEMPT_RT
         help
           This allows LEDs to be controlled by active CPUs. This shows
           the active CPUs across an array of LEDs so you can see which
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c

index e4c82ec..d5b330c 100644 (file)
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -2218,8 +2218,9 @@ static void raid_run_ops(struct stripe_head *sh, unsigned long ops_request)
         struct raid5_percpu *percpu;
         unsigned long cpu;
  
-       cpu = get_cpu();
+       cpu = get_cpu_light();
         percpu = per_cpu_ptr(conf->percpu, cpu);
+       spin_lock(&percpu->lock);
         if (test_bit(STRIPE_OP_BIOFILL, &ops_request)) {
                 ops_run_biofill(sh);
                 overlap_clear++;
@@ -2278,7 +2279,8 @@ static void raid_run_ops(struct stripe_head *sh, unsigned long ops_request)
                         if (test_and_clear_bit(R5_Overlap, &dev->flags))
                                 wake_up(&sh->raid_conf->wait_for_overlap);
                 }
-       put_cpu();
+       spin_unlock(&percpu->lock);
+       put_cpu_light();
  }
  
  static void free_stripe(struct kmem_cache *sc, struct stripe_head *sh)
@@ -7110,6 +7112,7 @@ static int raid456_cpu_up_prepare(unsigned int cpu, struct hlist_node *node)
                         __func__, cpu);
                 return -ENOMEM;
         }
+       spin_lock_init(&per_cpu_ptr(conf->percpu, cpu)->lock);
         return 0;
  }
  
diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h

index 5c05acf..665fe13 100644 (file)
--- a/drivers/md/raid5.h
+++ b/drivers/md/raid5.h
@@ -635,6 +635,7 @@ struct r5conf {
         int                     recovery_disabled;
         /* per cpu variables */
         struct raid5_percpu {
+               spinlock_t      lock;           /* Protection for -RT */
                 struct page     *spare_page; /* Used when checking P/Q in raid6 */
                 void            *scribble;  /* space for constructing buffer
                                              * lists and performing address
diff --git a/drivers/mfd/ezx-pcap.c b/drivers/mfd/ezx-pcap.c

index 70fa18b..b14d3f9 100644 (file)
--- a/drivers/mfd/ezx-pcap.c
+++ b/drivers/mfd/ezx-pcap.c
@@ -193,13 +193,11 @@ static void pcap_isr_work(struct work_struct *work)
                 ezx_pcap_write(pcap, PCAP_REG_MSR, isr | msr);
                 ezx_pcap_write(pcap, PCAP_REG_ISR, isr);
  
-               local_irq_disable();
                 service = isr & ~msr;
                 for (irq = pcap->irq_base; service; service >>= 1, irq++) {
                         if (service & 1)
-                               generic_handle_irq(irq);
+                               generic_handle_irq_safe(irq);
                 }
-               local_irq_enable();
                 ezx_pcap_write(pcap, PCAP_REG_MSR, pcap->msr);
         } while (gpio_get_value(pdata->gpio));
  }
diff --git a/drivers/misc/hi6421v600-irq.c b/drivers/misc/hi6421v600-irq.c

index 08535e9..0585a58 100644 (file)
--- a/drivers/misc/hi6421v600-irq.c
+++ b/drivers/misc/hi6421v600-irq.c
@@ -118,8 +118,8 @@ static irqreturn_t hi6421v600_irq_handler(int irq, void *__priv)
                          * If both powerkey down and up IRQs are received,
                          * handle them at the right order
                          */
-                       generic_handle_irq(priv->irqs[POWERKEY_DOWN]);
-                       generic_handle_irq(priv->irqs[POWERKEY_UP]);
+                       generic_handle_irq_safe(priv->irqs[POWERKEY_DOWN]);
+                       generic_handle_irq_safe(priv->irqs[POWERKEY_UP]);
                         pending &= ~HISI_IRQ_POWERKEY_UP_DOWN;
                 }
  
@@ -127,7 +127,7 @@ static irqreturn_t hi6421v600_irq_handler(int irq, void *__priv)
                         continue;
  
                 for_each_set_bit(offset, &pending, BITS_PER_BYTE) {
-                       generic_handle_irq(priv->irqs[offset + i * BITS_PER_BYTE]);
+                       generic_handle_irq_safe(priv->irqs[offset + i * BITS_PER_BYTE]);
                 }
         }
  
diff --git a/drivers/net/ethernet/netronome/nfp/abm/qdisc.c b/drivers/net/ethernet/netronome/nfp/abm/qdisc.c

index 2473fb5..2a5cc64 100644 (file)
--- a/drivers/net/ethernet/netronome/nfp/abm/qdisc.c
+++ b/drivers/net/ethernet/netronome/nfp/abm/qdisc.c
@@ -458,7 +458,7 @@ nfp_abm_qdisc_graft(struct nfp_abm_link *alink, u32 handle, u32 child_handle,
  static void
  nfp_abm_stats_calculate(struct nfp_alink_stats *new,
                         struct nfp_alink_stats *old,
-                       struct gnet_stats_basic_packed *bstats,
+                       struct gnet_stats_basic_sync *bstats,
                         struct gnet_stats_queue *qstats)
  {
         _bstats_update(bstats, new->tx_bytes - old->tx_bytes,
diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c

index 2ccda40..21eddb6 100644 (file)
--- a/drivers/net/usb/lan78xx.c
+++ b/drivers/net/usb/lan78xx.c
@@ -1384,11 +1384,8 @@ static void lan78xx_status(struct lan78xx_net *dev, struct urb *urb)
                 netif_dbg(dev, link, dev->net, "PHY INTR: 0x%08x\n", intdata);
                 lan78xx_defer_kevent(dev, EVENT_LINK_RESET);
  
-               if (dev->domain_data.phyirq > 0) {
-                       local_irq_disable();
-                       generic_handle_irq(dev->domain_data.phyirq);
-                       local_irq_enable();
-               }
+               if (dev->domain_data.phyirq > 0)
+                       generic_handle_irq_safe(dev->domain_data.phyirq);
         } else {
                 netdev_warn(dev->net,
                             "unexpected interrupt: 0x%08x\n", intdata);
diff --git a/drivers/scsi/fcoe/fcoe.c b/drivers/scsi/fcoe/fcoe.c

index 76dbdae..9674318 100644 (file)
--- a/drivers/scsi/fcoe/fcoe.c
+++ b/drivers/scsi/fcoe/fcoe.c
@@ -1450,11 +1450,11 @@ err2:
  static int fcoe_alloc_paged_crc_eof(struct sk_buff *skb, int tlen)
  {
         struct fcoe_percpu_s *fps;
-       int rc;
+       int rc, cpu = get_cpu_light();
  
-       fps = &get_cpu_var(fcoe_percpu);
+       fps = &per_cpu(fcoe_percpu, cpu);
         rc = fcoe_get_paged_crc_eof(skb, tlen, fps);
-       put_cpu_var(fcoe_percpu);
+       put_cpu_light();
  
         return rc;
  }
@@ -1639,11 +1639,11 @@ static inline int fcoe_filter_frames(struct fc_lport *lport,
                 return 0;
         }
  
-       stats = per_cpu_ptr(lport->stats, get_cpu());
+       stats = per_cpu_ptr(lport->stats, get_cpu_light());
         stats->InvalidCRCCount++;
         if (stats->InvalidCRCCount < 5)
                 printk(KERN_WARNING "fcoe: dropping frame with CRC error\n");
-       put_cpu();
+       put_cpu_light();
         return -EINVAL;
  }
  
@@ -1684,7 +1684,7 @@ static void fcoe_recv_frame(struct sk_buff *skb)
          */
         hp = (struct fcoe_hdr *) skb_network_header(skb);
  
-       stats = per_cpu_ptr(lport->stats, get_cpu());
+       stats = per_cpu_ptr(lport->stats, get_cpu_light());
         if (unlikely(FC_FCOE_DECAPS_VER(hp) != FC_FCOE_VER)) {
                 if (stats->ErrorFrames < 5)
                         printk(KERN_WARNING "fcoe: FCoE version "
@@ -1716,13 +1716,13 @@ static void fcoe_recv_frame(struct sk_buff *skb)
                 goto drop;
  
         if (!fcoe_filter_frames(lport, fp)) {
-               put_cpu();
+               put_cpu_light();
                 fc_exch_recv(lport, fp);
                 return;
         }
  drop:
         stats->ErrorFrames++;
-       put_cpu();
+       put_cpu_light();
         kfree_skb(skb);
  }
  
diff --git a/drivers/scsi/fcoe/fcoe_ctlr.c b/drivers/scsi/fcoe/fcoe_ctlr.c

index 558f3f4..f08feaa 100644 (file)
--- a/drivers/scsi/fcoe/fcoe_ctlr.c
+++ b/drivers/scsi/fcoe/fcoe_ctlr.c
@@ -828,7 +828,7 @@ static unsigned long fcoe_ctlr_age_fcfs(struct fcoe_ctlr *fip)
  
         INIT_LIST_HEAD(&del_list);
  
-       stats = per_cpu_ptr(fip->lp->stats, get_cpu());
+       stats = per_cpu_ptr(fip->lp->stats, get_cpu_light());
  
         list_for_each_entry_safe(fcf, next, &fip->fcfs, list) {
                 deadline = fcf->time + fcf->fka_period + fcf->fka_period / 2;
@@ -864,7 +864,7 @@ static unsigned long fcoe_ctlr_age_fcfs(struct fcoe_ctlr *fip)
                                 sel_time = fcf->time;
                 }
         }
-       put_cpu();
+       put_cpu_light();
  
         list_for_each_entry_safe(fcf, next, &del_list, list) {
                 /* Removes fcf from current list */
diff --git a/drivers/scsi/libfc/fc_exch.c b/drivers/scsi/libfc/fc_exch.c

index aa223db..0ceb938 100644 (file)
--- a/drivers/scsi/libfc/fc_exch.c
+++ b/drivers/scsi/libfc/fc_exch.c
@@ -825,10 +825,10 @@ static struct fc_exch *fc_exch_em_alloc(struct fc_lport *lport,
         }
         memset(ep, 0, sizeof(*ep));
  
-       cpu = get_cpu();
+       cpu = get_cpu_light();
         pool = per_cpu_ptr(mp->pool, cpu);
         spin_lock_bh(&pool->lock);
-       put_cpu();
+       put_cpu_light();
  
         /* peek cache of free slot */
         if (pool->left != FC_XID_UNKNOWN) {
diff --git a/drivers/staging/greybus/gpio.c b/drivers/staging/greybus/gpio.c

index 7e6347f..8a7cf1d 100644 (file)
--- a/drivers/staging/greybus/gpio.c
+++ b/drivers/staging/greybus/gpio.c
@@ -391,10 +391,7 @@ static int gb_gpio_request_handler(struct gb_operation *op)
                 return -EINVAL;
         }
  
-       local_irq_disable();
-       ret = generic_handle_irq(irq);
-       local_irq_enable();
-
+       ret = generic_handle_irq_safe(irq);
         if (ret)
                 dev_err(dev, "failed to invoke irq handler\n");
  
diff --git a/drivers/tty/serial/8250/8250.h b/drivers/tty/serial/8250/8250.h

index b3abc29..c4843be 100644 (file)
--- a/drivers/tty/serial/8250/8250.h
+++ b/drivers/tty/serial/8250/8250.h
@@ -156,12 +156,55 @@ static inline void serial_dl_write(struct uart_8250_port *up, int value)
         up->dl_write(up, value);
  }
  
+static inline void serial8250_set_IER(struct uart_8250_port *up,
+                                     unsigned char ier)
+{
+       struct uart_port *port = &up->port;
+       unsigned long flags;
+       bool is_console;
+       /* Write @ier to UART_IER, under console_atomic_lock() for a console port. */
+       is_console = uart_console(port);
+       /* NOTE(review): presumably serializes with atomic console writers - confirm. */
+       if (is_console)
+               console_atomic_lock(flags);
+
+       serial_out(up, UART_IER, ier);
+
+       if (is_console)
+               console_atomic_unlock(flags);
+}
+
+static inline unsigned char serial8250_clear_IER(struct uart_8250_port *up)
+{
+       struct uart_port *port = &up->port;
+       unsigned int clearval = 0;
+       unsigned long flags;
+       unsigned int prior;
+       bool is_console;
+       /* Save IER, then write 0 (or only UART_IER_UUE) to it; returns the saved value. */
+       is_console = uart_console(port);
+       /* With UART_CAP_UUE, UART_IER_UUE is written instead of 0. */
+       if (up->capabilities & UART_CAP_UUE)
+               clearval = UART_IER_UUE;
+
+       if (is_console)
+               console_atomic_lock(flags);
+       /* On a console port the read and the write share one lock hold. */
+       prior = serial_port_in(port, UART_IER);
+       serial_port_out(port, UART_IER, clearval);
+
+       if (is_console)
+               console_atomic_unlock(flags);
+       /* The unsigned int value is narrowed to the unsigned char return type. */
+       return prior;
+}
+
  static inline bool serial8250_set_THRI(struct uart_8250_port *up)
  {
         if (up->ier & UART_IER_THRI)
                 return false;
         up->ier |= UART_IER_THRI;
-       serial_out(up, UART_IER, up->ier);
+       serial8250_set_IER(up, up->ier);
         return true;
  }
  
@@ -170,7 +213,7 @@ static inline bool serial8250_clear_THRI(struct uart_8250_port *up)
         if (!(up->ier & UART_IER_THRI))
                 return false;
         up->ier &= ~UART_IER_THRI;
-       serial_out(up, UART_IER, up->ier);
+       serial8250_set_IER(up, up->ier);
         return true;
  }
  
diff --git a/drivers/tty/serial/8250/8250_core.c b/drivers/tty/serial/8250/8250_core.c

index f3bfaa1..67f5679 100644 (file)
--- a/drivers/tty/serial/8250/8250_core.c
+++ b/drivers/tty/serial/8250/8250_core.c
@@ -265,10 +265,8 @@ static void serial8250_backup_timeout(struct timer_list *t)
          * Must disable interrupts or else we risk racing with the interrupt
          * based handler.
          */
-       if (up->port.irq) {
-               ier = serial_in(up, UART_IER);
-               serial_out(up, UART_IER, 0);
-       }
+       if (up->port.irq)
+               ier = serial8250_clear_IER(up);
  
         iir = serial_in(up, UART_IIR);
  
@@ -291,7 +289,7 @@ static void serial8250_backup_timeout(struct timer_list *t)
                 serial8250_tx_chars(up);
  
         if (up->port.irq)
-               serial_out(up, UART_IER, ier);
+               serial8250_set_IER(up, ier);
  
         spin_unlock_irqrestore(&up->port.lock, flags);
  
@@ -578,6 +576,14 @@ serial8250_register_ports(struct uart_driver *drv, struct device *dev)
  
  #ifdef CONFIG_SERIAL_8250_CONSOLE
  
+static void univ8250_console_write_atomic(struct console *co, const char *s,
+                                         unsigned int count)
+{
+       struct uart_8250_port *up = &serial8250_ports[co->index];
+       /* Forward to the 8250 port selected by co->index. */
+       serial8250_console_write_atomic(up, s, count);
+}
+
  static void univ8250_console_write(struct console *co, const char *s,
                                    unsigned int count)
  {
@@ -671,6 +677,7 @@ static int univ8250_console_match(struct console *co, char *name, int idx,
  
  static struct console univ8250_console = {
         .name           = "ttyS",
+       .write_atomic   = univ8250_console_write_atomic,
         .write          = univ8250_console_write,
         .device         = uart_console_device,
         .setup          = univ8250_console_setup,
diff --git a/drivers/tty/serial/8250/8250_fsl.c b/drivers/tty/serial/8250/8250_fsl.c

index af74f82..0bab91e 100644 (file)
--- a/drivers/tty/serial/8250/8250_fsl.c
+++ b/drivers/tty/serial/8250/8250_fsl.c
@@ -60,9 +60,18 @@ int fsl8250_handle_irq(struct uart_port *port)
  
         /* Stop processing interrupts on input overrun */
         if ((orig_lsr & UART_LSR_OE) && (up->overrun_backoff_time_ms > 0)) {
+               unsigned long flags;
                 unsigned long delay;
+               bool is_console;
  
+               is_console = uart_console(port);
+
+               if (is_console)
+                       console_atomic_lock(flags);
                 up->ier = port->serial_in(port, UART_IER);
+               if (is_console)
+                       console_atomic_unlock(flags);
+
                 if (up->ier & (UART_IER_RLSI | UART_IER_RDI)) {
                         port->ops->stop_rx(port);
                 } else {
diff --git a/drivers/tty/serial/8250/8250_ingenic.c b/drivers/tty/serial/8250/8250_ingenic.c

index 65402d0..8122645 100644 (file)
--- a/drivers/tty/serial/8250/8250_ingenic.c
+++ b/drivers/tty/serial/8250/8250_ingenic.c
@@ -146,6 +146,8 @@ OF_EARLYCON_DECLARE(x1000_uart, "ingenic,x1000-uart",
  
  static void ingenic_uart_serial_out(struct uart_port *p, int offset, int value)
  {
+       unsigned long flags;
+       bool is_console;
         int ier;
  
         switch (offset) {
@@ -167,7 +169,12 @@ static void ingenic_uart_serial_out(struct uart_port *p, int offset, int value)
                  * If we have enabled modem status IRQs we should enable
                  * modem mode.
                  */
+               is_console = uart_console(p);
+               if (is_console)
+                       console_atomic_lock(flags);
                 ier = p->serial_in(p, UART_IER);
+               if (is_console)
+                       console_atomic_unlock(flags);
  
                 if (ier & UART_IER_MSI)
                         value |= UART_MCR_MDCE | UART_MCR_FCM;
diff --git a/drivers/tty/serial/8250/8250_mtk.c b/drivers/tty/serial/8250/8250_mtk.c

index de48a58..364ee95 100644 (file)
--- a/drivers/tty/serial/8250/8250_mtk.c
+++ b/drivers/tty/serial/8250/8250_mtk.c
@@ -222,12 +222,37 @@ static void mtk8250_shutdown(struct uart_port *port)
  
  static void mtk8250_disable_intrs(struct uart_8250_port *up, int mask)
  {
-       serial_out(up, UART_IER, serial_in(up, UART_IER) & (~mask));
+       struct uart_port *port = &up->port;
+       unsigned long flags;
+       unsigned int ier;
+       bool is_console;
+       /* Clear the @mask bits in UART_IER; a console port does so under console_atomic_lock(). */
+       is_console = uart_console(port);
+
+       if (is_console)
+               console_atomic_lock(flags);
+       /* Read-modify-write of IER; on a console port both accesses share one lock hold. */
+       ier = serial_in(up, UART_IER);
+       serial_out(up, UART_IER, ier & (~mask));
+
+       if (is_console)
+               console_atomic_unlock(flags);
  }
  
  static void mtk8250_enable_intrs(struct uart_8250_port *up, int mask)
  {
-       serial_out(up, UART_IER, serial_in(up, UART_IER) | mask);
+       struct uart_port *port = &up->port;
+       unsigned long flags;
+       unsigned int ier;
+
+       if (uart_console(port))
+               console_atomic_lock(flags);
+
+       ier = serial_in(up, UART_IER);
+       serial_out(up, UART_IER, ier | mask);
+
+       if (uart_console(port))
+               console_atomic_unlock(flags);
  }
  
  static void mtk8250_set_flow_ctrl(struct uart_8250_port *up, int mode)
diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c

index 691e7a0..1380655 100644 (file)
--- a/drivers/tty/serial/8250/8250_port.c
+++ b/drivers/tty/serial/8250/8250_port.c
@@ -751,7 +751,7 @@ static void serial8250_set_sleep(struct uart_8250_port *p, int sleep)
                         serial_out(p, UART_EFR, UART_EFR_ECB);
                         serial_out(p, UART_LCR, 0);
                 }
-               serial_out(p, UART_IER, sleep ? UART_IERX_SLEEP : 0);
+               serial8250_set_IER(p, sleep ? UART_IERX_SLEEP : 0);
                 if (p->capabilities & UART_CAP_EFR) {
                         serial_out(p, UART_LCR, UART_LCR_CONF_MODE_B);
                         serial_out(p, UART_EFR, efr);
@@ -1426,7 +1426,7 @@ static void serial8250_stop_rx(struct uart_port *port)
  
         up->ier &= ~(UART_IER_RLSI | UART_IER_RDI);
         up->port.read_status_mask &= ~UART_LSR_DR;
-       serial_port_out(port, UART_IER, up->ier);
+       serial8250_set_IER(up, up->ier);
  
         serial8250_rpm_put(up);
  }
@@ -1456,7 +1456,7 @@ void serial8250_em485_stop_tx(struct uart_8250_port *p)
                 serial8250_clear_and_reinit_fifos(p);
  
                 p->ier |= UART_IER_RLSI | UART_IER_RDI;
-               serial_port_out(&p->port, UART_IER, p->ier);
+               serial8250_set_IER(p, p->ier);
         }
  }
  EXPORT_SYMBOL_GPL(serial8250_em485_stop_tx);
@@ -1692,7 +1692,7 @@ static void serial8250_disable_ms(struct uart_port *port)
         mctrl_gpio_disable_ms(up->gpios);
  
         up->ier &= ~UART_IER_MSI;
-       serial_port_out(port, UART_IER, up->ier);
+       serial8250_set_IER(up, up->ier);
  }
  
  static void serial8250_enable_ms(struct uart_port *port)
@@ -1708,7 +1708,7 @@ static void serial8250_enable_ms(struct uart_port *port)
         up->ier |= UART_IER_MSI;
  
         serial8250_rpm_get(up);
-       serial_port_out(port, UART_IER, up->ier);
+       serial8250_set_IER(up, up->ier);
         serial8250_rpm_put(up);
  }
  
@@ -2133,14 +2133,7 @@ static void serial8250_put_poll_char(struct uart_port *port,
         struct uart_8250_port *up = up_to_u8250p(port);
  
         serial8250_rpm_get(up);
-       /*
-        *      First save the IER then disable the interrupts
-        */
-       ier = serial_port_in(port, UART_IER);
-       if (up->capabilities & UART_CAP_UUE)
-               serial_port_out(port, UART_IER, UART_IER_UUE);
-       else
-               serial_port_out(port, UART_IER, 0);
+       ier = serial8250_clear_IER(up);
  
         wait_for_xmitr(up, BOTH_EMPTY);
         /*
@@ -2153,7 +2146,7 @@ static void serial8250_put_poll_char(struct uart_port *port,
          *      and restore the IER
          */
         wait_for_xmitr(up, BOTH_EMPTY);
-       serial_port_out(port, UART_IER, ier);
+       serial8250_set_IER(up, ier);
         serial8250_rpm_put(up);
  }
  
@@ -2458,7 +2451,7 @@ void serial8250_do_shutdown(struct uart_port *port)
          */
         spin_lock_irqsave(&port->lock, flags);
         up->ier = 0;
-       serial_port_out(port, UART_IER, 0);
+       serial8250_set_IER(up, 0);
         spin_unlock_irqrestore(&port->lock, flags);
  
         synchronize_irq(port->irq);
@@ -2840,7 +2833,7 @@ serial8250_do_set_termios(struct uart_port *port, struct ktermios *termios,
         if (up->capabilities & UART_CAP_RTOIE)
                 up->ier |= UART_IER_RTOIE;
  
-       serial_port_out(port, UART_IER, up->ier);
+       serial8250_set_IER(up, up->ier);
  
         if (up->capabilities & UART_CAP_EFR) {
                 unsigned char efr = 0;
@@ -3305,7 +3298,7 @@ EXPORT_SYMBOL_GPL(serial8250_set_defaults);
  
  #ifdef CONFIG_SERIAL_8250_CONSOLE
  
-static void serial8250_console_putchar(struct uart_port *port, int ch)
+static void serial8250_console_putchar_locked(struct uart_port *port, int ch)
  {
         struct uart_8250_port *up = up_to_u8250p(port);
  
@@ -3313,6 +3306,18 @@ static void serial8250_console_putchar(struct uart_port *port, int ch)
         serial_port_out(port, UART_TX, ch);
  }
  
+static void serial8250_console_putchar(struct uart_port *port, int ch)
+{
+       struct uart_8250_port *up = up_to_u8250p(port);
+       unsigned long flags;
+       /* The wait for UART_LSR_THRE is done before the lock is taken. */
+       wait_for_xmitr(up, UART_LSR_THRE);
+       /* Emit @ch via the _locked variant while holding console_atomic_lock(). */
+       console_atomic_lock(flags);
+       serial8250_console_putchar_locked(port, ch);
+       console_atomic_unlock(flags);
+}
+
  /*
   *     Restore serial console when h/w power-off detected
   */
@@ -3339,6 +3344,32 @@ static void serial8250_console_restore(struct uart_8250_port *up)
         serial8250_out_MCR(up, up->mcr | UART_MCR_DTR | UART_MCR_RTS);
  }
  
+void serial8250_console_write_atomic(struct uart_8250_port *up,
+                                    const char *s, unsigned int count)
+{
+       struct uart_port *port = &up->port;
+       unsigned long flags;
+       unsigned int ier;
+       /* Write @count bytes of @s with console_atomic_lock() held throughout. */
+       console_atomic_lock(flags);
+
+       touch_nmi_watchdog();
+       /* Save the current IER; it is written back once the output has drained. */
+       ier = serial8250_clear_IER(up);
+       /* Nonzero prior count: another write is in flight, so start on a new line. */
+       if (atomic_fetch_inc(&up->console_printing)) {
+               uart_console_write(port, "\n", 1,
+                                  serial8250_console_putchar_locked);
+       }
+       uart_console_write(port, s, count, serial8250_console_putchar_locked);
+       atomic_dec(&up->console_printing);
+       /* Wait for BOTH_EMPTY before writing the saved IER back. */
+       wait_for_xmitr(up, BOTH_EMPTY);
+       serial8250_set_IER(up, ier);
+
+       console_atomic_unlock(flags);
+}
+
  /*
   *     Print a string to the serial port trying not to disturb
   *     any possible real use of the port...
@@ -3355,24 +3386,12 @@ void serial8250_console_write(struct uart_8250_port *up, const char *s,
         struct uart_port *port = &up->port;
         unsigned long flags;
         unsigned int ier;
-       int locked = 1;
  
         touch_nmi_watchdog();
  
-       if (oops_in_progress)
-               locked = spin_trylock_irqsave(&port->lock, flags);
-       else
-               spin_lock_irqsave(&port->lock, flags);
-
-       /*
-        *      First save the IER then disable the interrupts
-        */
-       ier = serial_port_in(port, UART_IER);
+       spin_lock_irqsave(&port->lock, flags);
  
-       if (up->capabilities & UART_CAP_UUE)
-               serial_port_out(port, UART_IER, UART_IER_UUE);
-       else
-               serial_port_out(port, UART_IER, 0);
+       ier = serial8250_clear_IER(up);
  
         /* check scratch reg to see if port powered off during system sleep */
         if (up->canary && (up->canary != serial_port_in(port, UART_SCR))) {
@@ -3386,7 +3405,9 @@ void serial8250_console_write(struct uart_8250_port *up, const char *s,
                 mdelay(port->rs485.delay_rts_before_send);
         }
  
+       atomic_inc(&up->console_printing);
         uart_console_write(port, s, count, serial8250_console_putchar);
+       atomic_dec(&up->console_printing);
  
         /*
          *      Finally, wait for transmitter to become empty
@@ -3399,8 +3420,7 @@ void serial8250_console_write(struct uart_8250_port *up, const char *s,
                 if (em485->tx_stopped)
                         up->rs485_stop_tx(up);
         }
-
-       serial_port_out(port, UART_IER, ier);
+       serial8250_set_IER(up, ier);
  
         /*
          *      The receive handling will happen properly because the
@@ -3412,8 +3432,7 @@ void serial8250_console_write(struct uart_8250_port *up, const char *s,
         if (up->msr_saved_flags)
                 serial8250_modem_status(up);
  
-       if (locked)
-               spin_unlock_irqrestore(&port->lock, flags);
+       spin_unlock_irqrestore(&port->lock, flags);
  }
  
  static unsigned int probe_baud(struct uart_port *port)
@@ -3433,6 +3452,7 @@ static unsigned int probe_baud(struct uart_port *port)
  
  int serial8250_console_setup(struct uart_port *port, char *options, bool probe)
  {
+       struct uart_8250_port *up = up_to_u8250p(port);
         int baud = 9600;
         int bits = 8;
         int parity = 'n';
@@ -3442,6 +3462,8 @@ int serial8250_console_setup(struct uart_port *port, char *options, bool probe)
         if (!port->iobase && !port->membase)
                 return -ENODEV;
  
+       atomic_set(&up->console_printing, 0);
+
         if (options)
                 uart_parse_options(options, &baud, &parity, &bits, &flow);
         else if (probe)
diff --git a/drivers/tty/serial/amba-pl011.c b/drivers/tty/serial/amba-pl011.c

index 0473e78..ecdc140 100644 (file)
--- a/drivers/tty/serial/amba-pl011.c
+++ b/drivers/tty/serial/amba-pl011.c
@@ -2362,18 +2362,24 @@ pl011_console_write(struct console *co, const char *s, unsigned int count)
  {
         struct uart_amba_port *uap = amba_ports[co->index];
         unsigned int old_cr = 0, new_cr;
-       unsigned long flags;
+       unsigned long flags = 0;
         int locked = 1;
  
         clk_enable(uap->clk);
  
-       local_irq_save(flags);
+       /*
+        * local_irq_save(flags);
+        *
+        * This local_irq_save() is nonsense. If we come in via sysrq
+        * handling then interrupts are already disabled. Aside from
+        * that, the port.sysrq check is racy on SMP regardless.
+       */
         if (uap->port.sysrq)
                 locked = 0;
         else if (oops_in_progress)
-               locked = spin_trylock(&uap->port.lock);
+               locked = spin_trylock_irqsave(&uap->port.lock, flags);
         else
-               spin_lock(&uap->port.lock);
+               spin_lock_irqsave(&uap->port.lock, flags);
  
         /*
          *      First save the CR then disable the interrupts
@@ -2399,8 +2405,7 @@ pl011_console_write(struct console *co, const char *s, unsigned int count)
                 pl011_write(old_cr, uap, REG_CR);
  
         if (locked)
-               spin_unlock(&uap->port.lock);
-       local_irq_restore(flags);
+               spin_unlock_irqrestore(&uap->port.lock, flags);
  
         clk_disable(uap->clk);
  }
diff --git a/drivers/tty/serial/omap-serial.c b/drivers/tty/serial/omap-serial.c

index 0862941..1097063 100644 (file)
--- a/drivers/tty/serial/omap-serial.c
+++ b/drivers/tty/serial/omap-serial.c
@@ -1255,13 +1255,10 @@ serial_omap_console_write(struct console *co, const char *s,
         unsigned int ier;
         int locked = 1;
  
-       local_irq_save(flags);
-       if (up->port.sysrq)
-               locked = 0;
-       else if (oops_in_progress)
-               locked = spin_trylock(&up->port.lock);
+       if (up->port.sysrq || oops_in_progress)
+               locked = spin_trylock_irqsave(&up->port.lock, flags);
         else
-               spin_lock(&up->port.lock);
+               spin_lock_irqsave(&up->port.lock, flags);
  
         /*
          * First save the IER then disable the interrupts
@@ -1288,8 +1285,7 @@ serial_omap_console_write(struct console *co, const char *s,
                 check_modem_status(up);
  
         if (locked)
-               spin_unlock(&up->port.lock);
-       local_irq_restore(flags);
+               spin_unlock_irqrestore(&up->port.lock, flags);
  }
  
  static int __init
diff --git a/drivers/virt/acrn/irqfd.c b/drivers/virt/acrn/irqfd.c

index df51849..d4ad211 100644 (file)
--- a/drivers/virt/acrn/irqfd.c
+++ b/drivers/virt/acrn/irqfd.c
@@ -17,7 +17,6 @@
  #include "acrn_drv.h"
  
  static LIST_HEAD(acrn_irqfd_clients);
-static DEFINE_MUTEX(acrn_irqfds_mutex);
  
  /**
   * struct hsm_irqfd - Properties of HSM irqfd
diff --git a/fs/afs/dir_silly.c b/fs/afs/dir_silly.c

index 45cfd50..502b565 100644 (file)
--- a/fs/afs/dir_silly.c
+++ b/fs/afs/dir_silly.c
@@ -239,7 +239,7 @@ int afs_silly_iput(struct dentry *dentry, struct inode *inode)
         struct dentry *alias;
         int ret;
  
-       DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
+       DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(wq);
  
         _enter("%p{%pd},%llx", dentry, dentry, vnode->fid.vnode);
  
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c

index 1929e80..48eb8c3 100644 (file)
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -69,7 +69,7 @@ cifs_prime_dcache(struct dentry *parent, struct qstr *name,
         struct inode *inode;
         struct super_block *sb = parent->d_sb;
         struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
-       DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
+       DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(wq);
  
         cifs_dbg(FYI, "%s: for %s\n", __func__, name->name);
  
diff --git a/fs/dcache.c b/fs/dcache.c

index cf871a8..02db80f 100644 (file)
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -2537,7 +2537,13 @@ EXPORT_SYMBOL(d_rehash);
  
  static inline unsigned start_dir_add(struct inode *dir)
  {
-
+       /*
+        * The caller has a spinlock_t (dentry::d_lock) acquired which disables
+        * preemption on !PREEMPT_RT. On PREEMPT_RT the lock does not disable
+        * preemption and it has to be done explicitly.
+        */
+       if (IS_ENABLED(CONFIG_PREEMPT_RT))
+               preempt_disable();
         for (;;) {
                 unsigned n = dir->i_dir_seq;
                 if (!(n & 1) && cmpxchg(&dir->i_dir_seq, n, n + 1) == n)
@@ -2549,25 +2555,30 @@ static inline unsigned start_dir_add(struct inode *dir)
  static inline void end_dir_add(struct inode *dir, unsigned n)
  {
         smp_store_release(&dir->i_dir_seq, n + 2);
+       if (IS_ENABLED(CONFIG_PREEMPT_RT))
+               preempt_enable();
  }
  
  static void d_wait_lookup(struct dentry *dentry)
  {
-       if (d_in_lookup(dentry)) {
-               DECLARE_WAITQUEUE(wait, current);
-               add_wait_queue(dentry->d_wait, &wait);
-               do {
-                       set_current_state(TASK_UNINTERRUPTIBLE);
-                       spin_unlock(&dentry->d_lock);
-                       schedule();
-                       spin_lock(&dentry->d_lock);
-               } while (d_in_lookup(dentry));
-       }
+       struct swait_queue __wait;
+
+       if (!d_in_lookup(dentry))
+               return;
+
+       INIT_LIST_HEAD(&__wait.task_list);
+       do {
+               prepare_to_swait_exclusive(dentry->d_wait, &__wait, TASK_UNINTERRUPTIBLE);
+               spin_unlock(&dentry->d_lock);
+               schedule();
+               spin_lock(&dentry->d_lock);
+       } while (d_in_lookup(dentry));
+       finish_swait(dentry->d_wait, &__wait);
  }
  
  struct dentry *d_alloc_parallel(struct dentry *parent,
                                 const struct qstr *name,
-                               wait_queue_head_t *wq)
+                               struct swait_queue_head *wq)
  {
         unsigned int hash = name->hash;
         struct hlist_bl_head *b = in_lookup_hash(parent, hash);
@@ -2682,7 +2693,7 @@ void __d_lookup_done(struct dentry *dentry)
         hlist_bl_lock(b);
         dentry->d_flags &= ~DCACHE_PAR_LOOKUP;
         __hlist_bl_del(&dentry->d_u.d_in_lookup_hash);
-       wake_up_all(dentry->d_wait);
+       swake_up_all(dentry->d_wait);
         dentry->d_wait = NULL;
         hlist_bl_unlock(b);
         INIT_HLIST_NODE(&dentry->d_u.d_alias);
diff --git a/fs/fscache/internal.h b/fs/fscache/internal.h

index c3e4804..9edb87e 100644 (file)
--- a/fs/fscache/internal.h
+++ b/fs/fscache/internal.h
@@ -81,7 +81,6 @@ extern unsigned fscache_debug;
  extern struct kobject *fscache_root;
  extern struct workqueue_struct *fscache_object_wq;
  extern struct workqueue_struct *fscache_op_wq;
-DECLARE_PER_CPU(wait_queue_head_t, fscache_object_cong_wait);
  
  extern unsigned int fscache_hash(unsigned int salt, unsigned int *data, unsigned int n);
  
diff --git a/fs/fscache/main.c b/fs/fscache/main.c

index 4207f98..85f8cf3 100644 (file)
--- a/fs/fscache/main.c
+++ b/fs/fscache/main.c
@@ -41,8 +41,6 @@ struct kobject *fscache_root;
  struct workqueue_struct *fscache_object_wq;
  struct workqueue_struct *fscache_op_wq;
  
-DEFINE_PER_CPU(wait_queue_head_t, fscache_object_cong_wait);
-
  /* these values serve as lower bounds, will be adjusted in fscache_init() */
  static unsigned fscache_object_max_active = 4;
  static unsigned fscache_op_max_active = 2;
@@ -138,7 +136,6 @@ unsigned int fscache_hash(unsigned int salt, unsigned int *data, unsigned int n)
  static int __init fscache_init(void)
  {
         unsigned int nr_cpus = num_possible_cpus();
-       unsigned int cpu;
         int ret;
  
         fscache_object_max_active =
@@ -161,9 +158,6 @@ static int __init fscache_init(void)
         if (!fscache_op_wq)
                 goto error_op_wq;
  
-       for_each_possible_cpu(cpu)
-               init_waitqueue_head(&per_cpu(fscache_object_cong_wait, cpu));
-
         ret = fscache_proc_init();
         if (ret < 0)
                 goto error_proc;
diff --git a/fs/fscache/object.c b/fs/fscache/object.c

index 6a67565..7a972d1 100644 (file)
--- a/fs/fscache/object.c
+++ b/fs/fscache/object.c
@@ -798,6 +798,8 @@ void fscache_object_destroy(struct fscache_object *object)
  }
  EXPORT_SYMBOL(fscache_object_destroy);
  
+static DECLARE_WAIT_QUEUE_HEAD(fscache_object_cong_wait);
+
  /*
   * enqueue an object for metadata-type processing
   */
@@ -806,16 +808,12 @@ void fscache_enqueue_object(struct fscache_object *object)
         _enter("{OBJ%x}", object->debug_id);
  
         if (fscache_get_object(object, fscache_obj_get_queue) >= 0) {
-               wait_queue_head_t *cong_wq =
-                       &get_cpu_var(fscache_object_cong_wait);
  
                 if (queue_work(fscache_object_wq, &object->work)) {
                         if (fscache_object_congested())
-                               wake_up(cong_wq);
+                               wake_up(&fscache_object_cong_wait);
                 } else
                         fscache_put_object(object, fscache_obj_put_queue);
-
-               put_cpu_var(fscache_object_cong_wait);
         }
  }
  
@@ -833,16 +831,15 @@ void fscache_enqueue_object(struct fscache_object *object)
   */
  bool fscache_object_sleep_till_congested(signed long *timeoutp)
  {
-       wait_queue_head_t *cong_wq = this_cpu_ptr(&fscache_object_cong_wait);
         DEFINE_WAIT(wait);
  
         if (fscache_object_congested())
                 return true;
  
-       add_wait_queue_exclusive(cong_wq, &wait);
+       add_wait_queue_exclusive(&fscache_object_cong_wait, &wait);
         if (!fscache_object_congested())
                 *timeoutp = schedule_timeout(*timeoutp);
-       finish_wait(cong_wq, &wait);
+       finish_wait(&fscache_object_cong_wait, &wait);
  
         return fscache_object_congested();
  }
diff --git a/fs/fuse/readdir.c b/fs/fuse/readdir.c

index d5294e6..ee88468 100644 (file)
--- a/fs/fuse/readdir.c
+++ b/fs/fuse/readdir.c
@@ -160,7 +160,7 @@ static int fuse_direntplus_link(struct file *file,
         struct inode *dir = d_inode(parent);
         struct fuse_conn *fc;
         struct inode *inode;
-       DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
+       DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(wq);
  
         if (!o->nodeid) {
                 /*
diff --git a/fs/namei.c b/fs/namei.c

index 81b31d9..de83b67 100644 (file)
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1635,7 +1635,7 @@ static struct dentry *__lookup_slow(const struct qstr *name,
  {
         struct dentry *dentry, *old;
         struct inode *inode = dir->d_inode;
-       DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
+       DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(wq);
  
         /* Don't go there if it's already dead */
         if (unlikely(IS_DEADDIR(inode)))
@@ -3246,7 +3246,7 @@ static struct dentry *lookup_open(struct nameidata *nd, struct file *file,
         struct dentry *dentry;
         int error, create_error = 0;
         umode_t mode = op->mode;
-       DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
+       DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(wq);
  
         if (unlikely(IS_DEADDIR(dir_inode)))
                 return ERR_PTR(-ENOENT);
diff --git a/fs/namespace.c b/fs/namespace.c

index 9f80d88..5b42136 100644 (file)
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -344,8 +344,24 @@ int __mnt_want_write(struct vfsmount *m)
          * incremented count after it has set MNT_WRITE_HOLD.
          */
         smp_mb();
-       while (READ_ONCE(mnt->mnt.mnt_flags) & MNT_WRITE_HOLD)
-               cpu_relax();
+       might_lock(&mount_lock.lock);
+       while (READ_ONCE(mnt->mnt.mnt_flags) & MNT_WRITE_HOLD) {
+               if (!IS_ENABLED(CONFIG_PREEMPT_RT)) {
+                       cpu_relax();
+               } else {
+                       /*
+                        * This prevents priority inversion, if the task
+                        * setting MNT_WRITE_HOLD got preempted on a remote
+                        * CPU, and it prevents livelock if the task setting
+                        * MNT_WRITE_HOLD has a lower priority and is bound to
+                        * the same CPU as the task that is spinning here.
+                        */
+                       preempt_enable();
+                       lock_mount_hash();
+                       unlock_mount_hash();
+                       preempt_disable();
+               }
+       }
         /*
          * After the slowpath clears MNT_WRITE_HOLD, mnt_is_readonly will
          * be set to match its requirements. So we must not load that until
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c

index 32c3d0c..b8ff452 100644 (file)
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -637,7 +637,7 @@ void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry,
                 unsigned long dir_verifier)
  {
         struct qstr filename = QSTR_INIT(entry->name, entry->len);
-       DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
+       DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(wq);
         struct dentry *dentry;
         struct dentry *alias;
         struct inode *inode;
@@ -1873,7 +1873,7 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry,
                     struct file *file, unsigned open_flags,
                     umode_t mode)
  {
-       DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
+       DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(wq);
         struct nfs_open_context *ctx;
         struct dentry *res;
         struct iattr attr = { .ia_valid = ATTR_OPEN };
diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c

index d5ccf09..0944c06 100644 (file)
--- a/fs/nfs/unlink.c
+++ b/fs/nfs/unlink.c
@@ -13,7 +13,7 @@
  #include <linux/sunrpc/clnt.h>
  #include <linux/nfs_fs.h>
  #include <linux/sched.h>
-#include <linux/wait.h>
+#include <linux/swait.h>
  #include <linux/namei.h>
  #include <linux/fsnotify.h>
  
@@ -184,7 +184,7 @@ nfs_async_unlink(struct dentry *dentry, const struct qstr *name)
  
         data->cred = get_current_cred();
         data->res.dir_attr = &data->dir_attr;
-       init_waitqueue_head(&data->wq);
+       init_swait_queue_head(&data->wq);
  
         status = -EBUSY;
         spin_lock(&dentry->d_lock);
diff --git a/fs/proc/base.c b/fs/proc/base.c

index 300d53e..6ab25d4 100644 (file)
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -96,6 +96,7 @@
  #include <linux/posix-timers.h>
  #include <linux/time_namespace.h>
  #include <linux/resctrl.h>
+#include <linux/swait.h>
  #include <linux/cn_proc.h>
  #include <trace/events/oom.h>
  #include "internal.h"
@@ -2071,7 +2072,7 @@ bool proc_fill_cache(struct file *file, struct dir_context *ctx,
  
         child = d_hash_and_lookup(dir, &qname);
         if (!child) {
-               DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
+               DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(wq);
                 child = d_alloc_parallel(dir, &qname, &wq);
                 if (IS_ERR(child))
                         goto end_instantiate;
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c

index 0b7a00e..a7828fc 100644 (file)
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -679,7 +679,7 @@ static bool proc_sys_fill_cache(struct file *file,
  
         child = d_lookup(dir, &qname);
         if (!child) {
-               DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
+               DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(wq);
                 child = d_alloc_parallel(dir, &qname, &wq);
                 if (IS_ERR(child))
                         return false;
diff --git a/include/asm-generic/softirq_stack.h b/include/asm-generic/softirq_stack.h

index eceeecf..d3e2d81 100644 (file)
--- a/include/asm-generic/softirq_stack.h
+++ b/include/asm-generic/softirq_stack.h
@@ -2,7 +2,7 @@
  #ifndef __ASM_GENERIC_SOFTIRQ_STACK_H
  #define __ASM_GENERIC_SOFTIRQ_STACK_H
  
-#ifdef CONFIG_HAVE_SOFTIRQ_ON_OWN_STACK
+#if defined(CONFIG_HAVE_SOFTIRQ_ON_OWN_STACK) && !defined(CONFIG_PREEMPT_RT)
  void do_softirq_own_stack(void);
  #else
  static inline void do_softirq_own_stack(void)
diff --git a/include/linux/console.h b/include/linux/console.h

index a97f277..487a426 100644 (file)
--- a/include/linux/console.h
+++ b/include/linux/console.h
@@ -16,6 +16,13 @@
  
  #include <linux/atomic.h>
  #include <linux/types.h>
+#include <linux/printk.h>
+#include <linux/seqlock.h>
+
+struct latched_seq {
+       seqcount_latch_t        latch;
+       u64                     val[2];
+};
  
  struct vc_data;
  struct console_font_op;
@@ -136,10 +143,12 @@ static inline int con_debug_leave(void)
  #define CON_ANYTIME    (16) /* Safe to call when cpu is offline */
  #define CON_BRL                (32) /* Used for a braille device */
  #define CON_EXTENDED   (64) /* Use the extended output format a la /dev/kmsg */
+#define CON_HANDOVER   (128) /* Device was previously a boot console. */
  
  struct console {
         char    name[16];
         void    (*write)(struct console *, const char *, unsigned);
+       void    (*write_atomic)(struct console *co, const char *s, unsigned int count);
         int     (*read)(struct console *, char *, unsigned);
         struct tty_driver *(*device)(struct console *, int *);
         void    (*unblank)(void);
@@ -149,6 +158,16 @@ struct console {
         short   flags;
         short   index;
         int     cflag;
+#ifdef CONFIG_PRINTK
+       char    sync_buf[CONSOLE_LOG_MAX];
+       struct latched_seq printk_seq;
+       struct latched_seq printk_sync_seq;
+#ifdef CONFIG_HAVE_NMI
+       struct latched_seq printk_sync_nmi_seq;
+#endif
+#endif /* CONFIG_PRINTK */
+
+       struct task_struct *thread;
         uint    ispeed;
         uint    ospeed;
         void    *data;
diff --git a/include/linux/dcache.h b/include/linux/dcache.h

index 9e23d33..9f89d48 100644 (file)
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -108,7 +108,7 @@ struct dentry {
  
         union {
                 struct list_head d_lru;         /* LRU list */
-               wait_queue_head_t *d_wait;      /* in-lookup ones only */
+               struct swait_queue_head *d_wait;        /* in-lookup ones only */
         };
         struct list_head d_child;       /* child of parent list */
         struct list_head d_subdirs;     /* our children */
@@ -240,7 +240,7 @@ extern void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op
  extern struct dentry * d_alloc(struct dentry *, const struct qstr *);
  extern struct dentry * d_alloc_anon(struct super_block *);
  extern struct dentry * d_alloc_parallel(struct dentry *, const struct qstr *,
-                                       wait_queue_head_t *);
+                                       struct swait_queue_head *);
  extern struct dentry * d_splice_alias(struct inode *, struct dentry *);
  extern struct dentry * d_add_ci(struct dentry *, struct inode *, struct qstr *);
  extern struct dentry * d_exact_alias(struct dentry *, struct inode *);
diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h

index 2e2b8d6..71064a2 100644 (file)
--- a/include/linux/entry-common.h
+++ b/include/linux/entry-common.h
@@ -57,9 +57,15 @@
  # define ARCH_EXIT_TO_USER_MODE_WORK           (0)
  #endif
  
+#ifdef CONFIG_PREEMPT_LAZY
+# define _TIF_NEED_RESCHED_MASK        (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY)
+#else
+# define _TIF_NEED_RESCHED_MASK        (_TIF_NEED_RESCHED)
+#endif
+
  #define EXIT_TO_USER_MODE_WORK                                         \
         (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_UPROBE |           \
-        _TIF_NEED_RESCHED | _TIF_PATCH_PENDING | _TIF_NOTIFY_SIGNAL |  \
+        _TIF_NEED_RESCHED_MASK | _TIF_PATCH_PENDING | _TIF_NOTIFY_SIGNAL |     \
          ARCH_EXIT_TO_USER_MODE_WORK)
  
  /**
diff --git a/include/linux/irq_work.h b/include/linux/irq_work.h

index ec2a47a..8cd11a2 100644 (file)
--- a/include/linux/irq_work.h
+++ b/include/linux/irq_work.h
@@ -3,6 +3,7 @@
  #define _LINUX_IRQ_WORK_H
  
  #include <linux/smp_types.h>
+#include <linux/rcuwait.h>
  
  /*
   * An entry can be in one of four states:
@@ -16,11 +17,13 @@
  struct irq_work {
         struct __call_single_node node;
         void (*func)(struct irq_work *);
+       struct rcuwait irqwait;
  };
  
  #define __IRQ_WORK_INIT(_func, _flags) (struct irq_work){      \
         .node = { .u_flags = (_flags), },                       \
         .func = (_func),                                        \
+       .irqwait = __RCUWAIT_INITIALIZER(irqwait),              \
  }
  
  #define IRQ_WORK_INIT(_func) __IRQ_WORK_INIT(_func, 0)
@@ -46,6 +49,11 @@ static inline bool irq_work_is_busy(struct irq_work *work)
         return atomic_read(&work->node.a_flags) & IRQ_WORK_BUSY;
  }
  
+static inline bool irq_work_is_hard(struct irq_work *work)
+{
+       return atomic_read(&work->node.a_flags) & IRQ_WORK_HARD_IRQ;
+}
+
  bool irq_work_queue(struct irq_work *work);
  bool irq_work_queue_on(struct irq_work *work, int cpu);
  
diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h

index 59aea39..d69b819 100644 (file)
--- a/include/linux/irqdesc.h
+++ b/include/linux/irqdesc.h
@@ -160,6 +160,7 @@ static inline void generic_handle_irq_desc(struct irq_desc *desc)
  
  int handle_irq_desc(struct irq_desc *desc);
  int generic_handle_irq(unsigned int irq);
+int generic_handle_irq_safe(unsigned int irq);
  
  #ifdef CONFIG_IRQ_DOMAIN
  /*
diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h

index 747f40e..5ec0fa7 100644 (file)
--- a/include/linux/irqflags.h
+++ b/include/linux/irqflags.h
@@ -71,14 +71,6 @@ do {                                         \
  do {                                           \
         __this_cpu_dec(hardirq_context);        \
  } while (0)
-# define lockdep_softirq_enter()               \
-do {                                           \
-       current->softirq_context++;             \
-} while (0)
-# define lockdep_softirq_exit()                        \
-do {                                           \
-       current->softirq_context--;             \
-} while (0)
  
  # define lockdep_hrtimer_enter(__hrtimer)              \
  ({                                                     \
@@ -140,6 +132,21 @@ do {                                               \
  # define lockdep_irq_work_exit(__work)         do { } while (0)
  #endif
  
+#if defined(CONFIG_TRACE_IRQFLAGS) && !defined(CONFIG_PREEMPT_RT)
+# define lockdep_softirq_enter()               \
+do {                                           \
+       current->softirq_context++;             \
+} while (0)
+# define lockdep_softirq_exit()                        \
+do {                                           \
+       current->softirq_context--;             \
+} while (0)
+
+#else
+# define lockdep_softirq_enter()               do { } while (0)
+# define lockdep_softirq_exit()                        do { } while (0)
+#endif
+
  #if defined(CONFIG_IRQSOFF_TRACER) || \
         defined(CONFIG_PREEMPT_TRACER)
   extern void stop_critical_timings(void);
diff --git a/include/linux/kernel.h b/include/linux/kernel.h

index f56cd88..49f1e92 100644 (file)
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -111,8 +111,8 @@ static __always_inline void might_resched(void)
  #endif /* CONFIG_PREEMPT_* */
  
  #ifdef CONFIG_DEBUG_ATOMIC_SLEEP
-extern void ___might_sleep(const char *file, int line, int preempt_offset);
-extern void __might_sleep(const char *file, int line, int preempt_offset);
+extern void __might_resched(const char *file, int line, unsigned int offsets);
+extern void __might_sleep(const char *file, int line);
  extern void __cant_sleep(const char *file, int line, int preempt_offset);
  extern void __cant_migrate(const char *file, int line);
  
@@ -129,7 +129,7 @@ extern void __cant_migrate(const char *file, int line);
   * supposed to.
   */
  # define might_sleep() \
-       do { __might_sleep(__FILE__, __LINE__, 0); might_resched(); } while (0)
+       do { __might_sleep(__FILE__, __LINE__); might_resched(); } while (0)
  /**
   * cant_sleep - annotation for functions that cannot sleep
   *
@@ -168,10 +168,9 @@ extern void __cant_migrate(const char *file, int line);
   */
  # define non_block_end() WARN_ON(current->non_block_count-- == 0)
  #else
-  static inline void ___might_sleep(const char *file, int line,
-                                  int preempt_offset) { }
-  static inline void __might_sleep(const char *file, int line,
-                                  int preempt_offset) { }
+  static inline void __might_resched(const char *file, int line,
+                                    unsigned int offsets) { }
+static inline void __might_sleep(const char *file, int line) { }
  # define might_sleep() do { might_resched(); } while (0)
  # define cant_sleep() do { } while (0)
  # define cant_migrate()                do { } while (0)
diff --git a/include/linux/kgdb.h b/include/linux/kgdb.h

index 258cdde..9bca0d9 100644 (file)
--- a/include/linux/kgdb.h
+++ b/include/linux/kgdb.h
@@ -212,6 +212,8 @@ extern void kgdb_call_nmi_hook(void *ignored);
   */
  extern void kgdb_roundup_cpus(void);
  
+extern void kgdb_roundup_cpu(unsigned int cpu);
+
  /**
   *     kgdb_arch_set_pc - Generic call back to the program counter
   *     @regs: Current &struct pt_regs.
@@ -365,5 +367,6 @@ extern void kgdb_free_init_mem(void);
  #define dbg_late_init()
  static inline void kgdb_panic(const char *msg) {}
  static inline void kgdb_free_init_mem(void) { }
+static inline void kgdb_roundup_cpu(unsigned int cpu) {}
  #endif /* ! CONFIG_KGDB */
  #endif /* _KGDB_H_ */
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h

index 7f8ee09..e9672de 100644 (file)
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -12,6 +12,7 @@
  #include <linux/completion.h>
  #include <linux/cpumask.h>
  #include <linux/uprobes.h>
+#include <linux/rcupdate.h>
  #include <linux/page-flags-layout.h>
  #include <linux/workqueue.h>
  #include <linux/seqlock.h>
@@ -572,6 +573,9 @@ struct mm_struct {
                 bool tlb_flush_batched;
  #endif
                 struct uprobes_state uprobes_state;
+#ifdef CONFIG_PREEMPT_RT
+               struct rcu_head delayed_drop;
+#endif
  #ifdef CONFIG_HUGETLB_PAGE
                 atomic_long_t hugetlb_usage;
  #endif
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h

index 3a75d64..6bfa20b 100644 (file)
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1937,7 +1937,6 @@ enum netdev_ml_priv_type {
   *     @sfp_bus:       attached &struct sfp_bus structure.
   *
   *     @qdisc_tx_busylock: lockdep class annotating Qdisc->busylock spinlock
- *     @qdisc_running_key: lockdep class annotating Qdisc->running seqcount
   *
   *     @proto_down:    protocol port state information can be sent to the
   *                     switch driver and used to set the phys state of the
@@ -2271,7 +2270,6 @@ struct net_device {
         struct phy_device       *phydev;
         struct sfp_bus          *sfp_bus;
         struct lock_class_key   *qdisc_tx_busylock;
-       struct lock_class_key   *qdisc_running_key;
         bool                    proto_down;
         unsigned                wol_enabled:1;
         unsigned                threaded:1;
@@ -2381,13 +2379,11 @@ static inline void netdev_for_each_tx_queue(struct net_device *dev,
  #define netdev_lockdep_set_classes(dev)                                \
  {                                                              \
         static struct lock_class_key qdisc_tx_busylock_key;     \
-       static struct lock_class_key qdisc_running_key;         \
         static struct lock_class_key qdisc_xmit_lock_key;       \
         static struct lock_class_key dev_addr_list_lock_key;    \
         unsigned int i;                                         \
                                                                 \
         (dev)->qdisc_tx_busylock = &qdisc_tx_busylock_key;      \
-       (dev)->qdisc_running_key = &qdisc_running_key;          \
         lockdep_set_class(&(dev)->addr_list_lock,               \
                           &dev_addr_list_lock_key);             \
         for (i = 0; i < (dev)->num_tx_queues; i++)              \
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h

index 783f871..4e4d2b9 100644 (file)
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1692,7 +1692,7 @@ struct nfs_unlinkdata {
         struct nfs_removeargs args;
         struct nfs_removeres res;
         struct dentry *dentry;
-       wait_queue_head_t wq;
+       struct swait_queue_head wq;
         const struct cred *cred;
         struct nfs_fattr dir_attr;
         long timeout;
diff --git a/include/linux/preempt.h b/include/linux/preempt.h

index 4d244e2..3da73c9 100644 (file)
--- a/include/linux/preempt.h
+++ b/include/linux/preempt.h
@@ -122,9 +122,10 @@
   * The preempt_count offset after spin_lock()
   */
  #if !defined(CONFIG_PREEMPT_RT)
-#define PREEMPT_LOCK_OFFSET    PREEMPT_DISABLE_OFFSET
+#define PREEMPT_LOCK_OFFSET            PREEMPT_DISABLE_OFFSET
  #else
-#define PREEMPT_LOCK_OFFSET    0
+/* Locks on RT do not disable preemption */
+#define PREEMPT_LOCK_OFFSET            0
  #endif
  
  /*
@@ -174,6 +175,20 @@ extern void preempt_count_sub(int val);
  #define preempt_count_inc() preempt_count_add(1)
  #define preempt_count_dec() preempt_count_sub(1)
  
+#ifdef CONFIG_PREEMPT_LAZY
+#define add_preempt_lazy_count(val)    do { preempt_lazy_count() += (val); } while (0)
+#define sub_preempt_lazy_count(val)    do { preempt_lazy_count() -= (val); } while (0)
+#define inc_preempt_lazy_count()       add_preempt_lazy_count(1)
+#define dec_preempt_lazy_count()       sub_preempt_lazy_count(1)
+#define preempt_lazy_count()           (current_thread_info()->preempt_lazy_count)
+#else
+#define add_preempt_lazy_count(val)    do { } while (0)
+#define sub_preempt_lazy_count(val)    do { } while (0)
+#define inc_preempt_lazy_count()       do { } while (0)
+#define dec_preempt_lazy_count()       do { } while (0)
+#define preempt_lazy_count()           (0)
+#endif
+
  #ifdef CONFIG_PREEMPT_COUNT
  
  #define preempt_disable() \
@@ -182,13 +197,25 @@ do { \
         barrier(); \
  } while (0)
  
+#define preempt_lazy_disable() \
+do { \
+       inc_preempt_lazy_count(); \
+       barrier(); \
+} while (0)
+
  #define sched_preempt_enable_no_resched() \
  do { \
         barrier(); \
         preempt_count_dec(); \
  } while (0)
  
-#define preempt_enable_no_resched() sched_preempt_enable_no_resched()
+#ifndef CONFIG_PREEMPT_RT
+# define preempt_enable_no_resched() sched_preempt_enable_no_resched()
+# define preempt_check_resched_rt() barrier()
+#else /* CONFIG_PREEMPT_RT */
+# define preempt_enable_no_resched() preempt_enable()
+# define preempt_check_resched_rt() preempt_check_resched()
+#endif /* CONFIG_PREEMPT_RT */
  
  #define preemptible()  (preempt_count() == 0 && !irqs_disabled())
  
@@ -213,6 +240,18 @@ do { \
                 __preempt_schedule(); \
  } while (0)
  
+/*
+ * Open-code preempt_check_resched() because it is not exported to modules
+ * but is needed by local_unlock() or bpf_enable_instrumentation().
+ */
+#define preempt_lazy_enable() \
+do { \
+       dec_preempt_lazy_count(); \
+       barrier(); \
+       if (should_resched(0)) \
+               __preempt_schedule(); \
+} while (0)
+
  #else /* !CONFIG_PREEMPTION */
  #define preempt_enable() \
  do { \
@@ -220,6 +259,12 @@ do { \
         preempt_count_dec(); \
  } while (0)
  
+#define preempt_lazy_enable() \
+do { \
+       dec_preempt_lazy_count(); \
+       barrier(); \
+} while (0)
+
  #define preempt_enable_notrace() \
  do { \
         barrier(); \
@@ -258,8 +303,12 @@ do { \
  #define preempt_disable_notrace()              barrier()
  #define preempt_enable_no_resched_notrace()    barrier()
  #define preempt_enable_notrace()               barrier()
+#define preempt_check_resched_rt()             barrier()
  #define preemptible()                          0
  
+#define preempt_lazy_disable()                 barrier()
+#define preempt_lazy_enable()                  barrier()
+
  #endif /* CONFIG_PREEMPT_COUNT */
  
  #ifdef MODULE
@@ -278,7 +327,7 @@ do { \
  } while (0)
  #define preempt_fold_need_resched() \
  do { \
-       if (tif_need_resched()) \
+       if (tif_need_resched_now()) \
                 set_preempt_need_resched(); \
  } while (0)
  
@@ -394,8 +443,15 @@ extern void migrate_enable(void);
  
  #else
  
-static inline void migrate_disable(void) { }
-static inline void migrate_enable(void) { }
+static inline void migrate_disable(void)
+{
+       preempt_lazy_disable();
+}
+
+static inline void migrate_enable(void)
+{
+       preempt_lazy_enable();
+}
  
  #endif /* CONFIG_SMP */
  
diff --git a/include/linux/printk.h b/include/linux/printk.h

index 9497f6b..eddfc5d 100644 (file)
--- a/include/linux/printk.h
+++ b/include/linux/printk.h
@@ -47,6 +47,12 @@ static inline const char *printk_skip_headers(const char *buffer)
  
  #define CONSOLE_EXT_LOG_MAX    8192
  
+/*
+ * The maximum size of a record formatted for console printing
+ * (i.e. with the prefix prepended to every line).
+ */
+#define CONSOLE_LOG_MAX                1024
+
  /* printk's without a loglevel use this.. */
  #define MESSAGE_LOGLEVEL_DEFAULT CONFIG_MESSAGE_LOGLEVEL_DEFAULT
  
@@ -155,6 +161,8 @@ int vprintk(const char *fmt, va_list args);
  asmlinkage __printf(1, 2) __cold
  int _printk(const char *fmt, ...);
  
+bool pr_flush(int timeout_ms, bool reset_on_progress);
+
  /*
   * Special printk facility for scheduler/timekeeping use only, _DO_NOT_USE_ !
   */
@@ -224,6 +232,11 @@ static inline void printk_deferred_exit(void)
  {
  }
  
+static inline bool pr_flush(int timeout_ms, bool reset_on_progress)
+{
+       return true;
+}
+
  static inline int printk_ratelimit(void)
  {
         return 0;
@@ -284,17 +297,30 @@ static inline void printk_trigger_flush(void)
  extern int __printk_cpu_trylock(void);
  extern void __printk_wait_on_cpu_lock(void);
  extern void __printk_cpu_unlock(void);
+extern bool kgdb_roundup_delay(unsigned int cpu);
+
+#else
+
+#define __printk_cpu_trylock()         1
+#define __printk_wait_on_cpu_lock()
+#define __printk_cpu_unlock()
+
+static inline bool kgdb_roundup_delay(unsigned int cpu)
+{
+       return false;
+}
+#endif /* CONFIG_SMP */
  
  /**
- * printk_cpu_lock_irqsave() - Acquire the printk cpu-reentrant spinning
- *                             lock and disable interrupts.
+ * raw_printk_cpu_lock_irqsave() - Acquire the printk cpu-reentrant spinning
+ *                                 lock and disable interrupts.
   * @flags: Stack-allocated storage for saving local interrupt state,
- *         to be passed to printk_cpu_unlock_irqrestore().
+ *         to be passed to raw_printk_cpu_unlock_irqrestore().
   *
   * If the lock is owned by another CPU, spin until it becomes available.
   * Interrupts are restored while spinning.
   */
-#define printk_cpu_lock_irqsave(flags)         \
+#define raw_printk_cpu_lock_irqsave(flags)     \
         for (;;) {                              \
                 local_irq_save(flags);          \
                 if (__printk_cpu_trylock())     \
@@ -304,22 +330,30 @@ extern void __printk_cpu_unlock(void);
         }
  
  /**
- * printk_cpu_unlock_irqrestore() - Release the printk cpu-reentrant spinning
- *                                  lock and restore interrupts.
- * @flags: Caller's saved interrupt state, from printk_cpu_lock_irqsave().
+ * raw_printk_cpu_unlock_irqrestore() - Release the printk cpu-reentrant
+ *                                      spinning lock and restore interrupts.
+ * @flags: Caller's saved interrupt state from raw_printk_cpu_lock_irqsave().
   */
-#define printk_cpu_unlock_irqrestore(flags)    \
+#define raw_printk_cpu_unlock_irqrestore(flags)        \
         do {                                    \
                 __printk_cpu_unlock();          \
                 local_irq_restore(flags);       \
-       } while (0)                             \
-
-#else
+       } while (0)
  
-#define printk_cpu_lock_irqsave(flags) ((void)flags)
-#define printk_cpu_unlock_irqrestore(flags) ((void)flags)
+/*
+ * Used to synchronize atomic consoles.
+ *
+ * The same as raw_printk_cpu_lock_irqsave() except that hardware interrupts
+ * are _not_ restored while spinning.
+ */
+#define console_atomic_lock(flags)             \
+       do {                                    \
+               local_irq_save(flags);          \
+               while (!__printk_cpu_trylock()) \
+                       cpu_relax();            \
+       } while (0)
  
-#endif /* CONFIG_SMP */
+#define console_atomic_unlock raw_printk_cpu_unlock_irqrestore
  
  extern int kptr_restrict;
  
diff --git a/include/linux/ratelimit_types.h b/include/linux/ratelimit_types.h

index f0e535f..0022666 100644 (file)
--- a/include/linux/ratelimit_types.h
+++ b/include/linux/ratelimit_types.h
@@ -4,7 +4,7 @@
  
  #include <linux/bits.h>
  #include <linux/param.h>
-#include <linux/spinlock_types.h>
+#include <linux/spinlock_types_raw.h>
  
  #define DEFAULT_RATELIMIT_INTERVAL     (5 * HZ)
  #define DEFAULT_RATELIMIT_BURST                10
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h

index 434d12f..de6d1a2 100644 (file)
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -94,6 +94,13 @@ void rcu_init_tasks_generic(void);
  static inline void rcu_init_tasks_generic(void) { }
  #endif
  
+#if defined(CONFIG_PROVE_RCU) && defined(CONFIG_TASKS_RCU_GENERIC)
+void rcu_tasks_initiate_self_tests(void);
+#else
+static inline void rcu_tasks_initiate_self_tests(void) {}
+#endif
+
+
  #ifdef CONFIG_RCU_STALL_COMMON
  void rcu_sysrq_start(void);
  void rcu_sysrq_end(void);
diff --git a/include/linux/rtmutex.h b/include/linux/rtmutex.h

index 9deedfe..7d04988 100644 (file)
--- a/include/linux/rtmutex.h
+++ b/include/linux/rtmutex.h
@@ -99,13 +99,22 @@ extern void __rt_mutex_init(struct rt_mutex *lock, const char *name, struct lock
  
  #ifdef CONFIG_DEBUG_LOCK_ALLOC
  extern void rt_mutex_lock_nested(struct rt_mutex *lock, unsigned int subclass);
+extern void _rt_mutex_lock_nest_lock(struct rt_mutex *lock, struct lockdep_map *nest_lock);
  #define rt_mutex_lock(lock) rt_mutex_lock_nested(lock, 0)
+#define rt_mutex_lock_nest_lock(lock, nest_lock)                       \
+       do {                                                            \
+               typecheck(struct lockdep_map *, &(nest_lock)->dep_map); \
+               _rt_mutex_lock_nest_lock(lock, &(nest_lock)->dep_map);  \
+       } while (0)
+
  #else
  extern void rt_mutex_lock(struct rt_mutex *lock);
  #define rt_mutex_lock_nested(lock, subclass) rt_mutex_lock(lock)
+#define rt_mutex_lock_nest_lock(lock, nest_lock) rt_mutex_lock(lock)
  #endif
  
  extern int rt_mutex_lock_interruptible(struct rt_mutex *lock);
+extern int rt_mutex_lock_killable(struct rt_mutex *lock);
  extern int rt_mutex_trylock(struct rt_mutex *lock);
  
  extern void rt_mutex_unlock(struct rt_mutex *lock);
diff --git a/include/linux/sched.h b/include/linux/sched.h

index e418935..7790659 100644 (file)
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -118,12 +118,8 @@ struct task_group;
  
  #define task_is_running(task)          (READ_ONCE((task)->__state) == TASK_RUNNING)
  
-#define task_is_traced(task)           ((READ_ONCE(task->__state) & __TASK_TRACED) != 0)
-
  #define task_is_stopped(task)          ((READ_ONCE(task->__state) & __TASK_STOPPED) != 0)
  
-#define task_is_stopped_or_traced(task)        ((READ_ONCE(task->__state) & (__TASK_STOPPED | __TASK_TRACED)) != 0)
-
  /*
   * Special states are those that do not use the normal wait-loop pattern. See
   * the comment with set_special_state().
@@ -1084,6 +1080,10 @@ struct task_struct {
         /* Restored if set_restore_sigmask() was used: */
         sigset_t                        saved_sigmask;
         struct sigpending               pending;
+#ifdef CONFIG_PREEMPT_RT
+       /* TODO: move me into ->restart_block ? */
+       struct                          kernel_siginfo forced_info;
+#endif
         unsigned long                   sas_ss_sp;
         size_t                          sas_ss_size;
         unsigned int                    sas_ss_flags;
@@ -1738,6 +1738,16 @@ static __always_inline bool is_percpu_thread(void)
  #endif
  }
  
+/* Can the current task be migrated away from its current CPU? */
+static inline bool is_migratable(void)
+{
+#ifdef CONFIG_SMP
+       return preemptible() && !current->migration_disabled;
+#else
+       return false;
+#endif
+}
+
  /* Per-process atomic flags. */
  #define PFA_NO_NEW_PRIVS               0       /* May not gain new privileges. */
  #define PFA_SPREAD_PAGE                        1       /* Spread page cache over cpuset */
@@ -2013,6 +2023,118 @@ static inline int test_tsk_need_resched(struct task_struct *tsk)
         return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED));
  }
  
+#ifdef CONFIG_PREEMPT_LAZY
+static inline void set_tsk_need_resched_lazy(struct task_struct *tsk)
+{
+       set_tsk_thread_flag(tsk,TIF_NEED_RESCHED_LAZY);
+}
+
+static inline void clear_tsk_need_resched_lazy(struct task_struct *tsk)
+{
+       clear_tsk_thread_flag(tsk,TIF_NEED_RESCHED_LAZY);
+}
+
+static inline int test_tsk_need_resched_lazy(struct task_struct *tsk)
+{
+       return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED_LAZY));
+}
+
+static inline int need_resched_lazy(void)
+{
+       return test_thread_flag(TIF_NEED_RESCHED_LAZY);
+}
+
+static inline int need_resched_now(void)
+{
+       return test_thread_flag(TIF_NEED_RESCHED);
+}
+
+#else
+static inline void clear_tsk_need_resched_lazy(struct task_struct *tsk) { }
+static inline int need_resched_lazy(void) { return 0; }
+
+static inline int need_resched_now(void)
+{
+       return test_thread_flag(TIF_NEED_RESCHED);
+}
+
+#endif
+
+#ifdef CONFIG_PREEMPT_RT
+static inline bool task_match_saved_state(struct task_struct *p, long match_state)
+{
+       return p->saved_state == match_state;
+}
+
+/* Checks ->__state and ->saved_state; irqsave keeps the caller's IRQ state. */
+static inline bool task_is_traced(struct task_struct *task)
+{
+       unsigned long flags;
+       bool traced;
+
+       /* in case the task is sleeping on tasklist_lock */
+       raw_spin_lock_irqsave(&task->pi_lock, flags);
+       traced = (READ_ONCE(task->__state) & __TASK_TRACED) ||
+                (task->saved_state & __TASK_TRACED);
+       raw_spin_unlock_irqrestore(&task->pi_lock, flags);
+       return traced;
+}
+
+static inline bool task_is_stopped_or_traced(struct task_struct *task)
+{
+       bool traced_stopped = false;
+       unsigned long flags;
+
+       raw_spin_lock_irqsave(&task->pi_lock, flags);
+
+       if (READ_ONCE(task->__state) & (__TASK_STOPPED | __TASK_TRACED))
+               traced_stopped = true;
+       else if (task->saved_state & (__TASK_STOPPED | __TASK_TRACED))
+               traced_stopped = true;
+
+       raw_spin_unlock_irqrestore(&task->pi_lock, flags);
+       return traced_stopped;
+}
+
+#else
+
+static inline bool task_match_saved_state(struct task_struct *p, long match_state)
+{
+       return false;
+}
+
+static inline bool task_is_traced(struct task_struct *task)
+{
+       return READ_ONCE(task->__state) & __TASK_TRACED;
+}
+
+static inline bool task_is_stopped_or_traced(struct task_struct *task)
+{
+       return READ_ONCE(task->__state) & (__TASK_STOPPED | __TASK_TRACED);
+}
+#endif
+
+static inline bool task_match_state_or_saved(struct task_struct *p,
+                                            long match_state)
+{
+       if (READ_ONCE(p->__state) == match_state)
+               return true;
+
+       return task_match_saved_state(p, match_state);
+}
+
+static inline bool task_match_state_lock(struct task_struct *p,
+                                        long match_state)
+{
+       bool match;
+
+       raw_spin_lock_irq(&p->pi_lock);
+       match = task_match_state_or_saved(p, match_state);
+       raw_spin_unlock_irq(&p->pi_lock);
+
+       return match;
+}
+
  /*
   * cond_resched() and cond_resched_lock(): latency reduction via
   * explicit rescheduling in places that are safe. The return
@@ -2047,7 +2169,7 @@ static inline int _cond_resched(void) { return 0; }
  #endif /* !defined(CONFIG_PREEMPTION) || defined(CONFIG_PREEMPT_DYNAMIC) */
  
  #define cond_resched() ({                      \
-       ___might_sleep(__FILE__, __LINE__, 0);  \
+       __might_resched(__FILE__, __LINE__, 0); \
         _cond_resched();                        \
  })
  
@@ -2055,19 +2177,38 @@ extern int __cond_resched_lock(spinlock_t *lock);
  extern int __cond_resched_rwlock_read(rwlock_t *lock);
  extern int __cond_resched_rwlock_write(rwlock_t *lock);
  
-#define cond_resched_lock(lock) ({                             \
-       ___might_sleep(__FILE__, __LINE__, PREEMPT_LOCK_OFFSET);\
-       __cond_resched_lock(lock);                              \
+#define MIGHT_RESCHED_RCU_SHIFT                8
+#define MIGHT_RESCHED_PREEMPT_MASK     ((1U << MIGHT_RESCHED_RCU_SHIFT) - 1)
+
+#ifndef CONFIG_PREEMPT_RT
+/*
+ * Non-RT kernels have an elevated preempt count due to the held lock,
+ * but are not allowed to be inside an RCU read-side critical section.
+ */
+# define PREEMPT_LOCK_RESCHED_OFFSETS  PREEMPT_LOCK_OFFSET
+#else
+/*
+ * spin/rw_lock() on RT implies rcu_read_lock(). The might_sleep() check in
+ * cond_resched*lock() has to take that into account because it checks for
+ * preempt_count() and rcu_preempt_depth().
+ */
+# define PREEMPT_LOCK_RESCHED_OFFSETS  \
+       (PREEMPT_LOCK_OFFSET + (1U << MIGHT_RESCHED_RCU_SHIFT))
+#endif
+
+#define cond_resched_lock(lock) ({                                             \
+       __might_resched(__FILE__, __LINE__, PREEMPT_LOCK_RESCHED_OFFSETS);      \
+       __cond_resched_lock(lock);                                              \
  })
  
-#define cond_resched_rwlock_read(lock) ({                      \
-       __might_sleep(__FILE__, __LINE__, PREEMPT_LOCK_OFFSET); \
-       __cond_resched_rwlock_read(lock);                       \
+#define cond_resched_rwlock_read(lock) ({                                      \
+       __might_resched(__FILE__, __LINE__, PREEMPT_LOCK_RESCHED_OFFSETS);      \
+       __cond_resched_rwlock_read(lock);                                       \
  })
  
-#define cond_resched_rwlock_write(lock) ({                     \
-       __might_sleep(__FILE__, __LINE__, PREEMPT_LOCK_OFFSET); \
-       __cond_resched_rwlock_write(lock);                      \
+#define cond_resched_rwlock_write(lock) ({                                     \
+       __might_resched(__FILE__, __LINE__, PREEMPT_LOCK_RESCHED_OFFSETS);      \
+       __cond_resched_rwlock_write(lock);                                      \
  })
  
  static inline void cond_resched_rcu(void)
diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h

index 95fb7aa..28e9cc6 100644 (file)
--- a/include/linux/sched/mm.h
+++ b/include/linux/sched/mm.h
@@ -49,6 +49,26 @@ static inline void mmdrop(struct mm_struct *mm)
                 __mmdrop(mm);
  }
  
+#ifdef CONFIG_PREEMPT_RT
+extern void __mmdrop_delayed(struct rcu_head *rhp);
+
+/*
+ * Invoked from finish_task_switch(). Delegates the heavy lifting on RT
+ * kernels via RCU.
+ */
+static inline void mmdrop_sched(struct mm_struct *mm)
+{
+       /* Provides a full memory barrier. See mmdrop() */
+       if (atomic_dec_and_test(&mm->mm_count))
+               call_rcu(&mm->delayed_drop, __mmdrop_delayed);
+}
+#else
+static inline void mmdrop_sched(struct mm_struct *mm)
+{
+       mmdrop(mm);
+}
+#endif
+
  /**
   * mmget() - Pin the address space associated with a &struct mm_struct.
   * @mm: The address space to pin.
diff --git a/include/linux/serial_8250.h b/include/linux/serial_8250.h

index 68abc6b..dfe81e0 100644 (file)
--- a/include/linux/serial_8250.h
+++ b/include/linux/serial_8250.h
@@ -7,6 +7,7 @@
  #ifndef _LINUX_SERIAL_8250_H
  #define _LINUX_SERIAL_8250_H
  
+#include <linux/atomic.h>
  #include <linux/serial_core.h>
  #include <linux/serial_reg.h>
  #include <linux/platform_device.h>
@@ -126,6 +127,8 @@ struct uart_8250_port {
  #define MSR_SAVE_FLAGS UART_MSR_ANY_DELTA
         unsigned char           msr_saved_flags;
  
+       atomic_t                console_printing;
+
         struct uart_8250_dma    *dma;
         const struct uart_8250_ops *ops;
  
@@ -181,6 +184,8 @@ void serial8250_init_port(struct uart_8250_port *up);
  void serial8250_set_defaults(struct uart_8250_port *up);
  void serial8250_console_write(struct uart_8250_port *up, const char *s,
                               unsigned int count);
+void serial8250_console_write_atomic(struct uart_8250_port *up, const char *s,
+                                    unsigned int count);
  int serial8250_console_setup(struct uart_port *port, char *options, bool probe);
  int serial8250_console_exit(struct uart_port *port);
  
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h

index 19e595c..58b9250 100644 (file)
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -300,6 +300,7 @@ struct sk_buff_head {
  
         __u32           qlen;
         spinlock_t      lock;
+       raw_spinlock_t  raw_lock;
  };
  
  struct sk_buff;
@@ -1992,6 +1993,12 @@ static inline void skb_queue_head_init(struct sk_buff_head *list)
         __skb_queue_head_init(list);
  }
  
+static inline void skb_queue_head_init_raw(struct sk_buff_head *list)
+{
+       raw_spin_lock_init(&list->raw_lock);
+       __skb_queue_head_init(list);
+}
+
  static inline void skb_queue_head_init_class(struct sk_buff_head *list,
                 struct lock_class_key *class)
  {
diff --git a/include/linux/smp.h b/include/linux/smp.h

index 510519e..7ac9fdb 100644 (file)
--- a/include/linux/smp.h
+++ b/include/linux/smp.h
@@ -268,6 +268,9 @@ static inline int get_boot_cpu_id(void)
  #define get_cpu()              ({ preempt_disable(); __smp_processor_id(); })
  #define put_cpu()              preempt_enable()
  
+#define get_cpu_light()                ({ migrate_disable(); __smp_processor_id(); })
+#define put_cpu_light()                migrate_enable()
+
  /*
   * Callback to arch code if there's nosmp or maxcpus=0 on the
   * boot command line:
diff --git a/include/linux/spinlock_types_up.h b/include/linux/spinlock_types_up.h

index c09b640..7f86a20 100644 (file)
--- a/include/linux/spinlock_types_up.h
+++ b/include/linux/spinlock_types_up.h
@@ -1,7 +1,7 @@
  #ifndef __LINUX_SPINLOCK_TYPES_UP_H
  #define __LINUX_SPINLOCK_TYPES_UP_H
  
-#ifndef __LINUX_SPINLOCK_TYPES_H
+#ifndef __LINUX_SPINLOCK_TYPES_RAW_H
  # error "please don't include this file directly"
  #endif
  
diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h

index 0999f63..7af834b 100644 (file)
--- a/include/linux/thread_info.h
+++ b/include/linux/thread_info.h
@@ -163,7 +163,17 @@ static inline int test_ti_thread_flag(struct thread_info *ti, int flag)
         clear_ti_thread_flag(task_thread_info(t), TIF_##fl)
  #endif /* !CONFIG_GENERIC_ENTRY */
  
-#define tif_need_resched() test_thread_flag(TIF_NEED_RESCHED)
+#ifdef CONFIG_PREEMPT_LAZY
+#define tif_need_resched()     (test_thread_flag(TIF_NEED_RESCHED) || \
+                                test_thread_flag(TIF_NEED_RESCHED_LAZY))
+#define tif_need_resched_now() (test_thread_flag(TIF_NEED_RESCHED))
+#define tif_need_resched_lazy()        test_thread_flag(TIF_NEED_RESCHED_LAZY)
+
+#else
+#define tif_need_resched()     test_thread_flag(TIF_NEED_RESCHED)
+#define tif_need_resched_now() test_thread_flag(TIF_NEED_RESCHED)
+#define tif_need_resched_lazy()        0
+#endif
  
  #ifndef CONFIG_HAVE_ARCH_WITHIN_STACK_FRAMES
  static inline int arch_within_stack_frames(const void * const stack,
diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h

index ff13717..54fe3b1 100644 (file)
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -69,6 +69,7 @@ struct trace_entry {
         unsigned char           flags;
         unsigned char           preempt_count;
         int                     pid;
+       unsigned char           preempt_lazy_count;
  };
  
  #define TRACE_EVENT_TYPE_MAX                                           \
@@ -158,9 +159,10 @@ static inline void tracing_generic_entry_update(struct trace_entry *entry,
                                                 unsigned int trace_ctx)
  {
         entry->preempt_count            = trace_ctx & 0xff;
+       entry->preempt_lazy_count       = (trace_ctx >> 16) & 0xff;
         entry->pid                      = current->pid;
         entry->type                     = type;
-       entry->flags =                  trace_ctx >> 16;
+       entry->flags                    = trace_ctx >> 24;
  }
  
  unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status);
@@ -173,6 +175,7 @@ enum trace_flag_type {
         TRACE_FLAG_SOFTIRQ              = 0x10,
         TRACE_FLAG_PREEMPT_RESCHED      = 0x20,
         TRACE_FLAG_NMI                  = 0x40,
+       TRACE_FLAG_NEED_RESCHED_LAZY    = 0x80,
  };
  
  #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
diff --git a/include/linux/u64_stats_sync.h b/include/linux/u64_stats_sync.h

index e81856c..81dc1f5 100644 (file)
--- a/include/linux/u64_stats_sync.h
+++ b/include/linux/u64_stats_sync.h
@@ -66,7 +66,7 @@
  #include <linux/seqlock.h>
  
  struct u64_stats_sync {
-#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
+#if BITS_PER_LONG==32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT))
         seqcount_t      seq;
  #endif
  };
@@ -83,6 +83,11 @@ static inline u64 u64_stats_read(const u64_stats_t *p)
         return local64_read(&p->v);
  }
  
+static inline void u64_stats_set(u64_stats_t *p, u64 val)
+{
+       local64_set(&p->v, val);
+}
+
  static inline void u64_stats_add(u64_stats_t *p, unsigned long val)
  {
         local64_add(val, &p->v);
@@ -104,6 +109,11 @@ static inline u64 u64_stats_read(const u64_stats_t *p)
         return p->v;
  }
  
+static inline void u64_stats_set(u64_stats_t *p, u64 val)
+{
+       p->v = val;
+}
+
  static inline void u64_stats_add(u64_stats_t *p, unsigned long val)
  {
         p->v += val;
@@ -115,7 +125,7 @@ static inline void u64_stats_inc(u64_stats_t *p)
  }
  #endif
  
-#if BITS_PER_LONG == 32 && defined(CONFIG_SMP)
+#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT))
  #define u64_stats_init(syncp)  seqcount_init(&(syncp)->seq)
  #else
  static inline void u64_stats_init(struct u64_stats_sync *syncp)
@@ -125,15 +135,19 @@ static inline void u64_stats_init(struct u64_stats_sync *syncp)
  
  static inline void u64_stats_update_begin(struct u64_stats_sync *syncp)
  {
-#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
+#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT))
+       if (IS_ENABLED(CONFIG_PREEMPT_RT))
+               preempt_disable();
         write_seqcount_begin(&syncp->seq);
  #endif
  }
  
  static inline void u64_stats_update_end(struct u64_stats_sync *syncp)
  {
-#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
+#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT))
         write_seqcount_end(&syncp->seq);
+       if (IS_ENABLED(CONFIG_PREEMPT_RT))
+               preempt_enable();
  #endif
  }
  
@@ -142,8 +156,11 @@ u64_stats_update_begin_irqsave(struct u64_stats_sync *syncp)
  {
         unsigned long flags = 0;
  
-#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
-       local_irq_save(flags);
+#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT))
+       if (IS_ENABLED(CONFIG_PREEMPT_RT))
+               preempt_disable();
+       else
+               local_irq_save(flags);
         write_seqcount_begin(&syncp->seq);
  #endif
         return flags;
@@ -153,15 +170,18 @@ static inline void
  u64_stats_update_end_irqrestore(struct u64_stats_sync *syncp,
                                 unsigned long flags)
  {
-#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
+#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT))
         write_seqcount_end(&syncp->seq);
-       local_irq_restore(flags);
+       if (IS_ENABLED(CONFIG_PREEMPT_RT))
+               preempt_enable();
+       else
+               local_irq_restore(flags);
  #endif
  }
  
  static inline unsigned int __u64_stats_fetch_begin(const struct u64_stats_sync *syncp)
  {
-#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
+#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT))
         return read_seqcount_begin(&syncp->seq);
  #else
         return 0;
@@ -170,7 +190,7 @@ static inline unsigned int __u64_stats_fetch_begin(const struct u64_stats_sync *
  
  static inline unsigned int u64_stats_fetch_begin(const struct u64_stats_sync *syncp)
  {
-#if BITS_PER_LONG==32 && !defined(CONFIG_SMP)
+#if BITS_PER_LONG == 32 && (!defined(CONFIG_SMP) && !defined(CONFIG_PREEMPT_RT))
         preempt_disable();
  #endif
         return __u64_stats_fetch_begin(syncp);
@@ -179,7 +199,7 @@ static inline unsigned int u64_stats_fetch_begin(const struct u64_stats_sync *sy
  static inline bool __u64_stats_fetch_retry(const struct u64_stats_sync *syncp,
                                          unsigned int start)
  {
-#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
+#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT))
         return read_seqcount_retry(&syncp->seq, start);
  #else
         return false;
@@ -189,7 +209,7 @@ static inline bool __u64_stats_fetch_retry(const struct u64_stats_sync *syncp,
  static inline bool u64_stats_fetch_retry(const struct u64_stats_sync *syncp,
                                          unsigned int start)
  {
-#if BITS_PER_LONG==32 && !defined(CONFIG_SMP)
+#if BITS_PER_LONG == 32 && (!defined(CONFIG_SMP) && !defined(CONFIG_PREEMPT_RT))
         preempt_enable();
  #endif
         return __u64_stats_fetch_retry(syncp, start);
@@ -203,7 +223,9 @@ static inline bool u64_stats_fetch_retry(const struct u64_stats_sync *syncp,
   */
  static inline unsigned int u64_stats_fetch_begin_irq(const struct u64_stats_sync *syncp)
  {
-#if BITS_PER_LONG==32 && !defined(CONFIG_SMP)
+#if BITS_PER_LONG == 32 && defined(CONFIG_PREEMPT_RT)
+       preempt_disable();
+#elif BITS_PER_LONG == 32 && !defined(CONFIG_SMP)
         local_irq_disable();
  #endif
         return __u64_stats_fetch_begin(syncp);
@@ -212,7 +234,9 @@ static inline unsigned int u64_stats_fetch_begin_irq(const struct u64_stats_sync
  static inline bool u64_stats_fetch_retry_irq(const struct u64_stats_sync *syncp,
                                              unsigned int start)
  {
-#if BITS_PER_LONG==32 && !defined(CONFIG_SMP)
+#if BITS_PER_LONG == 32 && defined(CONFIG_PREEMPT_RT)
+       preempt_enable();
+#elif BITS_PER_LONG == 32 && !defined(CONFIG_SMP)
         local_irq_enable();
  #endif
         return __u64_stats_fetch_retry(syncp, start);
diff --git a/include/net/act_api.h b/include/net/act_api.h

index f19f7f4..b5b624c 100644 (file)
--- a/include/net/act_api.h
+++ b/include/net/act_api.h
@@ -30,13 +30,13 @@ struct tc_action {
         atomic_t                        tcfa_bindcnt;
         int                             tcfa_action;
         struct tcf_t                    tcfa_tm;
-       struct gnet_stats_basic_packed  tcfa_bstats;
-       struct gnet_stats_basic_packed  tcfa_bstats_hw;
+       struct gnet_stats_basic_sync    tcfa_bstats;
+       struct gnet_stats_basic_sync    tcfa_bstats_hw;
         struct gnet_stats_queue         tcfa_qstats;
         struct net_rate_estimator __rcu *tcfa_rate_est;
         spinlock_t                      tcfa_lock;
-       struct gnet_stats_basic_cpu __percpu *cpu_bstats;
-       struct gnet_stats_basic_cpu __percpu *cpu_bstats_hw;
+       struct gnet_stats_basic_sync __percpu *cpu_bstats;
+       struct gnet_stats_basic_sync __percpu *cpu_bstats_hw;
         struct gnet_stats_queue __percpu *cpu_qstats;
         struct tc_cookie        __rcu *act_cookie;
         struct tcf_chain        __rcu *goto_chain;
@@ -206,7 +206,7 @@ static inline void tcf_action_update_bstats(struct tc_action *a,
                                             struct sk_buff *skb)
  {
         if (likely(a->cpu_bstats)) {
-               bstats_cpu_update(this_cpu_ptr(a->cpu_bstats), skb);
+               bstats_update(this_cpu_ptr(a->cpu_bstats), skb);
                 return;
         }
         spin_lock(&a->tcfa_lock);
diff --git a/include/net/gen_stats.h b/include/net/gen_stats.h

index 1424e02..7aa2b8e 100644 (file)
--- a/include/net/gen_stats.h
+++ b/include/net/gen_stats.h
@@ -7,14 +7,17 @@
  #include <linux/rtnetlink.h>
  #include <linux/pkt_sched.h>
  
-/* Note: this used to be in include/uapi/linux/gen_stats.h */
-struct gnet_stats_basic_packed {
-       __u64   bytes;
-       __u64   packets;
-};
-
-struct gnet_stats_basic_cpu {
-       struct gnet_stats_basic_packed bstats;
+/* Throughput stats.
+ * Must be initialized beforehand with gnet_stats_basic_sync_init().
+ *
+ * If no reads can ever occur parallel to writes (e.g. stack-allocated
+ * bstats), then the internal stat values can be written to and read
+ * from directly. Otherwise, use _bstats_set/update() for writes and
+ * gnet_stats_add_basic() for reads.
+ */
+struct gnet_stats_basic_sync {
+       u64_stats_t bytes;
+       u64_stats_t packets;
         struct u64_stats_sync syncp;
  } __aligned(2 * sizeof(u64));
  
@@ -34,6 +37,7 @@ struct gnet_dump {
         struct tc_stats   tc_stats;
  };
  
+void gnet_stats_basic_sync_init(struct gnet_stats_basic_sync *b);
  int gnet_stats_start_copy(struct sk_buff *skb, int type, spinlock_t *lock,
                           struct gnet_dump *d, int padattr);
  
@@ -42,41 +46,38 @@ int gnet_stats_start_copy_compat(struct sk_buff *skb, int type,
                                  spinlock_t *lock, struct gnet_dump *d,
                                  int padattr);
  
-int gnet_stats_copy_basic(const seqcount_t *running,
-                         struct gnet_dump *d,
-                         struct gnet_stats_basic_cpu __percpu *cpu,
-                         struct gnet_stats_basic_packed *b);
-void __gnet_stats_copy_basic(const seqcount_t *running,
-                            struct gnet_stats_basic_packed *bstats,
-                            struct gnet_stats_basic_cpu __percpu *cpu,
-                            struct gnet_stats_basic_packed *b);
-int gnet_stats_copy_basic_hw(const seqcount_t *running,
-                            struct gnet_dump *d,
-                            struct gnet_stats_basic_cpu __percpu *cpu,
-                            struct gnet_stats_basic_packed *b);
+int gnet_stats_copy_basic(struct gnet_dump *d,
+                         struct gnet_stats_basic_sync __percpu *cpu,
+                         struct gnet_stats_basic_sync *b, bool running);
+void gnet_stats_add_basic(struct gnet_stats_basic_sync *bstats,
+                         struct gnet_stats_basic_sync __percpu *cpu,
+                         struct gnet_stats_basic_sync *b, bool running);
+int gnet_stats_copy_basic_hw(struct gnet_dump *d,
+                            struct gnet_stats_basic_sync __percpu *cpu,
+                            struct gnet_stats_basic_sync *b, bool running);
  int gnet_stats_copy_rate_est(struct gnet_dump *d,
                              struct net_rate_estimator __rcu **ptr);
  int gnet_stats_copy_queue(struct gnet_dump *d,
                           struct gnet_stats_queue __percpu *cpu_q,
                           struct gnet_stats_queue *q, __u32 qlen);
-void __gnet_stats_copy_queue(struct gnet_stats_queue *qstats,
-                            const struct gnet_stats_queue __percpu *cpu_q,
-                            const struct gnet_stats_queue *q, __u32 qlen);
+void gnet_stats_add_queue(struct gnet_stats_queue *qstats,
+                         const struct gnet_stats_queue __percpu *cpu_q,
+                         const struct gnet_stats_queue *q);
  int gnet_stats_copy_app(struct gnet_dump *d, void *st, int len);
  
  int gnet_stats_finish_copy(struct gnet_dump *d);
  
-int gen_new_estimator(struct gnet_stats_basic_packed *bstats,
-                     struct gnet_stats_basic_cpu __percpu *cpu_bstats,
+int gen_new_estimator(struct gnet_stats_basic_sync *bstats,
+                     struct gnet_stats_basic_sync __percpu *cpu_bstats,
                       struct net_rate_estimator __rcu **rate_est,
                       spinlock_t *lock,
-                     seqcount_t *running, struct nlattr *opt);
+                     bool running, struct nlattr *opt);
  void gen_kill_estimator(struct net_rate_estimator __rcu **ptr);
-int gen_replace_estimator(struct gnet_stats_basic_packed *bstats,
-                         struct gnet_stats_basic_cpu __percpu *cpu_bstats,
+int gen_replace_estimator(struct gnet_stats_basic_sync *bstats,
+                         struct gnet_stats_basic_sync __percpu *cpu_bstats,
                           struct net_rate_estimator __rcu **ptr,
                           spinlock_t *lock,
-                         seqcount_t *running, struct nlattr *opt);
+                         bool running, struct nlattr *opt);
  bool gen_estimator_active(struct net_rate_estimator __rcu **ptr);
  bool gen_estimator_read(struct net_rate_estimator __rcu **ptr,
                         struct gnet_stats_rate_est64 *sample);
diff --git a/include/net/netfilter/xt_rateest.h b/include/net/netfilter/xt_rateest.h

index 832ab69..4c3809e 100644 (file)
--- a/include/net/netfilter/xt_rateest.h
+++ b/include/net/netfilter/xt_rateest.h
@@ -6,7 +6,7 @@
  
  struct xt_rateest {
         /* keep lock and bstats on same cache line to speedup xt_rateest_tg() */
-       struct gnet_stats_basic_packed  bstats;
+       struct gnet_stats_basic_sync    bstats;
         spinlock_t                      lock;
  
  
diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h

index 83a6d07..4a58331 100644 (file)
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -765,7 +765,7 @@ struct tc_cookie {
  };
  
  struct tc_qopt_offload_stats {
-       struct gnet_stats_basic_packed *bstats;
+       struct gnet_stats_basic_sync *bstats;
         struct gnet_stats_queue *qstats;
  };
  
@@ -885,7 +885,7 @@ struct tc_gred_qopt_offload_params {
  };
  
  struct tc_gred_qopt_offload_stats {
-       struct gnet_stats_basic_packed bstats[MAX_DPs];
+       struct gnet_stats_basic_sync bstats[MAX_DPs];
         struct gnet_stats_queue qstats[MAX_DPs];
         struct red_stats *xstats[MAX_DPs];
  };
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h

index 6906da5..e9fe7a6 100644 (file)
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -40,6 +40,13 @@ enum qdisc_state_t {
         __QDISC_STATE_DRAINING,
  };
  
+enum qdisc_state2_t {
+       /* Only for !TCQ_F_NOLOCK qdisc. Never access it directly.
+        * Use qdisc_run_begin/end() or qdisc_is_running() instead.
+        */
+       __QDISC_STATE2_RUNNING,
+};
+
  #define QDISC_STATE_MISSED     BIT(__QDISC_STATE_MISSED)
  #define QDISC_STATE_DRAINING   BIT(__QDISC_STATE_DRAINING)
  
@@ -97,7 +104,7 @@ struct Qdisc {
         struct netdev_queue     *dev_queue;
  
         struct net_rate_estimator __rcu *rate_est;
-       struct gnet_stats_basic_cpu __percpu *cpu_bstats;
+       struct gnet_stats_basic_sync __percpu *cpu_bstats;
         struct gnet_stats_queue __percpu *cpu_qstats;
         int                     pad;
         refcount_t              refcnt;
@@ -107,10 +114,10 @@ struct Qdisc {
          */
         struct sk_buff_head     gso_skb ____cacheline_aligned_in_smp;
         struct qdisc_skb_head   q;
-       struct gnet_stats_basic_packed bstats;
-       seqcount_t              running;
+       struct gnet_stats_basic_sync bstats;
         struct gnet_stats_queue qstats;
         unsigned long           state;
+       unsigned long           state2; /* must be written under qdisc spinlock */
         struct Qdisc            *next_sched;
         struct sk_buff_head     skb_bad_txq;
  
@@ -143,11 +150,15 @@ static inline struct Qdisc *qdisc_refcount_inc_nz(struct Qdisc *qdisc)
         return NULL;
  }
  
+/* For !TCQ_F_NOLOCK qdisc: callers must either call this within a qdisc
+ * root_lock section, or provide their own memory barriers -- ordering
+ * against the non-atomic bit operations in qdisc_run_begin/end().
+ */
  static inline bool qdisc_is_running(struct Qdisc *qdisc)
  {
         if (qdisc->flags & TCQ_F_NOLOCK)
                 return spin_is_locked(&qdisc->seqlock);
-       return (raw_read_seqcount(&qdisc->running) & 1) ? true : false;
+       return test_bit(__QDISC_STATE2_RUNNING, &qdisc->state2);
  }
  
  static inline bool nolock_qdisc_is_empty(const struct Qdisc *qdisc)
@@ -167,6 +178,9 @@ static inline bool qdisc_is_empty(const struct Qdisc *qdisc)
         return !READ_ONCE(qdisc->q.qlen);
  }
  
+/* For !TCQ_F_NOLOCK qdisc, qdisc_run_begin/end() must be invoked with
+ * the qdisc root lock acquired.
+ */
  static inline bool qdisc_run_begin(struct Qdisc *qdisc)
  {
         if (qdisc->flags & TCQ_F_NOLOCK) {
@@ -186,15 +200,8 @@ static inline bool qdisc_run_begin(struct Qdisc *qdisc)
                  * when testing it in qdisc_run_end()
                  */
                 return spin_trylock(&qdisc->seqlock);
-       } else if (qdisc_is_running(qdisc)) {
-               return false;
         }
-       /* Variant of write_seqcount_begin() telling lockdep a trylock
-        * was attempted.
-        */
-       raw_write_seqcount_begin(&qdisc->running);
-       seqcount_acquire(&qdisc->running.dep_map, 0, 1, _RET_IP_);
-       return true;
+       return !__test_and_set_bit(__QDISC_STATE2_RUNNING, &qdisc->state2);
  }
  
  static inline void qdisc_run_end(struct Qdisc *qdisc)
@@ -212,7 +219,7 @@ static inline void qdisc_run_end(struct Qdisc *qdisc)
                                       &qdisc->state)))
                         __netif_schedule(qdisc);
         } else {
-               write_seqcount_end(&qdisc->running);
+               __clear_bit(__QDISC_STATE2_RUNNING, &qdisc->state2);
         }
  }
  
@@ -576,14 +583,6 @@ static inline spinlock_t *qdisc_root_sleeping_lock(const struct Qdisc *qdisc)
         return qdisc_lock(root);
  }
  
-static inline seqcount_t *qdisc_root_sleeping_running(const struct Qdisc *qdisc)
-{
-       struct Qdisc *root = qdisc_root_sleeping(qdisc);
-
-       ASSERT_RTNL();
-       return &root->running;
-}
-
  static inline struct net_device *qdisc_dev(const struct Qdisc *qdisc)
  {
         return qdisc->dev_queue->dev;
@@ -833,14 +832,16 @@ static inline int qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch,
         return sch->enqueue(skb, sch, to_free);
  }
  
-static inline void _bstats_update(struct gnet_stats_basic_packed *bstats,
+static inline void _bstats_update(struct gnet_stats_basic_sync *bstats,
                                   __u64 bytes, __u32 packets)
  {
-       bstats->bytes += bytes;
-       bstats->packets += packets;
+       u64_stats_update_begin(&bstats->syncp);
+       u64_stats_add(&bstats->bytes, bytes);
+       u64_stats_add(&bstats->packets, packets);
+       u64_stats_update_end(&bstats->syncp);
  }
  
-static inline void bstats_update(struct gnet_stats_basic_packed *bstats,
+static inline void bstats_update(struct gnet_stats_basic_sync *bstats,
                                  const struct sk_buff *skb)
  {
         _bstats_update(bstats,
@@ -848,26 +849,10 @@ static inline void bstats_update(struct gnet_stats_basic_packed *bstats,
                        skb_is_gso(skb) ? skb_shinfo(skb)->gso_segs : 1);
  }
  
-static inline void _bstats_cpu_update(struct gnet_stats_basic_cpu *bstats,
-                                     __u64 bytes, __u32 packets)
-{
-       u64_stats_update_begin(&bstats->syncp);
-       _bstats_update(&bstats->bstats, bytes, packets);
-       u64_stats_update_end(&bstats->syncp);
-}
-
-static inline void bstats_cpu_update(struct gnet_stats_basic_cpu *bstats,
-                                    const struct sk_buff *skb)
-{
-       u64_stats_update_begin(&bstats->syncp);
-       bstats_update(&bstats->bstats, skb);
-       u64_stats_update_end(&bstats->syncp);
-}
-
  static inline void qdisc_bstats_cpu_update(struct Qdisc *sch,
                                            const struct sk_buff *skb)
  {
-       bstats_cpu_update(this_cpu_ptr(sch->cpu_bstats), skb);
+       bstats_update(this_cpu_ptr(sch->cpu_bstats), skb);
  }
  
  static inline void qdisc_bstats_update(struct Qdisc *sch,
@@ -956,10 +941,9 @@ static inline void qdisc_qstats_qlen_backlog(struct Qdisc *sch,  __u32 *qlen,
                                              __u32 *backlog)
  {
         struct gnet_stats_queue qstats = { 0 };
-       __u32 len = qdisc_qlen_sum(sch);
  
-       __gnet_stats_copy_queue(&qstats, sch->cpu_qstats, &sch->qstats, len);
-       *qlen = qstats.qlen;
+       gnet_stats_add_queue(&qstats, sch->cpu_qstats, &sch->qstats);
+       *qlen = qstats.qlen + qdisc_qlen(sch);
         *backlog = qstats.backlog;
  }
  
@@ -1304,7 +1288,7 @@ void psched_ppscfg_precompute(struct psched_pktrate *r, u64 pktrate64);
  struct mini_Qdisc {
         struct tcf_proto *filter_list;
         struct tcf_block *block;
-       struct gnet_stats_basic_cpu __percpu *cpu_bstats;
+       struct gnet_stats_basic_sync __percpu *cpu_bstats;
         struct gnet_stats_queue __percpu *cpu_qstats;
         struct rcu_head rcu;
  };
@@ -1312,7 +1296,7 @@ struct mini_Qdisc {
  static inline void mini_qdisc_bstats_cpu_update(struct mini_Qdisc *miniq,
                                                 const struct sk_buff *skb)
  {
-       bstats_cpu_update(this_cpu_ptr(miniq->cpu_bstats), skb);
+       bstats_update(this_cpu_ptr(miniq->cpu_bstats), skb);
  }
  
  static inline void mini_qdisc_qstats_cpu_drop(struct mini_Qdisc *miniq)
diff --git a/init/Kconfig b/init/Kconfig

index a414439..3f80c16 100644 (file)
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -906,7 +906,7 @@ config NUMA_BALANCING
         bool "Memory placement aware NUMA scheduler"
         depends on ARCH_SUPPORTS_NUMA_BALANCING
         depends on !ARCH_WANT_NUMA_VARIABLE_LOCALITY
-       depends on SMP && NUMA && MIGRATION
+       depends on SMP && NUMA && MIGRATION && !PREEMPT_RT
         help
           This option adds support for automatic NUMA aware memory/task placement.
           The mechanism is quite primitive and is based on migrating memory when
@@ -1901,6 +1901,7 @@ choice
  
  config SLAB
         bool "SLAB"
+       depends on !PREEMPT_RT
         select HAVE_HARDENED_USERCOPY_ALLOCATOR
         help
           The regular slab allocator that is established and known to work
@@ -1921,6 +1922,7 @@ config SLUB
  config SLOB
         depends on EXPERT
         bool "SLOB (Simple Allocator)"
+       depends on !PREEMPT_RT
         help
            SLOB replaces the stock allocator with a drastically simpler
            allocator. SLOB is generally more space efficient but
diff --git a/init/main.c b/init/main.c

index 649d9e4..ee92d60 100644 (file)
--- a/init/main.c
+++ b/init/main.c
@@ -1606,6 +1606,7 @@ static noinline void __init kernel_init_freeable(void)
  
         rcu_init_tasks_generic();
         do_pre_smp_initcalls();
+       rcu_tasks_initiate_self_tests();
         lockup_detector_init();
  
         smp_init();
diff --git a/kernel/Kconfig.preempt b/kernel/Kconfig.preempt

index 5876e30..5df0776 100644 (file)
--- a/kernel/Kconfig.preempt
+++ b/kernel/Kconfig.preempt
@@ -1,5 +1,11 @@
  # SPDX-License-Identifier: GPL-2.0-only
  
+config HAVE_PREEMPT_LAZY
+       bool
+
+config PREEMPT_LAZY
+       def_bool y if HAVE_PREEMPT_LAZY && PREEMPT_RT
+
  choice
         prompt "Preemption Model"
         default PREEMPT_NONE
diff --git a/kernel/cgroup/rstat.c b/kernel/cgroup/rstat.c

index 1486768..bb3b805 100644 (file)
--- a/kernel/cgroup/rstat.c
+++ b/kernel/cgroup/rstat.c
@@ -156,8 +156,9 @@ static void cgroup_rstat_flush_locked(struct cgroup *cgrp, bool may_sleep)
                 raw_spinlock_t *cpu_lock = per_cpu_ptr(&cgroup_rstat_cpu_lock,
                                                        cpu);
                 struct cgroup *pos = NULL;
+               unsigned long flags;
  
-               raw_spin_lock(cpu_lock);
+               raw_spin_lock_irqsave(cpu_lock, flags);
                 while ((pos = cgroup_rstat_cpu_pop_updated(pos, cgrp, cpu))) {
                         struct cgroup_subsys_state *css;
  
@@ -169,7 +170,7 @@ static void cgroup_rstat_flush_locked(struct cgroup *cgrp, bool may_sleep)
                                 css->ss->css_rstat_flush(css, cpu);
                         rcu_read_unlock();
                 }
-               raw_spin_unlock(cpu_lock);
+               raw_spin_unlock_irqrestore(cpu_lock, flags);
  
                 /* if @may_sleep, play nice and yield if necessary */
                 if (may_sleep && (need_resched() ||
diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c

index 7beceb4..28497c0 100644 (file)
--- a/kernel/debug/debug_core.c
+++ b/kernel/debug/debug_core.c
@@ -239,35 +239,42 @@ NOKPROBE_SYMBOL(kgdb_call_nmi_hook);
  static DEFINE_PER_CPU(call_single_data_t, kgdb_roundup_csd) =
         CSD_INIT(kgdb_call_nmi_hook, NULL);
  
-void __weak kgdb_roundup_cpus(void)
+void __weak kgdb_roundup_cpu(unsigned int cpu)
  {
         call_single_data_t *csd;
+       int ret;
+
+       csd = &per_cpu(kgdb_roundup_csd, cpu);
+
+       /*
+        * If it didn't round up last time, don't try again
+        * since smp_call_function_single_async() will block.
+        *
+        * If rounding_up is false then we know that the
+        * previous call must have at least started and that
+        * means smp_call_function_single_async() won't block.
+        */
+       if (kgdb_info[cpu].rounding_up)
+               return;
+       kgdb_info[cpu].rounding_up = true;
+
+       ret = smp_call_function_single_async(cpu, csd);
+       if (ret)
+               kgdb_info[cpu].rounding_up = false;
+}
+NOKPROBE_SYMBOL(kgdb_roundup_cpu);
+
+void __weak kgdb_roundup_cpus(void)
+{
         int this_cpu = raw_smp_processor_id();
         int cpu;
-       int ret;
  
         for_each_online_cpu(cpu) {
                 /* No need to roundup ourselves */
                 if (cpu == this_cpu)
                         continue;
  
-               csd = &per_cpu(kgdb_roundup_csd, cpu);
-
-               /*
-                * If it didn't round up last time, don't try again
-                * since smp_call_function_single_async() will block.
-                *
-                * If rounding_up is false then we know that the
-                * previous call must have at least started and that
-                * means smp_call_function_single_async() won't block.
-                */
-               if (kgdb_info[cpu].rounding_up)
-                       continue;
-               kgdb_info[cpu].rounding_up = true;
-
-               ret = smp_call_function_single_async(cpu, csd);
-               if (ret)
-                       kgdb_info[cpu].rounding_up = false;
+               kgdb_roundup_cpu(cpu);
         }
  }
  NOKPROBE_SYMBOL(kgdb_roundup_cpus);
diff --git a/kernel/debug/kdb/kdb_io.c b/kernel/debug/kdb/kdb_io.c

index 6735ac3..539a2f0 100644 (file)
--- a/kernel/debug/kdb/kdb_io.c
+++ b/kernel/debug/kdb/kdb_io.c
@@ -559,23 +559,17 @@ static void kdb_msg_write(const char *msg, int msg_len)
                 cp++;
         }
  
+       /* mirror output on atomic consoles */
         for_each_console(c) {
                 if (!(c->flags & CON_ENABLED))
                         continue;
                 if (c == dbg_io_ops->cons)
                         continue;
-               /*
-                * Set oops_in_progress to encourage the console drivers to
-                * disregard their internal spin locks: in the current calling
-                * context the risk of deadlock is a bigger problem than risks
-                * due to re-entering the console driver. We operate directly on
-                * oops_in_progress rather than using bust_spinlocks() because
-                * the calls bust_spinlocks() makes on exit are not appropriate
-                * for this calling context.
-                */
-               ++oops_in_progress;
-               c->write(c, msg, msg_len);
-               --oops_in_progress;
+
+               if (!c->write_atomic)
+                       continue;
+               c->write_atomic(c, msg, msg_len);
+
                 touch_nmi_watchdog();
         }
  }
diff --git a/kernel/entry/common.c b/kernel/entry/common.c

index 998bdb7..a553fed 100644 (file)
--- a/kernel/entry/common.c
+++ b/kernel/entry/common.c
@@ -159,9 +159,17 @@ static unsigned long exit_to_user_mode_loop(struct pt_regs *regs,
  
                 local_irq_enable_exit_to_user(ti_work);
  
-               if (ti_work & _TIF_NEED_RESCHED)
+               if (ti_work & _TIF_NEED_RESCHED_MASK)
                         schedule();
  
+#ifdef ARCH_RT_DELAYS_SIGNAL_SEND
+               if (unlikely(current->forced_info.si_signo)) {
+                       struct task_struct *t = current;
+                       force_sig_info(&t->forced_info);
+                       t->forced_info.si_signo = 0;
+               }
+#endif
+
                 if (ti_work & _TIF_UPROBE)
                         uprobe_notify_resume(regs);
  
@@ -387,7 +395,7 @@ void irqentry_exit_cond_resched(void)
                 rcu_irq_exit_check_preempt();
                 if (IS_ENABLED(CONFIG_DEBUG_ENTRY))
                         WARN_ON_ONCE(!on_thread_stack());
-               if (need_resched())
+               if (should_resched(0))
                         preempt_schedule_irq();
         }
  }
diff --git a/kernel/exit.c b/kernel/exit.c

index 80efdfd..6ff17e9 100644 (file)
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -64,6 +64,7 @@
  #include <linux/rcuwait.h>
  #include <linux/compat.h>
  #include <linux/io_uring.h>
+#include <linux/kprobes.h>
  #include <linux/sysfs.h>
  
  #include <linux/uaccess.h>
@@ -215,8 +216,14 @@ static void delayed_put_task_struct(struct rcu_head *rhp)
  {
         struct task_struct *tsk = container_of(rhp, struct task_struct, rcu);
  
+       kprobe_flush_task(tsk);
         perf_event_delayed_put(tsk);
         trace_sched_process_free(tsk);
+
+       /* RT enabled kernels delay freeing the VMAP'ed task stack */
+       if (IS_ENABLED(CONFIG_PREEMPT_RT))
+               put_task_stack(tsk);
+
         put_task_struct(tsk);
  }
  
diff --git a/kernel/fork.c b/kernel/fork.c

index 3fb7e9e..5c820be 100644 (file)
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -289,7 +289,10 @@ static inline void free_thread_stack(struct task_struct *tsk)
                         return;
                 }
  
-               vfree_atomic(tsk->stack);
+               if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+                       vfree_atomic(tsk->stack);
+               else
+                       vfree(tsk->stack);
                 return;
         }
  #endif
@@ -708,6 +711,19 @@ void __mmdrop(struct mm_struct *mm)
  }
  EXPORT_SYMBOL_GPL(__mmdrop);
  
+#ifdef CONFIG_PREEMPT_RT
+/*
+ * RCU callback for delayed mm drop. Not strictly RCU, but call_rcu() is
+ * by far the least expensive way to do that.
+ */
+void __mmdrop_delayed(struct rcu_head *rhp)
+{
+       struct mm_struct *mm = container_of(rhp, struct mm_struct, delayed_drop);
+
+       __mmdrop(mm);
+}
+#endif
+
  static void mmdrop_async_fn(struct work_struct *work)
  {
         struct mm_struct *mm;
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c

index 7a45fd5..23dc888 100644 (file)
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -664,6 +664,29 @@ int generic_handle_irq(unsigned int irq)
  }
  EXPORT_SYMBOL_GPL(generic_handle_irq);
  
+/**
+ * generic_handle_irq_safe - Invoke the handler for a particular irq from any
+ *                          context.
+ * @irq:       The irq number to handle
+ *
+ * Returns:    0 on success, a negative value on error.
+ *
+ * This function can be called from any context (IRQ or process context). It
+ * will report an error if not invoked from IRQ context and the irq has been
+ * marked to enforce IRQ-context only.
+ */
+int generic_handle_irq_safe(unsigned int irq)
+{
+       unsigned long flags;
+       int ret;
+
+       local_irq_save(flags);
+       ret = handle_irq_desc(irq_to_desc(irq));
+       local_irq_restore(flags);
+       return ret;
+}
+EXPORT_SYMBOL_GPL(generic_handle_irq_safe);
+
  #ifdef CONFIG_IRQ_DOMAIN
  /**
   * generic_handle_domain_irq - Invoke the handler for a HW irq belonging
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c

index 9862372..78d90ac 100644 (file)
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -1301,6 +1301,8 @@ static int irq_thread(void *data)
  
         irq_thread_set_ready(desc, action);
  
+       sched_set_fifo(current);
+
         if (force_irqthreads() && test_bit(IRQTF_FORCED_THREAD,
                                            &action->thread_flags))
                 handler_fn = irq_forced_thread_fn;
@@ -1466,8 +1468,6 @@ setup_irq_thread(struct irqaction *new, unsigned int irq, bool secondary)
         if (IS_ERR(t))
                 return PTR_ERR(t);
  
-       sched_set_fifo(t);
-
         /*
          * We keep the reference to the task struct even if
          * the thread dies to avoid that the interrupt code
@@ -2861,7 +2861,7 @@ EXPORT_SYMBOL_GPL(irq_get_irqchip_state);
   *     This call sets the internal irqchip state of an interrupt,
   *     depending on the value of @which.
   *
- *     This function should be called with preemption disabled if the
+ *     This function should be called with migration disabled if the
   *     interrupt controller has per-cpu registers.
   */
  int irq_set_irqchip_state(unsigned int irq, enum irqchip_irq_state which,
diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c

index c481d84..02b2daf 100644 (file)
--- a/kernel/irq/spurious.c
+++ b/kernel/irq/spurious.c
@@ -447,6 +447,10 @@ MODULE_PARM_DESC(noirqdebug, "Disable irq lockup detection when true");
  
  static int __init irqfixup_setup(char *str)
  {
+       if (IS_ENABLED(CONFIG_PREEMPT_RT)) {
+               pr_warn("irqfixup boot option not supported with PREEMPT_RT\n");
+               return 1;
+       }
         irqfixup = 1;
         printk(KERN_WARNING "Misrouted IRQ fixup support enabled.\n");
         printk(KERN_WARNING "This may impact system performance.\n");
@@ -459,6 +463,10 @@ module_param(irqfixup, int, 0644);
  
  static int __init irqpoll_setup(char *str)
  {
+       if (IS_ENABLED(CONFIG_PREEMPT_RT)) {
+               pr_warn("irqpoll boot option not supported with PREEMPT_RT\n");
+               return 1;
+       }
         irqfixup = 2;
         printk(KERN_WARNING "Misrouted IRQ fixup and polling support "
                                 "enabled\n");
diff --git a/kernel/irq_work.c b/kernel/irq_work.c

index db8c248..f7df715 100644 (file)
--- a/kernel/irq_work.c
+++ b/kernel/irq_work.c
@@ -18,11 +18,36 @@
  #include <linux/cpu.h>
  #include <linux/notifier.h>
  #include <linux/smp.h>
+#include <linux/smpboot.h>
  #include <asm/processor.h>
  #include <linux/kasan.h>
  
  static DEFINE_PER_CPU(struct llist_head, raised_list);
  static DEFINE_PER_CPU(struct llist_head, lazy_list);
+static DEFINE_PER_CPU(struct task_struct *, irq_workd);
+
+static void wake_irq_workd(void)
+{
+       struct task_struct *tsk = __this_cpu_read(irq_workd);
+
+       if (!llist_empty(this_cpu_ptr(&lazy_list)) && tsk)
+               wake_up_process(tsk);
+}
+
+#ifdef CONFIG_SMP
+static void irq_work_wake(struct irq_work *entry)
+{
+       wake_irq_workd();
+}
+
+static DEFINE_PER_CPU(struct irq_work, irq_work_wakeup) =
+       IRQ_WORK_INIT_HARD(irq_work_wake);
+#endif
+
+static int irq_workd_should_run(unsigned int cpu)
+{
+       return !llist_empty(this_cpu_ptr(&lazy_list));
+}
  
  /*
   * Claim the entry so that no one else will poke at it.
@@ -52,15 +77,29 @@ void __weak arch_irq_work_raise(void)
  /* Enqueue on current CPU, work must already be claimed and preempt disabled */
  static void __irq_work_queue_local(struct irq_work *work)
  {
+       struct llist_head *list;
+       bool rt_lazy_work = false;
+       bool lazy_work = false;
+       int work_flags;
+
+       work_flags = atomic_read(&work->node.a_flags);
+       if (work_flags & IRQ_WORK_LAZY)
+               lazy_work = true;
+       else if (IS_ENABLED(CONFIG_PREEMPT_RT) &&
+                !(work_flags & IRQ_WORK_HARD_IRQ))
+               rt_lazy_work = true;
+
+       if (lazy_work || rt_lazy_work)
+               list = this_cpu_ptr(&lazy_list);
+       else
+               list = this_cpu_ptr(&raised_list);
+
+       if (!llist_add(&work->node.llist, list))
+               return;
+
         /* If the work is "lazy", handle it from next tick if any */
-       if (atomic_read(&work->node.a_flags) & IRQ_WORK_LAZY) {
-               if (llist_add(&work->node.llist, this_cpu_ptr(&lazy_list)) &&
-                   tick_nohz_tick_stopped())
-                       arch_irq_work_raise();
-       } else {
-               if (llist_add(&work->node.llist, this_cpu_ptr(&raised_list)))
-                       arch_irq_work_raise();
-       }
+       if (!lazy_work || tick_nohz_tick_stopped())
+               arch_irq_work_raise();
  }
  
  /* Enqueue the irq work @work on the current CPU */
@@ -104,17 +143,34 @@ bool irq_work_queue_on(struct irq_work *work, int cpu)
         if (cpu != smp_processor_id()) {
                 /* Arch remote IPI send/receive backend aren't NMI safe */
                 WARN_ON_ONCE(in_nmi());
+
+               /*
+                * On PREEMPT_RT the items which are not marked as
+                * IRQ_WORK_HARD_IRQ are added to the lazy list and a HARD work
+                * item is used on the remote CPU to wake the thread.
+                */
+               if (IS_ENABLED(CONFIG_PREEMPT_RT) &&
+                   !(atomic_read(&work->node.a_flags) & IRQ_WORK_HARD_IRQ)) {
+
+                       if (!llist_add(&work->node.llist, &per_cpu(lazy_list, cpu)))
+                               goto out;
+
+                       work = &per_cpu(irq_work_wakeup, cpu);
+                       if (!irq_work_claim(work))
+                               goto out;
+               }
+
                 __smp_call_single_queue(cpu, &work->node.llist);
         } else {
                 __irq_work_queue_local(work);
         }
+out:
         preempt_enable();
  
         return true;
  #endif /* CONFIG_SMP */
  }
  
-
  bool irq_work_needs_cpu(void)
  {
         struct llist_head *raised, *lazy;
@@ -160,6 +216,10 @@ void irq_work_single(void *arg)
          * else claimed it meanwhile.
          */
         (void)atomic_cmpxchg(&work->node.a_flags, flags, flags & ~IRQ_WORK_BUSY);
+
+       if ((IS_ENABLED(CONFIG_PREEMPT_RT) && !irq_work_is_hard(work)) ||
+           !arch_irq_work_has_interrupt())
+               rcuwait_wake_up(&work->irqwait);
  }
  
  static void irq_work_run_list(struct llist_head *list)
@@ -167,7 +227,12 @@ static void irq_work_run_list(struct llist_head *list)
         struct irq_work *work, *tmp;
         struct llist_node *llnode;
  
-       BUG_ON(!irqs_disabled());
+       /*
+        * On PREEMPT_RT IRQ-work which is not marked as HARD will be processed
+        * in a per-CPU thread in preemptible context. Only the items which are
+        * marked as IRQ_WORK_HARD_IRQ will be processed in hardirq context.
+        */
+       BUG_ON(!irqs_disabled() && !IS_ENABLED(CONFIG_PREEMPT_RT));
  
         if (llist_empty(list))
                 return;
@@ -184,7 +249,10 @@ static void irq_work_run_list(struct llist_head *list)
  void irq_work_run(void)
  {
         irq_work_run_list(this_cpu_ptr(&raised_list));
-       irq_work_run_list(this_cpu_ptr(&lazy_list));
+       if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+               irq_work_run_list(this_cpu_ptr(&lazy_list));
+       else
+               wake_irq_workd();
  }
  EXPORT_SYMBOL_GPL(irq_work_run);
  
@@ -194,7 +262,11 @@ void irq_work_tick(void)
  
         if (!llist_empty(raised) && !arch_irq_work_has_interrupt())
                 irq_work_run_list(raised);
-       irq_work_run_list(this_cpu_ptr(&lazy_list));
+
+       if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+               irq_work_run_list(this_cpu_ptr(&lazy_list));
+       else
+               wake_irq_workd();
  }
  
  /*
@@ -204,8 +276,42 @@ void irq_work_tick(void)
  void irq_work_sync(struct irq_work *work)
  {
         lockdep_assert_irqs_enabled();
+       might_sleep();
+
+       if ((IS_ENABLED(CONFIG_PREEMPT_RT) && !irq_work_is_hard(work)) ||
+           !arch_irq_work_has_interrupt()) {
+               rcuwait_wait_event(&work->irqwait, !irq_work_is_busy(work),
+                                  TASK_UNINTERRUPTIBLE);
+               return;
+       }
  
         while (irq_work_is_busy(work))
                 cpu_relax();
  }
  EXPORT_SYMBOL_GPL(irq_work_sync);
+
+static void run_irq_workd(unsigned int cpu)
+{
+       irq_work_run_list(this_cpu_ptr(&lazy_list));
+}
+
+static void irq_workd_setup(unsigned int cpu)
+{
+       sched_set_fifo_low(current);
+}
+
+static struct smp_hotplug_thread irqwork_threads = {
+       .store                  = &irq_workd,
+       .setup                  = irq_workd_setup,
+       .thread_should_run      = irq_workd_should_run,
+       .thread_fn              = run_irq_workd,
+       .thread_comm            = "irq_work/%u",
+};
+
+static __init int irq_work_init_threads(void)
+{
+       if (IS_ENABLED(CONFIG_PREEMPT_RT))
+               BUG_ON(smpboot_register_percpu_thread(&irqwork_threads));
+       return 0;
+}
+early_initcall(irq_work_init_threads);
diff --git a/kernel/kcov.c b/kernel/kcov.c

index 80bfe71..36ca640 100644 (file)
--- a/kernel/kcov.c
+++ b/kernel/kcov.c
@@ -88,6 +88,7 @@ static struct list_head kcov_remote_areas = LIST_HEAD_INIT(kcov_remote_areas);
  
  struct kcov_percpu_data {
         void                    *irq_area;
+       local_lock_t            lock;
  
         unsigned int            saved_mode;
         unsigned int            saved_size;
@@ -96,7 +97,9 @@ struct kcov_percpu_data {
         int                     saved_sequence;
  };
  
-static DEFINE_PER_CPU(struct kcov_percpu_data, kcov_percpu_data);
+static DEFINE_PER_CPU(struct kcov_percpu_data, kcov_percpu_data) = {
+       .lock = INIT_LOCAL_LOCK(lock),
+};
  
  /* Must be called with kcov_remote_lock locked. */
  static struct kcov_remote *kcov_remote_find(u64 handle)
@@ -824,7 +827,7 @@ void kcov_remote_start(u64 handle)
         if (!in_task() && !in_serving_softirq())
                 return;
  
-       local_irq_save(flags);
+       local_lock_irqsave(&kcov_percpu_data.lock, flags);
  
         /*
          * Check that kcov_remote_start() is not called twice in background
@@ -832,7 +835,7 @@ void kcov_remote_start(u64 handle)
          */
         mode = READ_ONCE(t->kcov_mode);
         if (WARN_ON(in_task() && kcov_mode_enabled(mode))) {
-               local_irq_restore(flags);
+               local_unlock_irqrestore(&kcov_percpu_data.lock, flags);
                 return;
         }
         /*
@@ -841,14 +844,15 @@ void kcov_remote_start(u64 handle)
          * happened while collecting coverage from a background thread.
          */
         if (WARN_ON(in_serving_softirq() && t->kcov_softirq)) {
-               local_irq_restore(flags);
+               local_unlock_irqrestore(&kcov_percpu_data.lock, flags);
                 return;
         }
  
         spin_lock(&kcov_remote_lock);
         remote = kcov_remote_find(handle);
         if (!remote) {
-               spin_unlock_irqrestore(&kcov_remote_lock, flags);
+               spin_unlock(&kcov_remote_lock);
+               local_unlock_irqrestore(&kcov_percpu_data.lock, flags);
                 return;
         }
         kcov_debug("handle = %llx, context: %s\n", handle,
@@ -869,19 +873,19 @@ void kcov_remote_start(u64 handle)
                 size = CONFIG_KCOV_IRQ_AREA_SIZE;
                 area = this_cpu_ptr(&kcov_percpu_data)->irq_area;
         }
-       spin_unlock_irqrestore(&kcov_remote_lock, flags);
+       spin_unlock(&kcov_remote_lock);
  
         /* Can only happen when in_task(). */
         if (!area) {
+               local_unlock_irqrestore(&kcov_percpu_data.lock, flags);
                 area = vmalloc(size * sizeof(unsigned long));
                 if (!area) {
                         kcov_put(kcov);
                         return;
                 }
+               local_lock_irqsave(&kcov_percpu_data.lock, flags);
         }
  
-       local_irq_save(flags);
-
         /* Reset coverage size. */
         *(u64 *)area = 0;
  
@@ -891,7 +895,7 @@ void kcov_remote_start(u64 handle)
         }
         kcov_start(t, kcov, size, area, mode, sequence);
  
-       local_irq_restore(flags);
+       local_unlock_irqrestore(&kcov_percpu_data.lock, flags);
  
  }
  EXPORT_SYMBOL(kcov_remote_start);
@@ -965,12 +969,12 @@ void kcov_remote_stop(void)
         if (!in_task() && !in_serving_softirq())
                 return;
  
-       local_irq_save(flags);
+       local_lock_irqsave(&kcov_percpu_data.lock, flags);
  
         mode = READ_ONCE(t->kcov_mode);
         barrier();
         if (!kcov_mode_enabled(mode)) {
-               local_irq_restore(flags);
+               local_unlock_irqrestore(&kcov_percpu_data.lock, flags);
                 return;
         }
         /*
@@ -978,12 +982,12 @@ void kcov_remote_stop(void)
          * actually found the remote handle and started collecting coverage.
          */
         if (in_serving_softirq() && !t->kcov_softirq) {
-               local_irq_restore(flags);
+               local_unlock_irqrestore(&kcov_percpu_data.lock, flags);
                 return;
         }
         /* Make sure that kcov_softirq is only set when in softirq. */
         if (WARN_ON(!in_serving_softirq() && t->kcov_softirq)) {
-               local_irq_restore(flags);
+               local_unlock_irqrestore(&kcov_percpu_data.lock, flags);
                 return;
         }
  
@@ -1013,7 +1017,7 @@ void kcov_remote_stop(void)
                 spin_unlock(&kcov_remote_lock);
         }
  
-       local_irq_restore(flags);
+       local_unlock_irqrestore(&kcov_percpu_data.lock, flags);
  
         /* Get in kcov_remote_start(). */
         kcov_put(kcov);
@@ -1034,8 +1038,8 @@ static int __init kcov_init(void)
         int cpu;
  
         for_each_possible_cpu(cpu) {
-               void *area = vmalloc(CONFIG_KCOV_IRQ_AREA_SIZE *
-                               sizeof(unsigned long));
+               void *area = vmalloc_node(CONFIG_KCOV_IRQ_AREA_SIZE *
+                               sizeof(unsigned long), cpu_to_node(cpu));
                 if (!area)
                         return -ENOMEM;
                 per_cpu_ptr(&kcov_percpu_data, cpu)->irq_area = area;
diff --git a/kernel/kprobes.c b/kernel/kprobes.c

index 23af2f8..01944eb 100644 (file)
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -1250,10 +1250,10 @@ void kprobe_busy_end(void)
  }
  
  /*
- * This function is called from finish_task_switch when task tk becomes dead,
- * so that we can recycle any function-return probe instances associated
- * with this task. These left over instances represent probed functions
- * that have been called but will never return.
+ * This function is called from delayed_put_task_struct() when a task is
+ * dead and cleaned up to recycle any function-return probe instances
+ * associated with this task. These left over instances represent probed
+ * functions that have been called but will never return.
   */
  void kprobe_flush_task(struct task_struct *tk)
  {
diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c

index 35859da..dfff31e 100644 (file)
--- a/kernel/ksysfs.c
+++ b/kernel/ksysfs.c
@@ -138,6 +138,15 @@ KERNEL_ATTR_RO(vmcoreinfo);
  
  #endif /* CONFIG_CRASH_CORE */
  
+#if defined(CONFIG_PREEMPT_RT)
+static ssize_t realtime_show(struct kobject *kobj,
+                            struct kobj_attribute *attr, char *buf)
+{
+       return sysfs_emit(buf, "%d\n", 1);
+}
+KERNEL_ATTR_RO(realtime);
+#endif
+
  /* whether file capabilities are enabled */
  static ssize_t fscaps_show(struct kobject *kobj,
                                   struct kobj_attribute *attr, char *buf)
@@ -229,6 +238,9 @@ static struct attribute * kernel_attrs[] = {
         &rcu_expedited_attr.attr,
         &rcu_normal_attr.attr,
  #endif
+#ifdef CONFIG_PREEMPT_RT
+       &realtime_attr.attr,
+#endif
         NULL
  };
  
diff --git a/kernel/kthread.c b/kernel/kthread.c

index 5b37a85..4a4d709 100644 (file)
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -270,6 +270,7 @@ EXPORT_SYMBOL_GPL(kthread_parkme);
  
  static int kthread(void *_create)
  {
+       static const struct sched_param param = { .sched_priority = 0 };
         /* Copy data: it's on kthread's stack */
         struct kthread_create_info *create = _create;
         int (*threadfn)(void *data) = create->threadfn;
@@ -300,6 +301,13 @@ static int kthread(void *_create)
         init_completion(&self->parked);
         current->vfork_done = &self->exited;
  
+       /*
+        * The new thread inherited kthreadd's priority and CPU mask. Reset
+        * back to default in case they have been changed.
+        */
+       sched_setscheduler_nocheck(current, SCHED_NORMAL, &param);
+       set_cpus_allowed_ptr(current, housekeeping_cpumask(HK_FLAG_KTHREAD));
+
         /* OK, tell user we're spawned, wait for stop or wakeup */
         __set_current_state(TASK_UNINTERRUPTIBLE);
         create->result = current;
@@ -397,7 +405,6 @@ struct task_struct *__kthread_create_on_node(int (*threadfn)(void *data),
         }
         task = create->result;
         if (!IS_ERR(task)) {
-               static const struct sched_param param = { .sched_priority = 0 };
                 char name[TASK_COMM_LEN];
  
                 /*
@@ -406,13 +413,6 @@ struct task_struct *__kthread_create_on_node(int (*threadfn)(void *data),
                  */
                 vsnprintf(name, sizeof(name), namefmt, args);
                 set_task_comm(task, name);
-               /*
-                * root may have changed our (kthreadd's) priority or CPU mask.
-                * The kernel thread should not inherit these properties.
-                */
-               sched_setscheduler_nocheck(task, SCHED_NORMAL, &param);
-               set_cpus_allowed_ptr(task,
-                                    housekeeping_cpumask(HK_FLAG_KTHREAD));
         }
         kfree(create);
         return task;
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c

index e6a282b..ce3c8a4 100644 (file)
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -5470,6 +5470,7 @@ static noinstr void check_flags(unsigned long flags)
                 }
         }
  
+#ifndef CONFIG_PREEMPT_RT
         /*
          * We dont accurately track softirq state in e.g.
          * hardirq contexts (such as on 4KSTACKS), so only
@@ -5484,6 +5485,7 @@ static noinstr void check_flags(unsigned long flags)
                         DEBUG_LOCKS_WARN_ON(!current->softirqs_enabled);
                 }
         }
+#endif
  
         if (!debug_locks)
                 print_irqtrace_events(current);
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c

index ea5a701..547752d 100644 (file)
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -1097,8 +1097,26 @@ static int __sched task_blocks_on_rt_mutex(struct rt_mutex_base *lock,
          * which is wrong, as the other waiter is not in a deadlock
          * situation.
          */
-       if (owner == task)
+       if (owner == task) {
+#if defined(DEBUG_WW_MUTEXES) && defined(CONFIG_DEBUG_LOCKING_API_SELFTESTS)
+               /*
+                * The lockdep selftest for ww-mutex assumes in a few cases
+                * the ww_ctx->contending_lock assignment via
+                * __ww_mutex_check_kill() which does not happen if the rtmutex
+                * detects the deadlock early.
+                */
+               if (build_ww_mutex() && ww_ctx) {
+                       struct rt_mutex *rtm;
+
+                       /* Check whether the waiter should backout immediately */
+                       rtm = container_of(lock, struct rt_mutex, rtmutex);
+
+                       __ww_mutex_add_waiter(waiter, rtm, ww_ctx);
+                       __ww_mutex_check_kill(rtm, waiter, ww_ctx);
+               }
+#endif
                 return -EDEADLK;
+       }
  
         raw_spin_lock(&task->pi_lock);
         waiter->task = task;
diff --git a/kernel/locking/rtmutex_api.c b/kernel/locking/rtmutex_api.c

index 5c9299a..9002209 100644 (file)
--- a/kernel/locking/rtmutex_api.c
+++ b/kernel/locking/rtmutex_api.c
@@ -21,12 +21,13 @@ int max_lock_depth = 1024;
   */
  static __always_inline int __rt_mutex_lock_common(struct rt_mutex *lock,
                                                   unsigned int state,
+                                                 struct lockdep_map *nest_lock,
                                                   unsigned int subclass)
  {
         int ret;
  
         might_sleep();
-       mutex_acquire(&lock->dep_map, subclass, 0, _RET_IP_);
+       mutex_acquire_nest(&lock->dep_map, subclass, 0, nest_lock, _RET_IP_);
         ret = __rt_mutex_lock(&lock->rtmutex, state);
         if (ret)
                 mutex_release(&lock->dep_map, _RET_IP_);
@@ -48,10 +49,16 @@ EXPORT_SYMBOL(rt_mutex_base_init);
   */
  void __sched rt_mutex_lock_nested(struct rt_mutex *lock, unsigned int subclass)
  {
-       __rt_mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, subclass);
+       __rt_mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, NULL, subclass);
  }
  EXPORT_SYMBOL_GPL(rt_mutex_lock_nested);
  
+void __sched _rt_mutex_lock_nest_lock(struct rt_mutex *lock, struct lockdep_map *nest_lock)
+{
+       __rt_mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, nest_lock, 0);
+}
+EXPORT_SYMBOL_GPL(_rt_mutex_lock_nest_lock);
+
  #else /* !CONFIG_DEBUG_LOCK_ALLOC */
  
  /**
@@ -61,7 +68,7 @@ EXPORT_SYMBOL_GPL(rt_mutex_lock_nested);
   */
  void __sched rt_mutex_lock(struct rt_mutex *lock)
  {
-       __rt_mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, 0);
+       __rt_mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, NULL, 0);
  }
  EXPORT_SYMBOL_GPL(rt_mutex_lock);
  #endif
@@ -77,11 +84,26 @@ EXPORT_SYMBOL_GPL(rt_mutex_lock);
   */
  int __sched rt_mutex_lock_interruptible(struct rt_mutex *lock)
  {
-       return __rt_mutex_lock_common(lock, TASK_INTERRUPTIBLE, 0);
+       return __rt_mutex_lock_common(lock, TASK_INTERRUPTIBLE, NULL, 0);
  }
  EXPORT_SYMBOL_GPL(rt_mutex_lock_interruptible);
  
  /**
+ * rt_mutex_lock_killable - lock a rt_mutex killable
+ *
+ * @lock:              the rt_mutex to be locked
+ *
+ * Returns:
+ *  0          on success
+ * -EINTR      when interrupted by a signal
+ */
+int __sched rt_mutex_lock_killable(struct rt_mutex *lock)
+{
+       return __rt_mutex_lock_common(lock, TASK_KILLABLE, NULL, 0);
+}
+EXPORT_SYMBOL_GPL(rt_mutex_lock_killable);
+
+/**
   * rt_mutex_trylock - try to lock a rt_mutex
   *
   * @lock:      the rt_mutex to be locked
diff --git a/kernel/locking/spinlock_rt.c b/kernel/locking/spinlock_rt.c

index d2912e4..9e396a0 100644 (file)
--- a/kernel/locking/spinlock_rt.c
+++ b/kernel/locking/spinlock_rt.c
@@ -24,6 +24,17 @@
  #define RT_MUTEX_BUILD_SPINLOCKS
  #include "rtmutex.c"
  
+/*
+ * __might_resched() skips the state check as rtlocks are state
+ * preserving. Take RCU nesting into account as spin/read/write_lock() can
+ * legitimately nest into an RCU read side critical section.
+ */
+#define RTLOCK_RESCHED_OFFSETS                                         \
+       (rcu_preempt_depth() << MIGHT_RESCHED_RCU_SHIFT)
+
+#define rtlock_might_resched()                                         \
+       __might_resched(__FILE__, __LINE__, RTLOCK_RESCHED_OFFSETS)
+
  static __always_inline void rtlock_lock(struct rt_mutex_base *rtm)
  {
         if (unlikely(!rt_mutex_cmpxchg_acquire(rtm, NULL, current)))
@@ -32,7 +43,7 @@ static __always_inline void rtlock_lock(struct rt_mutex_base *rtm)
  
  static __always_inline void __rt_spin_lock(spinlock_t *lock)
  {
-       ___might_sleep(__FILE__, __LINE__, 0);
+       rtlock_might_resched();
         rtlock_lock(&lock->lock);
         rcu_read_lock();
         migrate_disable();
@@ -210,7 +221,7 @@ EXPORT_SYMBOL(rt_write_trylock);
  
  void __sched rt_read_lock(rwlock_t *rwlock)
  {
-       ___might_sleep(__FILE__, __LINE__, 0);
+       rtlock_might_resched();
         rwlock_acquire_read(&rwlock->dep_map, 0, 0, _RET_IP_);
         rwbase_read_lock(&rwlock->rwbase, TASK_RTLOCK_WAIT);
         rcu_read_lock();
@@ -220,7 +231,7 @@ EXPORT_SYMBOL(rt_read_lock);
  
  void __sched rt_write_lock(rwlock_t *rwlock)
  {
-       ___might_sleep(__FILE__, __LINE__, 0);
+       rtlock_might_resched();
         rwlock_acquire(&rwlock->dep_map, 0, 0, _RET_IP_);
         rwbase_write_lock(&rwlock->rwbase, TASK_RTLOCK_WAIT);
         rcu_read_lock();
@@ -246,12 +257,6 @@ void __sched rt_write_unlock(rwlock_t *rwlock)
  }
  EXPORT_SYMBOL(rt_write_unlock);
  
-int __sched rt_rwlock_is_contended(rwlock_t *rwlock)
-{
-       return rw_base_is_contended(&rwlock->rwbase);
-}
-EXPORT_SYMBOL(rt_rwlock_is_contended);
-
  #ifdef CONFIG_DEBUG_LOCK_ALLOC
  void __rt_rwlock_init(rwlock_t *rwlock, const char *name,
                       struct lock_class_key *key)
diff --git a/kernel/panic.c b/kernel/panic.c

index 47933d4..ea5269f 100644 (file)
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -245,12 +245,27 @@ void check_panic_on_warn(const char *origin)
  void panic(const char *fmt, ...)
  {
         static char buf[1024];
+       va_list args2;
         va_list args;
         long i, i_next = 0, len;
         int state = 0;
         int old_cpu, this_cpu;
         bool _crash_kexec_post_notifiers = crash_kexec_post_notifiers;
  
+       console_verbose();
+       pr_emerg("Kernel panic - not syncing:\n");
+       va_start(args2, fmt);
+       va_copy(args, args2);
+       vprintk(fmt, args2);
+       va_end(args2);
+#ifdef CONFIG_DEBUG_BUGVERBOSE
+       /*
+        * Avoid nested stack-dumping if a panic occurs during oops processing
+        */
+       if (!test_taint(TAINT_DIE) && oops_in_progress <= 1)
+               dump_stack();
+#endif
+       pr_flush(1000, true);
         if (panic_on_warn) {
                 /*
                  * This thread may hit another WARN() in the panic path.
@@ -291,24 +306,13 @@ void panic(const char *fmt, ...)
         if (old_cpu != PANIC_CPU_INVALID && old_cpu != this_cpu)
                 panic_smp_self_stop();
  
-       console_verbose();
         bust_spinlocks(1);
-       va_start(args, fmt);
         len = vscnprintf(buf, sizeof(buf), fmt, args);
         va_end(args);
  
         if (len && buf[len - 1] == '\n')
                 buf[len - 1] = '\0';
  
-       pr_emerg("Kernel panic - not syncing: %s\n", buf);
-#ifdef CONFIG_DEBUG_BUGVERBOSE
-       /*
-        * Avoid nested stack-dumping if a panic occurs during oops processing
-        */
-       if (!test_taint(TAINT_DIE) && oops_in_progress <= 1)
-               dump_stack();
-#endif
-
         /*
          * If kgdb is enabled, give it a chance to run before we stop all
          * the other CPUs or else we won't be able to debug processes left
@@ -617,9 +621,11 @@ static u64 oops_id;
  
  static int init_oops_id(void)
  {
+#ifndef CONFIG_PREEMPT_RT
         if (!oops_id)
                 get_random_bytes(&oops_id, sizeof(oops_id));
         else
+#endif
                 oops_id++;
  
         return 0;
@@ -630,6 +636,7 @@ static void print_oops_end_marker(void)
  {
         init_oops_id();
         pr_warn("---[ end trace %016llx ]---\n", (unsigned long long)oops_id);
+       pr_flush(1000, true);
  }
  
  /*
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c

index 8d856b7..7f27cfe 100644 (file)
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -44,6 +44,9 @@
  #include <linux/irq_work.h>
  #include <linux/ctype.h>
  #include <linux/uio.h>
+#include <linux/kgdb.h>
+#include <linux/kthread.h>
+#include <linux/clocksource.h>
  #include <linux/sched/clock.h>
  #include <linux/sched/debug.h>
  #include <linux/sched/task_stack.h>
@@ -269,11 +272,6 @@ static void __up_console_sem(unsigned long ip)
  static int console_locked, console_suspended;
  
  /*
- * If exclusive_console is non-NULL then only this console is to be printed to.
- */
-static struct console *exclusive_console;
-
-/*
   *     Array of consoles built from command line options (console=)
   */
  
@@ -352,10 +350,13 @@ static int console_msg_format = MSG_FORMAT_DEFAULT;
   * non-prinatable characters are escaped in the "\xff" notation.
   */
  
+#ifdef CONFIG_PRINTK
  /* syslog_lock protects syslog_* variables and write access to clear_seq. */
  static DEFINE_MUTEX(syslog_lock);
  
-#ifdef CONFIG_PRINTK
+/* Set to enable sync mode. Once set, it is never cleared. */
+static bool sync_mode;
+
  DECLARE_WAIT_QUEUE_HEAD(log_wait);
  /* All 3 protected by @syslog_lock. */
  /* the next printk record to read by syslog(READ) or /proc/kmsg */
@@ -363,17 +364,6 @@ static u64 syslog_seq;
  static size_t syslog_partial;
  static bool syslog_time;
  
-/* All 3 protected by @console_sem. */
-/* the next printk record to write to the console */
-static u64 console_seq;
-static u64 exclusive_console_stop_seq;
-static unsigned long console_dropped;
-
-struct latched_seq {
-       seqcount_latch_t        latch;
-       u64                     val[2];
-};
-
  /*
   * The next printk record to read after the last 'clear' command. There are
   * two copies (updated with seqcount_latch) so that reads can locklessly
@@ -391,9 +381,6 @@ static struct latched_seq clear_seq = {
  #define PREFIX_MAX             32
  #endif
  
-/* the maximum size of a formatted record (i.e. with prefix added per line) */
-#define CONSOLE_LOG_MAX                1024
-
  /* the maximum size allowed to be reserved for a record */
  #define LOG_LINE_MAX           (CONSOLE_LOG_MAX - PREFIX_MAX)
  
@@ -437,7 +424,7 @@ bool printk_percpu_data_ready(void)
         return __printk_percpu_data_ready;
  }
  
-/* Must be called under syslog_lock. */
+/* Must be called under associated write-protection lock. */
  static void latched_seq_write(struct latched_seq *ls, u64 val)
  {
         raw_write_seqcount_latch(&ls->latch);
@@ -1771,188 +1758,152 @@ SYSCALL_DEFINE3(syslog, int, type, char __user *, buf, int, len)
         return do_syslog(type, buf, len, SYSLOG_FROM_READER);
  }
  
-/*
- * Special console_lock variants that help to reduce the risk of soft-lockups.
- * They allow to pass console_lock to another printk() call using a busy wait.
- */
+int printk_delay_msec __read_mostly;
  
-#ifdef CONFIG_LOCKDEP
-static struct lockdep_map console_owner_dep_map = {
-       .name = "console_owner"
-};
-#endif
+static inline void printk_delay(int level)
+{
+       boot_delay_msec(level);
  
-static DEFINE_RAW_SPINLOCK(console_owner_lock);
-static struct task_struct *console_owner;
-static bool console_waiter;
+       if (unlikely(printk_delay_msec)) {
+               int m = printk_delay_msec;
  
-/**
- * console_lock_spinning_enable - mark beginning of code where another
- *     thread might safely busy wait
- *
- * This basically converts console_lock into a spinlock. This marks
- * the section where the console_lock owner can not sleep, because
- * there may be a waiter spinning (like a spinlock). Also it must be
- * ready to hand over the lock at the end of the section.
- */
-static void console_lock_spinning_enable(void)
+               while (m--) {
+                       mdelay(1);
+                       touch_nmi_watchdog();
+               }
+       }
+}
+
+static bool kernel_sync_mode(void)
  {
-       raw_spin_lock(&console_owner_lock);
-       console_owner = current;
-       raw_spin_unlock(&console_owner_lock);
+       return (oops_in_progress || sync_mode);
+}
  
-       /* The waiter may spin on us after setting console_owner */
-       spin_acquire(&console_owner_dep_map, 0, 0, _THIS_IP_);
+static bool console_may_sync(struct console *con)
+{
+       if (!(con->flags & CON_ENABLED))
+               return false;
+       if (con->write_atomic && kernel_sync_mode())
+               return true;
+       if (con->write_atomic && (con->flags & CON_HANDOVER) && !con->thread)
+               return true;
+       if (con->write && (con->flags & CON_BOOT) && !con->thread)
+               return true;
+       return false;
  }
  
-/**
- * console_lock_spinning_disable_and_check - mark end of code where another
- *     thread was able to busy wait and check if there is a waiter
- *
- * This is called at the end of the section where spinning is allowed.
- * It has two functions. First, it is a signal that it is no longer
- * safe to start busy waiting for the lock. Second, it checks if
- * there is a busy waiter and passes the lock rights to her.
- *
- * Important: Callers lose the lock if there was a busy waiter.
- *     They must not touch items synchronized by console_lock
- *     in this case.
- *
- * Return: 1 if the lock rights were passed, 0 otherwise.
- */
-static int console_lock_spinning_disable_and_check(void)
+static bool call_sync_console_driver(struct console *con, const char *text, size_t text_len)
  {
-       int waiter;
+       if (!(con->flags & CON_ENABLED))
+               return false;
  
-       raw_spin_lock(&console_owner_lock);
-       waiter = READ_ONCE(console_waiter);
-       console_owner = NULL;
-       raw_spin_unlock(&console_owner_lock);
+       if (con->write_atomic && kernel_sync_mode()) {
+               con->write_atomic(con, text, text_len);
+               return true;
+       }
  
-       if (!waiter) {
-               spin_release(&console_owner_dep_map, _THIS_IP_);
-               return 0;
+       if (con->write_atomic && (con->flags & CON_HANDOVER) && !con->thread) {
+               if (console_trylock()) {
+                       con->write_atomic(con, text, text_len);
+                       console_unlock();
+                       return true;
+               }
+
+       } else if (con->write && (con->flags & CON_BOOT) && !con->thread) {
+               if (console_trylock()) {
+                       con->write(con, text, text_len);
+                       console_unlock();
+                       return true;
+               }
         }
  
-       /* The waiter is now free to continue */
-       WRITE_ONCE(console_waiter, false);
+       return false;
+}
  
-       spin_release(&console_owner_dep_map, _THIS_IP_);
+static bool have_atomic_console(void)
+{
+       struct console *con;
  
-       /*
-        * Hand off console_lock to waiter. The waiter will perform
-        * the up(). After this, the waiter is the console_lock owner.
-        */
-       mutex_release(&console_lock_dep_map, _THIS_IP_);
-       return 1;
+       for_each_console(con) {
+               if (!(con->flags & CON_ENABLED))
+                       continue;
+               if (con->write_atomic)
+                       return true;
+       }
+       return false;
  }
  
-/**
- * console_trylock_spinning - try to get console_lock by busy waiting
- *
- * This allows to busy wait for the console_lock when the current
- * owner is running in specially marked sections. It means that
- * the current owner is running and cannot reschedule until it
- * is ready to lose the lock.
- *
- * Return: 1 if we got the lock, 0 othrewise
- */
-static int console_trylock_spinning(void)
+static bool print_sync(struct console *con, u64 *seq)
  {
-       struct task_struct *owner = NULL;
-       bool waiter;
-       bool spin = false;
-       unsigned long flags;
+       struct printk_info info;
+       struct printk_record r;
+       size_t text_len;
  
-       if (console_trylock())
-               return 1;
+       prb_rec_init_rd(&r, &info, &con->sync_buf[0], sizeof(con->sync_buf));
  
-       printk_safe_enter_irqsave(flags);
+       if (!prb_read_valid(prb, *seq, &r))
+               return false;
  
-       raw_spin_lock(&console_owner_lock);
-       owner = READ_ONCE(console_owner);
-       waiter = READ_ONCE(console_waiter);
-       if (!waiter && owner && owner != current) {
-               WRITE_ONCE(console_waiter, true);
-               spin = true;
-       }
-       raw_spin_unlock(&console_owner_lock);
+       text_len = record_print_text(&r, console_msg_format & MSG_FORMAT_SYSLOG, printk_time);
  
-       /*
-        * If there is an active printk() writing to the
-        * consoles, instead of having it write our data too,
-        * see if we can offload that load from the active
-        * printer, and do some printing ourselves.
-        * Go into a spin only if there isn't already a waiter
-        * spinning, and there is an active printer, and
-        * that active printer isn't us (recursive printk?).
-        */
-       if (!spin) {
-               printk_safe_exit_irqrestore(flags);
-               return 0;
-       }
+       if (!call_sync_console_driver(con, &con->sync_buf[0], text_len))
+               return false;
  
-       /* We spin waiting for the owner to release us */
-       spin_acquire(&console_owner_dep_map, 0, 0, _THIS_IP_);
-       /* Owner will clear console_waiter on hand off */
-       while (READ_ONCE(console_waiter))
-               cpu_relax();
-       spin_release(&console_owner_dep_map, _THIS_IP_);
+       *seq = r.info->seq;
  
-       printk_safe_exit_irqrestore(flags);
-       /*
-        * The owner passed the console lock to us.
-        * Since we did not spin on console lock, annotate
-        * this as a trylock. Otherwise lockdep will
-        * complain.
-        */
-       mutex_acquire(&console_lock_dep_map, 0, 1, _THIS_IP_);
+       touch_softlockup_watchdog_sync();
+       clocksource_touch_watchdog();
+       rcu_cpu_stall_reset();
+       touch_nmi_watchdog();
  
-       return 1;
+       if (text_len)
+               printk_delay(r.info->level);
+
+       return true;
  }
  
-/*
- * Call the console drivers, asking them to write out
- * log_buf[start] to log_buf[end - 1].
- * The console_lock must be held.
- */
-static void call_console_drivers(const char *ext_text, size_t ext_len,
-                                const char *text, size_t len)
+static u64 read_console_seq(struct console *con)
  {
-       static char dropped_text[64];
-       size_t dropped_len = 0;
-       struct console *con;
+       u64 seq2;
+       u64 seq;
  
-       trace_console_rcuidle(text, len);
+       seq = latched_seq_read_nolock(&con->printk_seq);
+       seq2 = latched_seq_read_nolock(&con->printk_sync_seq);
+       if (seq2 > seq)
+               seq = seq2;
+#ifdef CONFIG_HAVE_NMI
+       seq2 = latched_seq_read_nolock(&con->printk_sync_nmi_seq);
+       if (seq2 > seq)
+               seq = seq2;
+#endif
+       return seq;
+}
  
-       if (!console_drivers)
-               return;
+static void print_sync_until(struct console *con, u64 seq, bool is_locked)
+{
+       u64 printk_seq;
  
-       if (console_dropped) {
-               dropped_len = snprintf(dropped_text, sizeof(dropped_text),
-                                      "** %lu printk messages dropped **\n",
-                                      console_dropped);
-               console_dropped = 0;
-       }
+       while (!__printk_cpu_trylock())
+               cpu_relax();
  
-       for_each_console(con) {
-               if (exclusive_console && con != exclusive_console)
-                       continue;
-               if (!(con->flags & CON_ENABLED))
-                       continue;
-               if (!con->write)
-                       continue;
-               if (!cpu_online(smp_processor_id()) &&
-                   !(con->flags & CON_ANYTIME))
-                       continue;
-               if (con->flags & CON_EXTENDED)
-                       con->write(con, ext_text, ext_len);
-               else {
-                       if (dropped_len)
-                               con->write(con, dropped_text, dropped_len);
-                       con->write(con, text, len);
-               }
+       for (;;) {
+               printk_seq = read_console_seq(con);
+               if (printk_seq >= seq)
+                       break;
+               if (!print_sync(con, &printk_seq))
+                       break;
+
+               if (is_locked)
+                       latched_seq_write(&con->printk_seq, printk_seq + 1);
+#ifdef CONFIG_HAVE_NMI /* must match the guard in read_console_seq() */
+               else if (in_nmi())
+                       latched_seq_write(&con->printk_sync_nmi_seq, printk_seq + 1);
+#endif
+               else
+                       latched_seq_write(&con->printk_sync_seq, printk_seq + 1);
         }
+
+       __printk_cpu_unlock();
  }
  
  /*
@@ -2025,20 +1976,6 @@ static u8 *__printk_recursion_counter(void)
                 local_irq_restore(flags);               \
         } while (0)
  
-int printk_delay_msec __read_mostly;
-
-static inline void printk_delay(void)
-{
-       if (unlikely(printk_delay_msec)) {
-               int m = printk_delay_msec;
-
-               while (m--) {
-                       mdelay(1);
-                       touch_nmi_watchdog();
-               }
-       }
-}
-
  static inline u32 printk_caller_id(void)
  {
         return in_task() ? task_pid_nr(current) :
@@ -2126,6 +2063,7 @@ int vprintk_store(int facility, int level,
         const u32 caller_id = printk_caller_id();
         struct prb_reserved_entry e;
         enum printk_info_flags flags = 0;
+       bool final_commit = false;
         struct printk_record r;
         unsigned long irqflags;
         u16 trunc_msg_len = 0;
@@ -2136,6 +2074,7 @@ int vprintk_store(int facility, int level,
         u16 text_len;
         int ret = 0;
         u64 ts_nsec;
+       u64 seq;
  
         /*
          * Since the duration of printk() can vary depending on the message
@@ -2174,6 +2113,7 @@ int vprintk_store(int facility, int level,
         if (flags & LOG_CONT) {
                 prb_rec_init_wr(&r, reserve_size);
                 if (prb_reserve_in_last(&e, prb, &r, caller_id, LOG_LINE_MAX)) {
+                       seq = r.info->seq;
                         text_len = printk_sprint(&r.text_buf[r.info->text_len], reserve_size,
                                                  facility, &flags, fmt, args);
                         r.info->text_len += text_len;
@@ -2181,6 +2121,7 @@ int vprintk_store(int facility, int level,
                         if (flags & LOG_NEWLINE) {
                                 r.info->flags |= LOG_NEWLINE;
                                 prb_final_commit(&e);
+                               final_commit = true;
                         } else {
                                 prb_commit(&e);
                         }
@@ -2204,6 +2145,7 @@ int vprintk_store(int facility, int level,
                 if (!prb_reserve(&e, prb, &r))
                         goto out;
         }
+       seq = r.info->seq;
  
         /* fill message */
         text_len = printk_sprint(&r.text_buf[0], reserve_size, facility, &flags, fmt, args);
@@ -2219,13 +2161,25 @@ int vprintk_store(int facility, int level,
                 memcpy(&r.info->dev_info, dev_info, sizeof(r.info->dev_info));
  
         /* A message without a trailing newline can be continued. */
-       if (!(flags & LOG_NEWLINE))
+       if (!(flags & LOG_NEWLINE)) {
                 prb_commit(&e);
-       else
+       } else {
                 prb_final_commit(&e);
+               final_commit = true;
+       }
  
         ret = text_len + trunc_msg_len;
  out:
+       /* only the kernel may perform synchronous printing */
+       if (facility == 0 && final_commit) {
+               struct console *con;
+
+               for_each_console(con) {
+                       if (console_may_sync(con))
+                               print_sync_until(con, seq + 1, false);
+               }
+       }
+
         printk_exit_irqrestore(recursion_ptr, irqflags);
         return ret;
  }
@@ -2235,40 +2189,16 @@ asmlinkage int vprintk_emit(int facility, int level,
                             const char *fmt, va_list args)
  {
         int printed_len;
-       bool in_sched = false;
  
         /* Suppress unimportant messages after panic happens */
         if (unlikely(suppress_printk))
                 return 0;
  
-       if (level == LOGLEVEL_SCHED) {
+       if (level == LOGLEVEL_SCHED)
                 level = LOGLEVEL_DEFAULT;
-               in_sched = true;
-       }
-
-       boot_delay_msec(level);
-       printk_delay();
  
         printed_len = vprintk_store(facility, level, dev_info, fmt, args);
  
-       /* If called from the scheduler, we can not call up(). */
-       if (!in_sched) {
-               /*
-                * Disable preemption to avoid being preempted while holding
-                * console_sem which would prevent anyone from printing to
-                * console
-                */
-               preempt_disable();
-               /*
-                * Try to acquire and then immediately release the console
-                * semaphore.  The release will print out buffers and wake up
-                * /dev/kmsg and syslog() users.
-                */
-               if (console_trylock_spinning())
-                       console_unlock();
-               preempt_enable();
-       }
-
         wake_up_klogd();
         return printed_len;
  }
@@ -2293,37 +2223,162 @@ asmlinkage __visible int _printk(const char *fmt, ...)
  }
  EXPORT_SYMBOL(_printk);
  
-#else /* CONFIG_PRINTK */
+static int printk_kthread_func(void *data) /* @data: the struct console this thread prints for */
+{
+       struct console *con = data;
+       unsigned long dropped = 0; /* records skipped since the last "dropped" notice */
+       char *dropped_text = NULL;
+       struct printk_info info;
+       struct printk_record r;
+       char *ext_text = NULL;
+       size_t dropped_len;
+       int ret = -ENOMEM; /* returned when a buffer allocation below fails */
+       char *text = NULL;
+       char *write_text;
+       size_t len;
+       int error;
+       u64 seq;
+
+       if (con->flags & CON_EXTENDED) {
+               ext_text = kmalloc(CONSOLE_EXT_LOG_MAX, GFP_KERNEL);
+               if (!ext_text)
+                       goto out;
+       }
+       text = kmalloc(LOG_LINE_MAX + PREFIX_MAX, GFP_KERNEL);
+       dropped_text = kmalloc(64, GFP_KERNEL); /* 64 matches the size given to snprintf() below */
+       if (!text || !dropped_text)
+               goto out;
+       if (con->flags & CON_EXTENDED)
+               write_text = ext_text;
+       else
+               write_text = text;
+
+       seq = read_console_seq(con);
  
-#define CONSOLE_LOG_MAX                0
-#define printk_time            false
+       prb_rec_init_rd(&r, &info, text, LOG_LINE_MAX + PREFIX_MAX);
  
-#define prb_read_valid(rb, seq, r)     false
-#define prb_first_valid_seq(rb)                0
+       for (;;) {
+               error = wait_event_interruptible(log_wait,
+                                                prb_read_valid(prb, seq, &r) || kthread_should_stop());
  
-static u64 syslog_seq;
-static u64 console_seq;
-static u64 exclusive_console_stop_seq;
-static unsigned long console_dropped;
+               if (kthread_should_stop())
+                       break;
+
+               if (error) /* nonzero return from the wait: go back and wait again */
+                       continue;
+
+               if (seq != r.info->seq) { /* got a different record than asked for: count the gap */
+                       dropped += r.info->seq - seq;
+                       seq = r.info->seq;
+               }
+
+               seq++; /* NOTE(review): the two skips below do not publish this to con->printk_seq */
+
+               if (!(con->flags & CON_ENABLED))
+                       continue;
+
+               if (suppress_message_printing(r.info->level))
+                       continue;
+
+               if (con->flags & CON_EXTENDED) {
+                       len = info_print_ext_header(ext_text,
+                                                   CONSOLE_EXT_LOG_MAX,
+                                                   r.info);
+                       len += msg_print_ext_body(ext_text + len,
+                                                 CONSOLE_EXT_LOG_MAX - len,
+                                                 &r.text_buf[0], r.info->text_len,
+                                                 &r.info->dev_info);
+               } else {
+                       len = record_print_text(&r,
+                                               console_msg_format & MSG_FORMAT_SYSLOG,
+                                               printk_time);
+               }
+
+               console_lock();
+
+               /*
+                * Even though the printk kthread is always preemptible, it is
+                * still not allowed to call cond_resched() from within
+                * console drivers. The task may become non-preemptible in the
+                * console driver call chain. For example, vt_console_print()
+                * takes a spinlock and then can call into fbcon_redraw(),
+                * which can conditionally invoke cond_resched().
+                */
+               console_may_schedule = 0;
+
+               if (kernel_sync_mode() && con->write_atomic) { /* leave the loop: this thread ends with ret = 0 */
+                       console_unlock();
+                       break;
+               }
+
+               if (!(con->flags & CON_EXTENDED) && dropped) { /* extended consoles get no "dropped" notice */
+                       dropped_len = snprintf(dropped_text, 64,
+                                              "** %lu printk messages dropped **\n",
+                                              dropped);
+                       dropped = 0;
+
+                       con->write(con, dropped_text, dropped_len);
+                       printk_delay(r.info->level);
+               }
+
+               con->write(con, write_text, len); /* called even when len is 0 */
+               if (len)
+                       printk_delay(r.info->level);
  
-static size_t record_print_text(const struct printk_record *r,
-                               bool syslog, bool time)
+               latched_seq_write(&con->printk_seq, seq); /* seq was already incremented past this record */
+
+               console_unlock();
+       }
+       ret = 0;
+out: /* kfree() of the still-NULL buffers covers the early allocation failures */
+       kfree(dropped_text);
+       kfree(text);
+       kfree(ext_text);
+       pr_info("%sconsole [%s%d]: printing thread stopped\n", /* also printed on the -ENOMEM path */
+               (con->flags & CON_BOOT) ? "boot" : "",
+               con->name, con->index);
+       return ret;
+}
+
+/* Start the "pr/<name><index>" printing kthread for @con. Must be called within console_lock(). */
+static void start_printk_kthread(struct console *con)
  {
-       return 0;
+       con->thread = kthread_run(printk_kthread_func, con, /* @con becomes the thread's data argument */
+                                 "pr/%s%d", con->name, con->index);
+       if (IS_ERR(con->thread)) { /* con->thread keeps the error pointer; unregister_console() tests IS_ERR() */
+               pr_err("%sconsole [%s%d]: unable to start printing thread\n",
+                      (con->flags & CON_BOOT) ? "boot" : "",
+                      con->name, con->index);
+               return;
+       }
+       pr_info("%sconsole [%s%d]: printing thread started\n",
+               (con->flags & CON_BOOT) ? "boot" : "",
+               con->name, con->index);
  }
-static ssize_t info_print_ext_header(char *buf, size_t size,
-                                    struct printk_info *info)
+
+/* Set by printk_late_init() after it starts the per-console threads; protected by console_lock */
+static bool kthreads_started;
+
+/* Start a printing thread for @con, or sync-print its backlog. Must be called within console_lock(). */
+static void console_try_thread(struct console *con)
  {
-       return 0;
+       if (kthreads_started) { /* threads are running already: give @con its own */
+               start_printk_kthread(con);
+               return;
+       }
+
+       /*
+        * The printing threads have not been started yet. If this console
+        * can print synchronously, print all unprinted messages.
+        */
+       if (console_may_sync(con)) {
+               unsigned long flags;
+
+               local_irq_save(flags);
+               print_sync_until(con, prb_next_seq(prb), true); /* true: progress goes to con->printk_seq */
+               local_irq_restore(flags);
+       }
  }
-static ssize_t msg_print_ext_body(char *buf, size_t size,
-                                 char *text, size_t text_len,
-                                 struct dev_printk_info *dev_info) { return 0; }
-static void console_lock_spinning_enable(void) { }
-static int console_lock_spinning_disable_and_check(void) { return 0; }
-static void call_console_drivers(const char *ext_text, size_t ext_len,
-                                const char *text, size_t len) {}
-static bool suppress_message_printing(int level) { return false; }
  
  #endif /* CONFIG_PRINTK */
  
@@ -2580,34 +2635,6 @@ int is_console_locked(void)
  }
  EXPORT_SYMBOL(is_console_locked);
  
-/*
- * Check if we have any console that is capable of printing while cpu is
- * booting or shutting down. Requires console_sem.
- */
-static int have_callable_console(void)
-{
-       struct console *con;
-
-       for_each_console(con)
-               if ((con->flags & CON_ENABLED) &&
-                               (con->flags & CON_ANYTIME))
-                       return 1;
-
-       return 0;
-}
-
-/*
- * Can we actually use the console at this time on this cpu?
- *
- * Console drivers may assume that per-cpu resources have been allocated. So
- * unless they're explicitly marked as being able to cope (CON_ANYTIME) don't
- * call them until this CPU is officially up.
- */
-static inline int can_use_console(void)
-{
-       return cpu_online(raw_smp_processor_id()) || have_callable_console();
-}
-
  /**
   * console_unlock - unlock the console system
   *
@@ -2624,140 +2651,13 @@ static inline int can_use_console(void)
   */
  void console_unlock(void)
  {
-       static char ext_text[CONSOLE_EXT_LOG_MAX];
-       static char text[CONSOLE_LOG_MAX];
-       unsigned long flags;
-       bool do_cond_resched, retry;
-       struct printk_info info;
-       struct printk_record r;
-       u64 __maybe_unused next_seq;
-
         if (console_suspended) {
                 up_console_sem();
                 return;
         }
  
-       prb_rec_init_rd(&r, &info, text, sizeof(text));
-
-       /*
-        * Console drivers are called with interrupts disabled, so
-        * @console_may_schedule should be cleared before; however, we may
-        * end up dumping a lot of lines, for example, if called from
-        * console registration path, and should invoke cond_resched()
-        * between lines if allowable.  Not doing so can cause a very long
-        * scheduling stall on a slow console leading to RCU stall and
-        * softlockup warnings which exacerbate the issue with more
-        * messages practically incapacitating the system.
-        *
-        * console_trylock() is not able to detect the preemptive
-        * context reliably. Therefore the value must be stored before
-        * and cleared after the "again" goto label.
-        */
-       do_cond_resched = console_may_schedule;
-again:
-       console_may_schedule = 0;
-
-       /*
-        * We released the console_sem lock, so we need to recheck if
-        * cpu is online and (if not) is there at least one CON_ANYTIME
-        * console.
-        */
-       if (!can_use_console()) {
-               console_locked = 0;
-               up_console_sem();
-               return;
-       }
-
-       for (;;) {
-               size_t ext_len = 0;
-               int handover;
-               size_t len;
-
-skip:
-               if (!prb_read_valid(prb, console_seq, &r))
-                       break;
-
-               if (console_seq != r.info->seq) {
-                       console_dropped += r.info->seq - console_seq;
-                       console_seq = r.info->seq;
-               }
-
-               if (suppress_message_printing(r.info->level)) {
-                       /*
-                        * Skip record we have buffered and already printed
-                        * directly to the console when we received it, and
-                        * record that has level above the console loglevel.
-                        */
-                       console_seq++;
-                       goto skip;
-               }
-
-               /* Output to all consoles once old messages replayed. */
-               if (unlikely(exclusive_console &&
-                            console_seq >= exclusive_console_stop_seq)) {
-                       exclusive_console = NULL;
-               }
-
-               /*
-                * Handle extended console text first because later
-                * record_print_text() will modify the record buffer in-place.
-                */
-               if (nr_ext_console_drivers) {
-                       ext_len = info_print_ext_header(ext_text,
-                                               sizeof(ext_text),
-                                               r.info);
-                       ext_len += msg_print_ext_body(ext_text + ext_len,
-                                               sizeof(ext_text) - ext_len,
-                                               &r.text_buf[0],
-                                               r.info->text_len,
-                                               &r.info->dev_info);
-               }
-               len = record_print_text(&r,
-                               console_msg_format & MSG_FORMAT_SYSLOG,
-                               printk_time);
-               console_seq++;
-
-               /*
-                * While actively printing out messages, if another printk()
-                * were to occur on another CPU, it may wait for this one to
-                * finish. This task can not be preempted if there is a
-                * waiter waiting to take over.
-                *
-                * Interrupts are disabled because the hand over to a waiter
-                * must not be interrupted until the hand over is completed
-                * (@console_waiter is cleared).
-                */
-               printk_safe_enter_irqsave(flags);
-               console_lock_spinning_enable();
-
-               stop_critical_timings();        /* don't trace print latency */
-               call_console_drivers(ext_text, ext_len, text, len);
-               start_critical_timings();
-
-               handover = console_lock_spinning_disable_and_check();
-               printk_safe_exit_irqrestore(flags);
-               if (handover)
-                       return;
-
-               if (do_cond_resched)
-                       cond_resched();
-       }
-
-       /* Get consistent value of the next-to-be-used sequence number. */
-       next_seq = console_seq;
-
         console_locked = 0;
         up_console_sem();
-
-       /*
-        * Someone could have filled up the buffer again, so re-check if there's
-        * something to flush. In case we cannot trylock the console_sem again,
-        * there's a new owner and the console_unlock() from them will do the
-        * flush, no worries.
-        */
-       retry = prb_read_valid(prb, next_seq, NULL);
-       if (retry && console_trylock())
-               goto again;
  }
  EXPORT_SYMBOL(console_unlock);
  
@@ -2807,18 +2707,20 @@ void console_unblank(void)
   */
  void console_flush_on_panic(enum con_flush_mode mode)
  {
-       /*
-        * If someone else is holding the console lock, trylock will fail
-        * and may_schedule may be set.  Ignore and proceed to unlock so
-        * that messages are flushed out.  As this can be called from any
-        * context and we don't want to get preempted while flushing,
-        * ensure may_schedule is cleared.
-        */
-       console_trylock();
-       console_may_schedule = 0;
+       if (!console_trylock()) /* NOTE(review): unlike the removed code, a failed trylock now does nothing */
+               return;
+
+#ifdef CONFIG_PRINTK
+       if (mode == CONSOLE_REPLAY_ALL) { /* only rewinds counters; console_unlock() here does no printing */
+               struct console *c;
+               u64 seq;
+
+               seq = prb_first_valid_seq(prb);
+               for_each_console(c)
+                       latched_seq_write(&c->printk_seq, seq); /* every console restarts at prb_first_valid_seq() */
+       }
+#endif /* CONFIG_PRINTK */
  
-       if (mode == CONSOLE_REPLAY_ALL)
-               console_seq = prb_first_valid_seq(prb);
         console_unlock();
  }
  
@@ -2954,6 +2856,7 @@ static int try_enable_new_console(struct console *newcon, bool user_specified)
  void register_console(struct console *newcon)
  {
         struct console *bcon = NULL;
+       u64 __maybe_unused seq = 0;
         int err;
  
         for_each_console(bcon) {
@@ -2976,6 +2879,8 @@ void register_console(struct console *newcon)
                 }
         }
  
+       newcon->thread = NULL;
+
         if (console_drivers && console_drivers->flags & CON_BOOT)
                 bcon = console_drivers;
  
@@ -3017,8 +2922,10 @@ void register_console(struct console *newcon)
          * the real console are the same physical device, it's annoying to
          * see the beginning boot messages twice
          */
-       if (bcon && ((newcon->flags & (CON_CONSDEV | CON_BOOT)) == CON_CONSDEV))
+       if (bcon && ((newcon->flags & (CON_CONSDEV | CON_BOOT)) == CON_CONSDEV)) {
                 newcon->flags &= ~CON_PRINTBUFFER;
+               newcon->flags |= CON_HANDOVER;
+       }
  
         /*
          *      Put this console in the list - keep the
@@ -3040,27 +2947,21 @@ void register_console(struct console *newcon)
         if (newcon->flags & CON_EXTENDED)
                 nr_ext_console_drivers++;
  
-       if (newcon->flags & CON_PRINTBUFFER) {
-               /*
-                * console_unlock(); will print out the buffered messages
-                * for us.
-                *
-                * We're about to replay the log buffer.  Only do this to the
-                * just-registered console to avoid excessive message spam to
-                * the already-registered consoles.
-                *
-                * Set exclusive_console with disabled interrupts to reduce
-                * race window with eventual console_flush_on_panic() that
-                * ignores console_lock.
-                */
-               exclusive_console = newcon;
-               exclusive_console_stop_seq = console_seq;
+#ifdef CONFIG_PRINTK
+       if (!(newcon->flags & CON_PRINTBUFFER))
+               seq = prb_next_seq(prb);
  
-               /* Get a consistent copy of @syslog_seq. */
-               mutex_lock(&syslog_lock);
-               console_seq = syslog_seq;
-               mutex_unlock(&syslog_lock);
-       }
+       seqcount_latch_init(&newcon->printk_seq.latch);
+       latched_seq_write(&newcon->printk_seq, seq);
+       seqcount_latch_init(&newcon->printk_sync_seq.latch);
+       latched_seq_write(&newcon->printk_sync_seq, seq);
+#ifdef CONFIG_HAVE_NMI
+       seqcount_latch_init(&newcon->printk_sync_nmi_seq.latch);
+       latched_seq_write(&newcon->printk_sync_nmi_seq, seq);
+#endif
+
+       console_try_thread(newcon);
+#endif /* CONFIG_PRINTK */
         console_unlock();
         console_sysfs_notify();
  
@@ -3134,6 +3035,9 @@ int unregister_console(struct console *console)
         console_unlock();
         console_sysfs_notify();
  
+       if (console->thread && !IS_ERR(console->thread))
+               kthread_stop(console->thread);
+
         if (console->exit)
                 res = console->exit(console);
  
@@ -3216,6 +3120,15 @@ static int __init printk_late_init(void)
                         unregister_console(con);
                 }
         }
+
+#ifdef CONFIG_PRINTK
+       console_lock();
+       for_each_console(con)
+               start_printk_kthread(con);
+       kthreads_started = true;
+       console_unlock();
+#endif
+
         ret = cpuhp_setup_state_nocalls(CPUHP_PRINTK_DEAD, "printk:dead", NULL,
                                         console_cpu_notify);
         WARN_ON(ret < 0);
@@ -3239,14 +3152,8 @@ static void wake_up_klogd_work_func(struct irq_work *irq_work)
  {
         int pending = this_cpu_xchg(printk_pending, 0);
  
-       if (pending & PRINTK_PENDING_OUTPUT) {
-               /* If trylock fails, someone else is doing the printing */
-               if (console_trylock())
-                       console_unlock();
-       }
-
         if (pending & PRINTK_PENDING_WAKEUP)
-               wake_up_interruptible(&log_wait);
+               wake_up_interruptible_all(&log_wait);
  }
  
  static DEFINE_PER_CPU(struct irq_work, wake_up_klogd_work) =
@@ -3293,7 +3200,7 @@ void defer_console_output(void)
  
  void printk_trigger_flush(void)
  {
-       defer_console_output();
+       wake_up_klogd();
  }
  
  int vprintk_deferred(const char *fmt, va_list args)
@@ -3444,6 +3351,24 @@ void kmsg_dump(enum kmsg_dump_reason reason)
  {
         struct kmsg_dumper *dumper;
  
+       if (!oops_in_progress) {
+               /*
+                * If atomic consoles are available, activate kernel sync mode
+                * to make sure any final messages are visible. The trailing
+                * printk message is important to flush any pending messages.
+                */
+               if (have_atomic_console()) {
+                       sync_mode = true;
+                       pr_info("enabled sync mode\n");
+               }
+
+               /*
+                * Give the printing threads time to flush, allowing up to
+                * 1s of no printing forward progress before giving up.
+                */
+               pr_flush(1000, true);
+       }
+
         rcu_read_lock();
         list_for_each_entry_rcu(dumper, &dump_list, list) {
                 enum kmsg_dump_reason max_reason = dumper->max_reason;
@@ -3626,6 +3551,7 @@ EXPORT_SYMBOL_GPL(kmsg_dump_rewind);
  #ifdef CONFIG_SMP
  static atomic_t printk_cpulock_owner = ATOMIC_INIT(-1);
  static atomic_t printk_cpulock_nested = ATOMIC_INIT(0);
+static unsigned int kgdb_cpu = -1; /* CPU whose kgdb roundup is deferred until unlock; -1 when none */
  
  /**
   * __printk_wait_on_cpu_lock() - Busy wait until the printk cpu-reentrant
@@ -3705,6 +3631,9 @@ EXPORT_SYMBOL(__printk_cpu_trylock);
   */
  void __printk_cpu_unlock(void)
  {
+       bool trigger_kgdb = false;
+       unsigned int cpu;
+
         if (atomic_read(&printk_cpulock_nested)) {
                 atomic_dec(&printk_cpulock_nested);
                 return;
@@ -3715,6 +3644,12 @@ void __printk_cpu_unlock(void)
          * LMM(__printk_cpu_unlock:A)
          */
  
+       cpu = smp_processor_id();
+       if (kgdb_cpu == cpu) {
+               trigger_kgdb = true;
+               kgdb_cpu = -1;
+       }
+
         /*
          * Guarantee loads and stores from this CPU when it was the
          * lock owner are visible to the next lock owner. This pairs
@@ -3735,6 +3670,98 @@ void __printk_cpu_unlock(void)
          */
         atomic_set_release(&printk_cpulock_owner,
                            -1); /* LMM(__printk_cpu_unlock:B) */
+
+       if (trigger_kgdb) {
+               pr_warn("re-triggering kgdb roundup for CPU#%d\n", cpu);
+               kgdb_roundup_cpu(cpu);
+       }
  }
  EXPORT_SYMBOL(__printk_cpu_unlock);
+
+bool kgdb_roundup_delay(unsigned int cpu) /* returns true when the roundup of @cpu has been deferred */
+{
+       if (cpu != atomic_read(&printk_cpulock_owner)) /* @cpu is not the recorded printk cpu lock owner */
+               return false;
+
+       kgdb_cpu = cpu; /* __printk_cpu_unlock() on this CPU calls kgdb_roundup_cpu() for it */
+       return true;
+}
+EXPORT_SYMBOL(kgdb_roundup_delay);
  #endif /* CONFIG_SMP */
+
+#ifdef CONFIG_PRINTK
+static void pr_msleep(bool may_sleep, int ms) /* wait @ms milliseconds, sleeping only if @may_sleep */
+{
+       if (may_sleep) {
+               msleep(ms);
+       } else {
+               while (ms--) /* assumes ms >= 0; pr_flush() passes 100 or 0 < remaining < 100 */
+                       udelay(1000); /* one 1000us busy-wait step per requested millisecond */
+       }
+}
+
+/**
+ * pr_flush() - Wait for printing threads to catch up.
+ *
+ * @timeout_ms:        The maximum time (in ms) to wait.
+ * @reset_on_progress: Reset the timeout if forward progress is seen.
+ *
+ * A value of 0 for @timeout_ms means no waiting will occur. Any negative
+ * value (e.g. -1) represents infinite waiting.
+ *
+ * If @reset_on_progress is true, the timeout will be reset whenever any
+ * printer has been seen to make some forward progress.
+ *
+ * Context: Any context.
+ * Return: true if all enabled printers are caught up.
+ */
+bool pr_flush(int timeout_ms, bool reset_on_progress)
+{
+       int remaining = timeout_ms;
+       struct console *con;
+       u64 last_diff = 0; /* backlog measured on the previous pass */
+       bool may_sleep;
+       u64 printk_seq;
+       u64 diff;
+       u64 seq;
+
+       may_sleep = (preemptible() &&
+                    !in_softirq() &&
+                    system_state >= SYSTEM_RUNNING); /* when false, pr_msleep() busy-waits instead */
+
+       seq = prb_next_seq(prb); /* target, sampled once: later records are not waited for */
+
+       for (;;) {
+               diff = 0; /* summed distance of all enabled consoles from @seq */
+
+               for_each_console(con) {
+                       if (!(con->flags & CON_ENABLED))
+                               continue;
+                       printk_seq = read_console_seq(con);
+                       if (printk_seq < seq)
+                               diff += seq - printk_seq;
+               }
+
+               if (diff != last_diff && reset_on_progress) /* backlog changed since the previous pass */
+                       remaining = timeout_ms;
+
+               if (diff == 0 || remaining == 0) /* caught up, or out of time */
+                       break;
+
+               if (remaining < 0) { /* negative timeout: never counted down */
+                       pr_msleep(may_sleep, 100);
+               } else if (remaining < 100) { /* last, shorter wait */
+                       pr_msleep(may_sleep, remaining);
+                       remaining = 0;
+               } else {
+                       pr_msleep(may_sleep, 100);
+                       remaining -= 100;
+               }
+
+               last_diff = diff;
+       }
+
+       return (diff == 0);
+}
+EXPORT_SYMBOL(pr_flush);
+#endif /* CONFIG_PRINTK */
diff --git a/kernel/ptrace.c b/kernel/ptrace.c

index 0cf5475..0df2de2 100644 (file)
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -197,7 +197,18 @@ static bool ptrace_freeze_traced(struct task_struct *task)
         spin_lock_irq(&task->sighand->siglock);
         if (task_is_traced(task) && !looks_like_a_spurious_pid(task) &&
             !__fatal_signal_pending(task)) {
+#ifdef CONFIG_PREEMPT_RT
+               unsigned long flags;
+
+               raw_spin_lock_irqsave(&task->pi_lock, flags);
+               if (READ_ONCE(task->__state) & __TASK_TRACED)
+                       WRITE_ONCE(task->__state, __TASK_TRACED);
+               else
+                       task->saved_state = __TASK_TRACED;
+               raw_spin_unlock_irqrestore(&task->pi_lock, flags);
+#else
                 WRITE_ONCE(task->__state, __TASK_TRACED);
+#endif
                 ret = true;
         }
         spin_unlock_irq(&task->sighand->siglock);
@@ -207,7 +218,11 @@ static bool ptrace_freeze_traced(struct task_struct *task)
  
  static void ptrace_unfreeze_traced(struct task_struct *task)
  {
-       if (READ_ONCE(task->__state) != __TASK_TRACED)
+       unsigned long flags;
+       bool frozen = true;
+
+       if (!IS_ENABLED(CONFIG_PREEMPT_RT) &&
+           READ_ONCE(task->__state) != __TASK_TRACED)
                 return;
  
         WARN_ON(!task->ptrace || task->parent != current);
@@ -217,12 +232,21 @@ static void ptrace_unfreeze_traced(struct task_struct *task)
          * Recheck state under the lock to close this race.
          */
         spin_lock_irq(&task->sighand->siglock);
-       if (READ_ONCE(task->__state) == __TASK_TRACED) {
-               if (__fatal_signal_pending(task))
-                       wake_up_state(task, __TASK_TRACED);
-               else
-                       WRITE_ONCE(task->__state, TASK_TRACED);
-       }
+       raw_spin_lock_irqsave(&task->pi_lock, flags);
+       if (READ_ONCE(task->__state) == __TASK_TRACED)
+               WRITE_ONCE(task->__state, TASK_TRACED);
+
+#ifdef CONFIG_PREEMPT_RT
+       else if (task->saved_state == __TASK_TRACED)
+               task->saved_state = TASK_TRACED;
+#endif
+       else
+               frozen = false;
+       raw_spin_unlock_irqrestore(&task->pi_lock, flags);
+
+       if (frozen && __fatal_signal_pending(task))
+               wake_up_state(task, __TASK_TRACED);
+
         spin_unlock_irq(&task->sighand->siglock);
  }
  
diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h

index 4bd07cc..413b120 100644 (file)
--- a/kernel/rcu/tasks.h
+++ b/kernel/rcu/tasks.h
@@ -1336,7 +1336,7 @@ static void test_rcu_tasks_callback(struct rcu_head *rhp)
         rttd->notrun = true;
  }
  
-static void rcu_tasks_initiate_self_tests(void)
+void rcu_tasks_initiate_self_tests(void)
  {
         pr_info("Running RCU-tasks wait API self tests\n");
  #ifdef CONFIG_TASKS_RCU
@@ -1373,9 +1373,7 @@ static int rcu_tasks_verify_self_tests(void)
         return ret;
  }
  late_initcall(rcu_tasks_verify_self_tests);
-#else /* #ifdef CONFIG_PROVE_RCU */
-static void rcu_tasks_initiate_self_tests(void) { }
-#endif /* #else #ifdef CONFIG_PROVE_RCU */
+#endif /* #ifdef CONFIG_PROVE_RCU */
  
  void __init rcu_init_tasks_generic(void)
  {
@@ -1390,9 +1388,6 @@ void __init rcu_init_tasks_generic(void)
  #ifdef CONFIG_TASKS_TRACE_RCU
         rcu_spawn_tasks_trace_kthread();
  #endif
-
-       // Run the self-tests.
-       rcu_tasks_initiate_self_tests();
  }
  
  #else /* #ifdef CONFIG_TASKS_RCU_GENERIC */
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c

index cf101da..f4561b5 100644 (file)
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -2279,13 +2279,13 @@ rcu_report_qs_rdp(struct rcu_data *rdp)
  {
         unsigned long flags;
         unsigned long mask;
-       bool needwake = false;
-       const bool offloaded = rcu_rdp_is_offloaded(rdp);
+       bool offloaded, needwake = false;
         struct rcu_node *rnp;
  
         WARN_ON_ONCE(rdp->cpu != smp_processor_id());
         rnp = rdp->mynode;
         raw_spin_lock_irqsave_rcu_node(rnp, flags);
+       offloaded = rcu_rdp_is_offloaded(rdp);
         if (rdp->cpu_no_qs.b.norm || rdp->gp_seq != rnp->gp_seq ||
             rdp->gpwrap) {
  
@@ -2447,7 +2447,7 @@ static void rcu_do_batch(struct rcu_data *rdp)
         int div;
         bool __maybe_unused empty;
         unsigned long flags;
-       const bool offloaded = rcu_rdp_is_offloaded(rdp);
+       bool offloaded;
         struct rcu_head *rhp;
         struct rcu_cblist rcl = RCU_CBLIST_INITIALIZER(rcl);
         long bl, count = 0;
@@ -2473,6 +2473,7 @@ static void rcu_do_batch(struct rcu_data *rdp)
         rcu_nocb_lock(rdp);
         WARN_ON_ONCE(cpu_is_offline(smp_processor_id()));
         pending = rcu_segcblist_n_cbs(&rdp->cblist);
+       offloaded = rcu_rdp_is_offloaded(rdp);
         div = READ_ONCE(rcu_divisor);
         div = div < 0 ? 7 : div > sizeof(long) * 8 - 2 ? sizeof(long) * 8 - 2 : div;
         bl = max(rdp->blimit, pending >> div);
diff --git a/kernel/sched/core.c b/kernel/sched/core.c

index 4443794..1065f5d 100644 (file)
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -75,7 +75,11 @@ __read_mostly int sysctl_resched_latency_warn_once = 1;
   * Number of tasks to iterate in a single balance run.
   * Limited because this is done with IRQs disabled.
   */
+#ifdef CONFIG_PREEMPT_RT
+const_debug unsigned int sysctl_sched_nr_migrate = 8;
+#else
  const_debug unsigned int sysctl_sched_nr_migrate = 32;
+#endif
  
  /*
   * period over which we measure -rt task CPU usage in us.
@@ -983,6 +987,46 @@ void resched_curr(struct rq *rq)
                 trace_sched_wake_idle_without_ipi(cpu);
  }
  
+#ifdef CONFIG_PREEMPT_LAZY
+
+static int tsk_is_polling(struct task_struct *p)
+{
+#ifdef TIF_POLLING_NRFLAG
+       return test_tsk_thread_flag(p, TIF_POLLING_NRFLAG);
+#else
+       return 0;
+#endif
+}
+
+void resched_curr_lazy(struct rq *rq)
+{
+       struct task_struct *curr = rq->curr;
+       int cpu;
+
+       if (!sched_feat(PREEMPT_LAZY)) {
+               resched_curr(rq);       /* feature off: plain resched */
+               return;
+       }
+
+       if (test_tsk_need_resched(curr))
+               return;                 /* need_resched already set */
+
+       if (test_tsk_need_resched_lazy(curr))
+               return;                 /* lazy flag already set */
+
+       set_tsk_need_resched_lazy(curr);
+
+       cpu = cpu_of(rq);
+       if (cpu == smp_processor_id())
+               return;                 /* rq is on this CPU: no IPI */
+
+       /* NEED_RESCHED_LAZY must be visible before we test polling */
+       smp_mb();
+       if (!tsk_is_polling(curr))
+               smp_send_reschedule(cpu);
+}
+#endif
+
  void resched_cpu(int cpu)
  {
         struct rq *rq = cpu_rq(cpu);
@@ -2138,6 +2182,7 @@ void migrate_disable(void)
         preempt_disable();
         this_rq()->nr_pinned++;
         p->migration_disabled = 1;
+       preempt_lazy_disable();
         preempt_enable();
  }
  EXPORT_SYMBOL_GPL(migrate_disable);
@@ -2149,6 +2194,8 @@ void migrate_enable(void)
         if (p->migration_disabled > 1) {
                 p->migration_disabled--;
                 return;
+       } else if (WARN_ON_ONCE(p->migration_disabled == 0)) {
+               return;
         }
  
         /*
@@ -2166,6 +2213,7 @@ void migrate_enable(void)
         barrier();
         p->migration_disabled = 0;
         this_rq()->nr_pinned--;
+       preempt_lazy_enable();
         preempt_enable();
  }
  EXPORT_SYMBOL_GPL(migrate_enable);
@@ -3232,7 +3280,7 @@ unsigned long wait_task_inactive(struct task_struct *p, unsigned int match_state
                  * is actually now running somewhere else!
                  */
                 while (task_running(rq, p)) {
-                       if (match_state && unlikely(READ_ONCE(p->__state) != match_state))
+                       if (match_state && !task_match_state_lock(p, match_state))
                                 return 0;
                         cpu_relax();
                 }
@@ -3247,7 +3295,7 @@ unsigned long wait_task_inactive(struct task_struct *p, unsigned int match_state
                 running = task_running(rq, p);
                 queued = task_on_rq_queued(p);
                 ncsw = 0;
-               if (!match_state || READ_ONCE(p->__state) == match_state)
+               if (!match_state || task_match_state_or_saved(p, match_state))
                         ncsw = p->nvcsw | LONG_MIN; /* sets MSB */
                 task_rq_unlock(rq, p, &rf);
  
@@ -3281,7 +3329,7 @@ unsigned long wait_task_inactive(struct task_struct *p, unsigned int match_state
                         ktime_t to = NSEC_PER_SEC / HZ;
  
                         set_current_state(TASK_UNINTERRUPTIBLE);
-                       schedule_hrtimeout(&to, HRTIMER_MODE_REL);
+                       schedule_hrtimeout(&to, HRTIMER_MODE_REL_HARD);
                         continue;
                 }
  
@@ -4424,6 +4472,9 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p)
         p->on_cpu = 0;
  #endif
         init_task_preempt_count(p);
+#ifdef CONFIG_HAVE_PREEMPT_LAZY
+       task_thread_info(p)->preempt_lazy_count = 0;
+#endif
  #ifdef CONFIG_SMP
         plist_node_init(&p->pushable_tasks, MAX_PRIO);
         RB_CLEAR_NODE(&p->pushable_dl_tasks);
@@ -4919,20 +4970,18 @@ static struct rq *finish_task_switch(struct task_struct *prev)
          */
         if (mm) {
                 membarrier_mm_sync_core_before_usermode(mm);
-               mmdrop(mm);
+               mmdrop_sched(mm);
         }
         if (unlikely(prev_state == TASK_DEAD)) {
                 if (prev->sched_class->task_dead)
                         prev->sched_class->task_dead(prev);
  
                 /*
-                * Remove function-return probe instances associated with this
-                * task and put them back on the free list.
+                * Release VMAP'ed task stack immediately for reuse. On
+                * RT-enabled kernels this is delayed for latency reasons.
                  */
-               kprobe_flush_task(prev);
-
-               /* Task is done with its stack. */
-               put_task_stack(prev);
+               if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+                       put_task_stack(prev);
  
                 put_task_struct_rcu_user(prev);
         }
@@ -6332,6 +6381,7 @@ static void __sched notrace __schedule(unsigned int sched_mode)
  
         next = pick_next_task(rq, prev, &rf);
         clear_tsk_need_resched(prev);
+       clear_tsk_need_resched_lazy(prev);
         clear_preempt_need_resched();
  #ifdef CONFIG_SCHED_DEBUG
         rq->last_seen_need_resched_ns = 0;
@@ -6553,6 +6603,30 @@ static void __sched notrace preempt_schedule_common(void)
         } while (need_resched());
  }
  
+#ifdef CONFIG_PREEMPT_LAZY
+/*
+ * If TIF_NEED_RESCHED is set then we allow being scheduled away, since it is
+ * set by an RT task. Otherwise we try to avoid being scheduled out as long as
+ * the preempt_lazy_count counter is > 0.
+ */
+static __always_inline int preemptible_lazy(void)
+{
+       if (test_thread_flag(TIF_NEED_RESCHED))
+               return 1;
+       if (current_thread_info()->preempt_lazy_count)
+               return 0;
+       return 1;
+}
+
+#else
+
+static inline int preemptible_lazy(void)
+{
+       return 1;
+}
+
+#endif
+
  #ifdef CONFIG_PREEMPTION
  /*
   * This is the entry point to schedule() from in-kernel preemption
@@ -6566,7 +6640,8 @@ asmlinkage __visible void __sched notrace preempt_schedule(void)
          */
         if (likely(!preemptible()))
                 return;
-
+       if (!preemptible_lazy())
+               return;
         preempt_schedule_common();
  }
  NOKPROBE_SYMBOL(preempt_schedule);
@@ -6599,6 +6674,9 @@ asmlinkage __visible void __sched notrace preempt_schedule_notrace(void)
         if (likely(!preemptible()))
                 return;
  
+       if (!preemptible_lazy())
+               return;
+
         do {
                 /*
                  * Because the function tracer can trace preempt_count_sub()
@@ -8754,7 +8832,9 @@ void __init init_idle(struct task_struct *idle, int cpu)
  
         /* Set the preempt count _outside_ the spinlocks! */
         init_idle_preempt_count(idle, cpu);
-
+#ifdef CONFIG_HAVE_PREEMPT_LAZY
+       task_thread_info(idle)->preempt_lazy_count = 0;
+#endif
         /*
          * The idle tasks have their own, simple scheduling class:
          */
@@ -9555,14 +9635,8 @@ void __init sched_init(void)
  }
  
  #ifdef CONFIG_DEBUG_ATOMIC_SLEEP
-static inline int preempt_count_equals(int preempt_offset)
-{
-       int nested = preempt_count() + rcu_preempt_depth();
-
-       return (nested == preempt_offset);
-}
  
-void __might_sleep(const char *file, int line, int preempt_offset)
+void __might_sleep(const char *file, int line)
  {
         unsigned int state = get_current_state();
         /*
@@ -9576,11 +9650,32 @@ void __might_sleep(const char *file, int line, int preempt_offset)
                         (void *)current->task_state_change,
                         (void *)current->task_state_change);
  
-       ___might_sleep(file, line, preempt_offset);
+       __might_resched(file, line, 0);
  }
  EXPORT_SYMBOL(__might_sleep);
  
-void ___might_sleep(const char *file, int line, int preempt_offset)
+static void print_preempt_disable_ip(int preempt_offset, unsigned long ip)
+{
+       if (!IS_ENABLED(CONFIG_DEBUG_PREEMPT))
+               return;
+
+       if (preempt_count() == preempt_offset)
+               return;
+
+       pr_err("Preemption disabled at:");
+       print_ip_sym(KERN_ERR, ip);
+}
+
+static inline bool resched_offsets_ok(unsigned int offsets)
+{
+       unsigned int nested = preempt_count();
+
+       nested += rcu_preempt_depth() << MIGHT_RESCHED_RCU_SHIFT;
+
+       return nested == offsets;
+}
+
+void __might_resched(const char *file, int line, unsigned int offsets)
  {
         /* Ratelimiting timestamp: */
         static unsigned long prev_jiffy;
@@ -9590,7 +9685,7 @@ void ___might_sleep(const char *file, int line, int preempt_offset)
         /* WARN_ON_ONCE() by default, no rate limit required: */
         rcu_sleep_check();
  
-       if ((preempt_count_equals(preempt_offset) && !irqs_disabled() &&
+       if ((resched_offsets_ok(offsets) && !irqs_disabled() &&
              !is_idle_task(current) && !current->non_block_count) ||
             system_state == SYSTEM_BOOTING || system_state > SYSTEM_RUNNING ||
             oops_in_progress)
@@ -9603,29 +9698,33 @@ void ___might_sleep(const char *file, int line, int preempt_offset)
         /* Save this before calling printk(), since that will clobber it: */
         preempt_disable_ip = get_preempt_disable_ip(current);
  
-       printk(KERN_ERR
-               "BUG: sleeping function called from invalid context at %s:%d\n",
-                       file, line);
-       printk(KERN_ERR
-               "in_atomic(): %d, irqs_disabled(): %d, non_block: %d, pid: %d, name: %s\n",
-                       in_atomic(), irqs_disabled(), current->non_block_count,
-                       current->pid, current->comm);
+       pr_err("BUG: sleeping function called from invalid context at %s:%d\n",
+              file, line);
+       pr_err("in_atomic(): %d, irqs_disabled(): %d, non_block: %d, pid: %d, name: %s\n",
+              in_atomic(), irqs_disabled(), current->non_block_count,
+              current->pid, current->comm);
+       pr_err("preempt_count: %x, expected: %x\n", preempt_count(),
+              offsets & MIGHT_RESCHED_PREEMPT_MASK);
+
+       if (IS_ENABLED(CONFIG_PREEMPT_RCU)) {
+               pr_err("RCU nest depth: %d, expected: %u\n",
+                      rcu_preempt_depth(), offsets >> MIGHT_RESCHED_RCU_SHIFT);
+       }
  
         if (task_stack_end_corrupted(current))
-               printk(KERN_EMERG "Thread overran stack, or stack corrupted\n");
+               pr_emerg("Thread overran stack, or stack corrupted\n");
  
         debug_show_held_locks(current);
         if (irqs_disabled())
                 print_irqtrace_events(current);
-       if (IS_ENABLED(CONFIG_DEBUG_PREEMPT)
-           && !preempt_count_equals(preempt_offset)) {
-               pr_err("Preemption disabled at:");
-               print_ip_sym(KERN_ERR, preempt_disable_ip);
-       }
+
+       print_preempt_disable_ip(offsets & MIGHT_RESCHED_PREEMPT_MASK,
+                                preempt_disable_ip);
+
         dump_stack();
         add_taint(TAINT_WARN, LOCKDEP_STILL_OK);
  }
-EXPORT_SYMBOL(___might_sleep);
+EXPORT_SYMBOL(__might_resched);
  
  void __cant_sleep(const char *file, int line, int preempt_offset)
  {
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c

index 6648683..b46225a 100644 (file)
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -4584,7 +4584,7 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr)
         ideal_runtime = sched_slice(cfs_rq, curr);
         delta_exec = curr->sum_exec_runtime - curr->prev_sum_exec_runtime;
         if (delta_exec > ideal_runtime) {
-               resched_curr(rq_of(cfs_rq));
+               resched_curr_lazy(rq_of(cfs_rq));
                 /*
                  * The current task ran long enough, ensure it doesn't get
                  * re-elected due to buddy favours.
@@ -4608,7 +4608,7 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr)
                 return;
  
         if (delta > ideal_runtime)
-               resched_curr(rq_of(cfs_rq));
+               resched_curr_lazy(rq_of(cfs_rq));
  }
  
  static void
@@ -4751,7 +4751,7 @@ entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued)
          * validating it and just reschedule.
          */
         if (queued) {
-               resched_curr(rq_of(cfs_rq));
+               resched_curr_lazy(rq_of(cfs_rq));
                 return;
         }
         /*
@@ -4891,7 +4891,7 @@ static void __account_cfs_rq_runtime(struct cfs_rq *cfs_rq, u64 delta_exec)
          * hierarchy can be throttled
          */
         if (!assign_cfs_rq_runtime(cfs_rq) && likely(cfs_rq->curr))
-               resched_curr(rq_of(cfs_rq));
+               resched_curr_lazy(rq_of(cfs_rq));
  }
  
  static __always_inline
@@ -5654,7 +5654,7 @@ static void hrtick_start_fair(struct rq *rq, struct task_struct *p)
  
                 if (delta < 0) {
                         if (task_current(rq, p))
-                               resched_curr(rq);
+                               resched_curr_lazy(rq);
                         return;
                 }
                 hrtick_start(rq, delta);
@@ -7360,7 +7360,7 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
         return;
  
  preempt:
-       resched_curr(rq);
+       resched_curr_lazy(rq);
         /*
          * Only set the backward buddy when the current task is still
          * on the rq. This can happen when a wakeup gets interleaved
@@ -11342,7 +11342,7 @@ static void task_fork_fair(struct task_struct *p)
                  * 'current' within the tree based on its new key value.
                  */
                 swap(curr->vruntime, se->vruntime);
-               resched_curr(rq);
+               resched_curr_lazy(rq);
         }
  
         se->vruntime -= cfs_rq->min_vruntime;
@@ -11369,7 +11369,7 @@ prio_changed_fair(struct rq *rq, struct task_struct *p, int oldprio)
          */
         if (task_current(rq, p)) {
                 if (p->prio > oldprio)
-                       resched_curr(rq);
+                       resched_curr_lazy(rq);
         } else
                 check_preempt_curr(rq, p, 0);
  }
diff --git a/kernel/sched/features.h b/kernel/sched/features.h

index c4947c1..e13090e 100644 (file)
--- a/kernel/sched/features.h
+++ b/kernel/sched/features.h
@@ -46,11 +46,19 @@ SCHED_FEAT(DOUBLE_TICK, false)
   */
  SCHED_FEAT(NONTASK_CAPACITY, true)
  
+#ifdef CONFIG_PREEMPT_RT
+SCHED_FEAT(TTWU_QUEUE, false)
+# ifdef CONFIG_PREEMPT_LAZY
+SCHED_FEAT(PREEMPT_LAZY, true)
+# endif
+#else
+
  /*
   * Queue remote wakeups on the target CPU and process them
   * using the scheduler IPI. Reduces rq->lock contention/bounces.
   */
  SCHED_FEAT(TTWU_QUEUE, true)
+#endif
  
  /*
   * When doing wakeups, attempt to limit superfluous scans of the LLC domain.
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h

index e1f46ed..b6ba508 100644 (file)
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -2326,6 +2326,15 @@ extern void reweight_task(struct task_struct *p, int prio);
  extern void resched_curr(struct rq *rq);
  extern void resched_cpu(int cpu);
  
+#ifdef CONFIG_PREEMPT_LAZY
+extern void resched_curr_lazy(struct rq *rq);
+#else
+static inline void resched_curr_lazy(struct rq *rq)
+{
+       resched_curr(rq);
+}
+#endif
+
  extern struct rt_bandwidth def_rt_bandwidth;
  extern void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime);
  
diff --git a/kernel/sched/swait.c b/kernel/sched/swait.c

index e1c655f..f230b1a 100644 (file)
--- a/kernel/sched/swait.c
+++ b/kernel/sched/swait.c
@@ -64,6 +64,7 @@ void swake_up_all(struct swait_queue_head *q)
         struct swait_queue *curr;
         LIST_HEAD(tmp);
  
+       WARN_ON(irqs_disabled());
         raw_spin_lock_irq(&q->lock);
         list_splice_init(&q->task_list, &tmp);
         while (!list_empty(&tmp)) {
diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c

index 4e8698e..3d0157b 100644 (file)
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -526,7 +526,7 @@ static int init_rootdomain(struct root_domain *rd)
  #ifdef HAVE_RT_PUSH_IPI
         rd->rto_cpu = -1;
         raw_spin_lock_init(&rd->rto_lock);
-       init_irq_work(&rd->rto_push_work, rto_push_irq_work_func);
+       rd->rto_push_work = IRQ_WORK_INIT_HARD(rto_push_irq_work_func);
  #endif
  
         rd->visit_gen = 0;
diff --git a/kernel/signal.c b/kernel/signal.c

index c7dbb19..0bbd89f 100644 (file)
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1324,6 +1324,34 @@ force_sig_info_to_task(struct kernel_siginfo *info, struct task_struct *t,
         struct k_sigaction *action;
         int sig = info->si_signo;
  
+       /*
+        * On some archs, PREEMPT_RT has to delay sending a signal from a trap
+        * since it can not enable preemption, and the signal code's spin_locks
+        * turn into mutexes. Instead, it must set TIF_NOTIFY_RESUME which will
+        * send the signal on exit of the trap.
+        */
+#ifdef ARCH_RT_DELAYS_SIGNAL_SEND
+       if (in_atomic()) {
+               struct task_struct *t = current;
+
+               if (WARN_ON_ONCE(t->forced_info.si_signo))
+                       return 0;
+
+               if (is_si_special(info)) {
+                       WARN_ON_ONCE(info != SEND_SIG_PRIV);
+                       t->forced_info.si_signo = info->si_signo;
+                       t->forced_info.si_errno = 0;
+                       t->forced_info.si_code = SI_KERNEL;
+                       t->forced_info.si_pid = 0;
+                       t->forced_info.si_uid = 0;
+               } else {
+                       t->forced_info = *info;
+               }
+
+               set_tsk_thread_flag(t, TIF_NOTIFY_RESUME);
+               return 0;
+       }
+#endif
         spin_lock_irqsave(&t->sighand->siglock, flags);
         action = &t->sighand->action[sig-1];
         ignored = action->sa.sa_handler == SIG_IGN;
@@ -2308,16 +2336,8 @@ static void ptrace_stop(int exit_code, int why, int clear_code, kernel_siginfo_t
                 if (gstop_done && ptrace_reparented(current))
                         do_notify_parent_cldstop(current, false, why);
  
-               /*
-                * Don't want to allow preemption here, because
-                * sys_ptrace() needs this task to be inactive.
-                *
-                * XXX: implement read_unlock_no_resched().
-                */
-               preempt_disable();
                 read_unlock(&tasklist_lock);
                 cgroup_enter_frozen();
-               preempt_enable_no_resched();
                 freezable_schedule();
                 cgroup_leave_frozen(true);
         } else {
diff --git a/kernel/smp.c b/kernel/smp.c

index 8282534..9d3c8c5 100644 (file)
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -690,10 +690,20 @@ void flush_smp_call_function_from_idle(void)
  
         cfd_seq_store(this_cpu_ptr(&cfd_seq_local)->idle, CFD_SEQ_NOCPU,
                       smp_processor_id(), CFD_SEQ_IDLE);
+
         local_irq_save(flags);
         flush_smp_call_function_queue(true);
-       if (local_softirq_pending())
-               do_softirq();
+
+       if (local_softirq_pending()) {
+               if (!IS_ENABLED(CONFIG_PREEMPT_RT)) {
+                       do_softirq();
+               } else {
+                       struct task_struct *ksoftirqd = this_cpu_ksoftirqd();
+
+                       if (ksoftirqd && !task_is_running(ksoftirqd))
+                               wake_up_process(ksoftirqd);
+               }
+       }
  
         local_irq_restore(flags);
  }
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c

index 06ff4de..64f4835 100644 (file)
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -2630,7 +2630,13 @@ unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
                 trace_flags |= TRACE_FLAG_NEED_RESCHED;
         if (test_preempt_need_resched())
                 trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
-       return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) |
+#ifdef CONFIG_PREEMPT_LAZY
+       if (need_resched_lazy())
+               trace_flags |= TRACE_FLAG_NEED_RESCHED_LAZY;
+#endif
+
+       return (trace_flags << 24) | (min_t(unsigned int, pc & 0xff, 0xf)) |
+               (preempt_lazy_count() & 0xff) << 16 |
                 (min_t(unsigned int, migration_disable_value(), 0xf)) << 4;
  }
  
@@ -4211,15 +4217,17 @@ unsigned long trace_total_entries(struct trace_array *tr)
  
  static void print_lat_help_header(struct seq_file *m)
  {
-       seq_puts(m, "#                    _------=> CPU#            \n"
-                   "#                   / _-----=> irqs-off        \n"
-                   "#                  | / _----=> need-resched    \n"
-                   "#                  || / _---=> hardirq/softirq \n"
-                   "#                  ||| / _--=> preempt-depth   \n"
-                   "#                  |||| / _-=> migrate-disable \n"
-                   "#                  ||||| /     delay           \n"
-                   "#  cmd     pid     |||||| time  |   caller     \n"
-                   "#     \\   /        ||||||  \\    |    /       \n");
+       seq_puts(m, "#                    _--------=> CPU#            \n"
+                   "#                   / _-------=> irqs-off        \n"
+                   "#                  | / _------=> need-resched    \n"
+                   "#                  || / _-----=> need-resched-lazy\n"
+                   "#                  ||| / _----=> hardirq/softirq \n"
+                   "#                  |||| / _---=> preempt-depth   \n"
+                   "#                  ||||| / _--=> preempt-lazy-depth\n"
+                   "#                  |||||| / _-=> migrate-disable \n"
+                   "#                  ||||||| /     delay           \n"
+                   "#  cmd     pid     |||||||| time  |   caller     \n"
+                   "#     \\   /        ||||||||  \\    |    /       \n");
  }
  
  static void print_event_info(struct array_buffer *buf, struct seq_file *m)
@@ -4253,14 +4261,16 @@ static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file
  
         print_event_info(buf, m);
  
-       seq_printf(m, "#                            %.*s  _-----=> irqs-off\n", prec, space);
-       seq_printf(m, "#                            %.*s / _----=> need-resched\n", prec, space);
-       seq_printf(m, "#                            %.*s| / _---=> hardirq/softirq\n", prec, space);
-       seq_printf(m, "#                            %.*s|| / _--=> preempt-depth\n", prec, space);
-       seq_printf(m, "#                            %.*s||| / _-=> migrate-disable\n", prec, space);
-       seq_printf(m, "#                            %.*s|||| /     delay\n", prec, space);
-       seq_printf(m, "#           TASK-PID  %.*s CPU#  |||||  TIMESTAMP  FUNCTION\n", prec, "     TGID   ");
-       seq_printf(m, "#              | |    %.*s   |   |||||     |         |\n", prec, "       |    ");
+       seq_printf(m, "#                            %.*s  _-------=> irqs-off\n", prec, space);
+       seq_printf(m, "#                            %.*s / _------=> need-resched\n", prec, space);
+       seq_printf(m, "#                            %.*s| / _-----=> need-resched-lazy\n", prec, space);
+       seq_printf(m, "#                            %.*s|| / _----=> hardirq/softirq\n", prec, space);
+       seq_printf(m, "#                            %.*s||| / _---=> preempt-depth\n", prec, space);
+       seq_printf(m, "#                            %.*s|||| / _--=> preempt-lazy-depth\n", prec, space);
+       seq_printf(m, "#                            %.*s||||| / _-=> migrate-disable\n", prec, space);
+       seq_printf(m, "#                            %.*s|||||| /     delay\n", prec, space);
+       seq_printf(m, "#           TASK-PID  %.*s CPU#  |||||||  TIMESTAMP  FUNCTION\n", prec, "     TGID   ");
+       seq_printf(m, "#              | |    %.*s   |   |||||||      |         |\n", prec, "       |    ");
  }
  
  void
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c

index 1aadc9a..ef0bd54 100644 (file)
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -193,6 +193,7 @@ static int trace_define_common_fields(void)
         /* Holds both preempt_count and migrate_disable */
         __common_field(unsigned char, preempt_count);
         __common_field(int, pid);
+       __common_field(unsigned char, preempt_lazy_count);
  
         return ret;
  }
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c

index 6b4d3f3..460bc82 100644 (file)
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -451,6 +451,7 @@ int trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry)
  {
         char hardsoft_irq;
         char need_resched;
+       char need_resched_lazy;
         char irqs_off;
         int hardirq;
         int softirq;
@@ -481,6 +482,9 @@ int trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry)
                 break;
         }
  
+       need_resched_lazy =
+               (entry->flags & TRACE_FLAG_NEED_RESCHED_LAZY) ? 'L' : '.';
+
         hardsoft_irq =
                 (nmi && hardirq)     ? 'Z' :
                 nmi                  ? 'z' :
@@ -489,14 +493,20 @@ int trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry)
                 softirq              ? 's' :
                                        '.' ;
  
-       trace_seq_printf(s, "%c%c%c",
-                        irqs_off, need_resched, hardsoft_irq);
+       trace_seq_printf(s, "%c%c%c%c",
+                        irqs_off, need_resched, need_resched_lazy,
+                        hardsoft_irq);
  
         if (entry->preempt_count & 0xf)
                 trace_seq_printf(s, "%x", entry->preempt_count & 0xf);
         else
                 trace_seq_putc(s, '.');
  
+       if (entry->preempt_lazy_count)
+               trace_seq_printf(s, "%x", entry->preempt_lazy_count);
+       else
+               trace_seq_putc(s, '.');
+
         if (entry->preempt_count & 0xf0)
                 trace_seq_printf(s, "%x", entry->preempt_count >> 4);
         else
diff --git a/lib/bug.c b/lib/bug.c

index 45a0584..03a87df 100644 (file)
--- a/lib/bug.c
+++ b/lib/bug.c
@@ -206,6 +206,7 @@ enum bug_trap_type report_bug(unsigned long bugaddr, struct pt_regs *regs)
         else
                 pr_crit("Kernel BUG at %pB [verbose debug info unavailable]\n",
                         (void *)bugaddr);
+       pr_flush(1000, true);
  
         return BUG_TRAP_TYPE_BUG;
  }
diff --git a/lib/dump_stack.c b/lib/dump_stack.c

index 6b7f1bf..6e8ae42 100644 (file)
--- a/lib/dump_stack.c
+++ b/lib/dump_stack.c
@@ -102,9 +102,9 @@ asmlinkage __visible void dump_stack_lvl(const char *log_lvl)
          * Permit this cpu to perform nested stack dumps while serialising
          * against other CPUs
          */
-       printk_cpu_lock_irqsave(flags);
+       raw_printk_cpu_lock_irqsave(flags);
         __dump_stack(log_lvl);
-       printk_cpu_unlock_irqrestore(flags);
+       raw_printk_cpu_unlock_irqrestore(flags);
  }
  EXPORT_SYMBOL(dump_stack_lvl);
  
diff --git a/lib/irq_poll.c b/lib/irq_poll.c

index 2f17b48..2b9f797 100644 (file)
--- a/lib/irq_poll.c
+++ b/lib/irq_poll.c
@@ -191,11 +191,13 @@ static int irq_poll_cpu_dead(unsigned int cpu)
          * If a CPU goes away, splice its entries to the current CPU
          * and trigger a run of the softirq
          */
+       local_bh_disable();
         local_irq_disable();
         list_splice_init(&per_cpu(blk_cpu_iopoll, cpu),
                          this_cpu_ptr(&blk_cpu_iopoll));
         __raise_softirq_irqoff(IRQ_POLL_SOFTIRQ);
         local_irq_enable();
+       local_bh_enable();
  
         return 0;
  }
diff --git a/lib/locking-selftest.c b/lib/locking-selftest.c

index 161108e..1266ea3 100644 (file)
--- a/lib/locking-selftest.c
+++ b/lib/locking-selftest.c
@@ -26,6 +26,12 @@
  #include <linux/rtmutex.h>
  #include <linux/local_lock.h>
  
+#ifdef CONFIG_PREEMPT_RT
+# define NON_RT(...)
+#else
+# define NON_RT(...)   __VA_ARGS__
+#endif
+
  /*
   * Change this to 1 if you want to see the failure printouts:
   */
@@ -139,7 +145,7 @@ static DEFINE_RT_MUTEX(rtmutex_Z2);
  
  #endif
  
-static local_lock_t local_A = INIT_LOCAL_LOCK(local_A);
+static DEFINE_PER_CPU(local_lock_t, local_A);
  
  /*
   * non-inlined runtime initializers, to let separate locks share
@@ -712,12 +718,18 @@ GENERATE_TESTCASE(ABCDBCDA_rtmutex);
  
  #undef E
  
+#ifdef CONFIG_PREEMPT_RT
+# define RT_PREPARE_DBL_UNLOCK()       { migrate_disable(); rcu_read_lock(); }
+#else
+# define RT_PREPARE_DBL_UNLOCK()
+#endif
  /*
   * Double unlock:
   */
  #define E()                                    \
                                                 \
         LOCK(A);                                \
+       RT_PREPARE_DBL_UNLOCK();                \
         UNLOCK(A);                              \
         UNLOCK(A); /* fail */
  
@@ -802,6 +814,7 @@ GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_hard_rlock)
  #include "locking-selftest-wlock-hardirq.h"
  GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_hard_wlock)
  
+#ifndef CONFIG_PREEMPT_RT
  #include "locking-selftest-spin-softirq.h"
  GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_soft_spin)
  
@@ -810,10 +823,12 @@ GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_soft_rlock)
  
  #include "locking-selftest-wlock-softirq.h"
  GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_soft_wlock)
+#endif
  
  #undef E1
  #undef E2
  
+#ifndef CONFIG_PREEMPT_RT
  /*
   * Enabling hardirqs with a softirq-safe lock held:
   */
@@ -846,6 +861,8 @@ GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2A_rlock)
  #undef E1
  #undef E2
  
+#endif
+
  /*
   * Enabling irqs with an irq-safe lock held:
   */
@@ -875,6 +892,7 @@ GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_hard_rlock)
  #include "locking-selftest-wlock-hardirq.h"
  GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_hard_wlock)
  
+#ifndef CONFIG_PREEMPT_RT
  #include "locking-selftest-spin-softirq.h"
  GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_soft_spin)
  
@@ -883,6 +901,7 @@ GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_soft_rlock)
  
  #include "locking-selftest-wlock-softirq.h"
  GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_soft_wlock)
+#endif
  
  #undef E1
  #undef E2
@@ -921,6 +940,7 @@ GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_hard_rlock)
  #include "locking-selftest-wlock-hardirq.h"
  GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_hard_wlock)
  
+#ifndef CONFIG_PREEMPT_RT
  #include "locking-selftest-spin-softirq.h"
  GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_soft_spin)
  
@@ -929,6 +949,7 @@ GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_soft_rlock)
  
  #include "locking-selftest-wlock-softirq.h"
  GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_soft_wlock)
+#endif
  
  #undef E1
  #undef E2
@@ -969,6 +990,7 @@ GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_hard_rlock)
  #include "locking-selftest-wlock-hardirq.h"
  GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_hard_wlock)
  
+#ifndef CONFIG_PREEMPT_RT
  #include "locking-selftest-spin-softirq.h"
  GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_soft_spin)
  
@@ -977,6 +999,7 @@ GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_soft_rlock)
  
  #include "locking-selftest-wlock-softirq.h"
  GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_soft_wlock)
+#endif
  
  #undef E1
  #undef E2
@@ -1031,6 +1054,7 @@ GENERATE_PERMUTATIONS_3_EVENTS(irq_inversion_hard_rlock)
  #include "locking-selftest-wlock-hardirq.h"
  GENERATE_PERMUTATIONS_3_EVENTS(irq_inversion_hard_wlock)
  
+#ifndef CONFIG_PREEMPT_RT
  #include "locking-selftest-spin-softirq.h"
  GENERATE_PERMUTATIONS_3_EVENTS(irq_inversion_soft_spin)
  
@@ -1039,6 +1063,7 @@ GENERATE_PERMUTATIONS_3_EVENTS(irq_inversion_soft_rlock)
  
  #include "locking-selftest-wlock-softirq.h"
  GENERATE_PERMUTATIONS_3_EVENTS(irq_inversion_soft_wlock)
+#endif
  
  #undef E1
  #undef E2
@@ -1206,12 +1231,14 @@ GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion_hard_rlock)
  #include "locking-selftest-wlock.h"
  GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion_hard_wlock)
  
+#ifndef CONFIG_PREEMPT_RT
  #include "locking-selftest-softirq.h"
  #include "locking-selftest-rlock.h"
  GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion_soft_rlock)
  
  #include "locking-selftest-wlock.h"
  GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion_soft_wlock)
+#endif
  
  #undef E1
  #undef E2
@@ -1252,12 +1279,14 @@ GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion2_hard_rlock)
  #include "locking-selftest-wlock.h"
  GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion2_hard_wlock)
  
+#ifndef CONFIG_PREEMPT_RT
  #include "locking-selftest-softirq.h"
  #include "locking-selftest-rlock.h"
  GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion2_soft_rlock)
  
  #include "locking-selftest-wlock.h"
  GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion2_soft_wlock)
+#endif
  
  #undef E1
  #undef E2
@@ -1306,12 +1335,14 @@ GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion3_hard_rlock)
  #include "locking-selftest-wlock.h"
  GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion3_hard_wlock)
  
+#ifndef CONFIG_PREEMPT_RT
  #include "locking-selftest-softirq.h"
  #include "locking-selftest-rlock.h"
  GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion3_soft_rlock)
  
  #include "locking-selftest-wlock.h"
  GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion3_soft_wlock)
+#endif
  
  #ifdef CONFIG_DEBUG_LOCK_ALLOC
  # define I_SPINLOCK(x) lockdep_reset_lock(&lock_##x.dep_map)
@@ -1320,7 +1351,7 @@ GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion3_soft_wlock)
  # define I_MUTEX(x)    lockdep_reset_lock(&mutex_##x.dep_map)
  # define I_RWSEM(x)    lockdep_reset_lock(&rwsem_##x.dep_map)
  # define I_WW(x)       lockdep_reset_lock(&x.dep_map)
-# define I_LOCAL_LOCK(x) lockdep_reset_lock(&local_##x.dep_map)
+# define I_LOCAL_LOCK(x) lockdep_reset_lock(this_cpu_ptr(&local_##x.dep_map))
  #ifdef CONFIG_RT_MUTEXES
  # define I_RTMUTEX(x)  lockdep_reset_lock(&rtmutex_##x.dep_map)
  #endif
@@ -1380,7 +1411,7 @@ static void reset_locks(void)
         init_shared_classes();
         raw_spin_lock_init(&raw_lock_A);
         raw_spin_lock_init(&raw_lock_B);
-       local_lock_init(&local_A);
+       local_lock_init(this_cpu_ptr(&local_A));
  
         ww_mutex_init(&o, &ww_lockdep); ww_mutex_init(&o2, &ww_lockdep); ww_mutex_init(&o3, &ww_lockdep);
         memset(&t, 0, sizeof(t)); memset(&t2, 0, sizeof(t2));
@@ -1398,7 +1429,13 @@ static int unexpected_testcase_failures;
  
  static void dotest(void (*testcase_fn)(void), int expected, int lockclass_mask)
  {
-       unsigned long saved_preempt_count = preempt_count();
+       int saved_preempt_count = preempt_count();
+#ifdef CONFIG_PREEMPT_RT
+#ifdef CONFIG_SMP
+       int saved_mgd_count = current->migration_disabled;
+#endif
+       int saved_rcu_count = current->rcu_read_lock_nesting;
+#endif
  
         WARN_ON(irqs_disabled());
  
@@ -1432,6 +1469,18 @@ static void dotest(void (*testcase_fn)(void), int expected, int lockclass_mask)
          * count, so restore it:
          */
         preempt_count_set(saved_preempt_count);
+
+#ifdef CONFIG_PREEMPT_RT
+#ifdef CONFIG_SMP
+       while (current->migration_disabled > saved_mgd_count)
+               migrate_enable();
+#endif
+
+       while (current->rcu_read_lock_nesting > saved_rcu_count)
+               rcu_read_unlock();
+       WARN_ON_ONCE(current->rcu_read_lock_nesting < saved_rcu_count);
+#endif
+
  #ifdef CONFIG_TRACE_IRQFLAGS
         if (softirq_count())
                 current->softirqs_enabled = 0;
@@ -1499,7 +1548,7 @@ static inline void print_testname(const char *testname)
  
  #define DO_TESTCASE_2x2RW(desc, name, nr)                      \
         DO_TESTCASE_2RW("hard-"desc, name##_hard, nr)           \
-       DO_TESTCASE_2RW("soft-"desc, name##_soft, nr)           \
+       NON_RT(DO_TESTCASE_2RW("soft-"desc, name##_soft, nr))   \
  
  #define DO_TESTCASE_6x2x2RW(desc, name)                                \
         DO_TESTCASE_2x2RW(desc, name, 123);                     \
@@ -1547,19 +1596,19 @@ static inline void print_testname(const char *testname)
  
  #define DO_TESTCASE_2I(desc, name, nr)                         \
         DO_TESTCASE_1("hard-"desc, name##_hard, nr);            \
-       DO_TESTCASE_1("soft-"desc, name##_soft, nr);
+       NON_RT(DO_TESTCASE_1("soft-"desc, name##_soft, nr));
  
  #define DO_TESTCASE_2IB(desc, name, nr)                                \
         DO_TESTCASE_1B("hard-"desc, name##_hard, nr);           \
-       DO_TESTCASE_1B("soft-"desc, name##_soft, nr);
+       NON_RT(DO_TESTCASE_1B("soft-"desc, name##_soft, nr));
  
  #define DO_TESTCASE_6I(desc, name, nr)                         \
         DO_TESTCASE_3("hard-"desc, name##_hard, nr);            \
-       DO_TESTCASE_3("soft-"desc, name##_soft, nr);
+       NON_RT(DO_TESTCASE_3("soft-"desc, name##_soft, nr));
  
  #define DO_TESTCASE_6IRW(desc, name, nr)                       \
         DO_TESTCASE_3RW("hard-"desc, name##_hard, nr);          \
-       DO_TESTCASE_3RW("soft-"desc, name##_soft, nr);
+       NON_RT(DO_TESTCASE_3RW("soft-"desc, name##_soft, nr));
  
  #define DO_TESTCASE_2x3(desc, name)                            \
         DO_TESTCASE_3(desc, name, 12);                          \
@@ -1651,6 +1700,20 @@ static void ww_test_fail_acquire(void)
  #endif
  }
  
+#ifdef CONFIG_PREEMPT_RT
+#define ww_mutex_base_lock(b)                  rt_mutex_lock(b)
+#define ww_mutex_base_lock_nest_lock(b, b2)    rt_mutex_lock_nest_lock(b, b2)
+#define ww_mutex_base_lock_interruptible(b)    rt_mutex_lock_interruptible(b)
+#define ww_mutex_base_lock_killable(b)         rt_mutex_lock_killable(b)
+#define ww_mutex_base_unlock(b)                        rt_mutex_unlock(b)
+#else
+#define ww_mutex_base_lock(b)                  mutex_lock(b)
+#define ww_mutex_base_lock_nest_lock(b, b2)    mutex_lock_nest_lock(b, b2)
+#define ww_mutex_base_lock_interruptible(b)    mutex_lock_interruptible(b)
+#define ww_mutex_base_lock_killable(b)         mutex_lock_killable(b)
+#define ww_mutex_base_unlock(b)                        mutex_unlock(b)
+#endif
+
  static void ww_test_normal(void)
  {
         int ret;
@@ -1665,50 +1728,50 @@ static void ww_test_normal(void)
  
         /* mutex_lock (and indirectly, mutex_lock_nested) */
         o.ctx = (void *)~0UL;
-       mutex_lock(&o.base);
-       mutex_unlock(&o.base);
+       ww_mutex_base_lock(&o.base);
+       ww_mutex_base_unlock(&o.base);
         WARN_ON(o.ctx != (void *)~0UL);
  
         /* mutex_lock_interruptible (and *_nested) */
         o.ctx = (void *)~0UL;
-       ret = mutex_lock_interruptible(&o.base);
+       ret = ww_mutex_base_lock_interruptible(&o.base);
         if (!ret)
-               mutex_unlock(&o.base);
+               ww_mutex_base_unlock(&o.base);
         else
                 WARN_ON(1);
         WARN_ON(o.ctx != (void *)~0UL);
  
         /* mutex_lock_killable (and *_nested) */
         o.ctx = (void *)~0UL;
-       ret = mutex_lock_killable(&o.base);
+       ret = ww_mutex_base_lock_killable(&o.base);
         if (!ret)
-               mutex_unlock(&o.base);
+               ww_mutex_base_unlock(&o.base);
         else
                 WARN_ON(1);
         WARN_ON(o.ctx != (void *)~0UL);
  
         /* trylock, succeeding */
         o.ctx = (void *)~0UL;
-       ret = mutex_trylock(&o.base);
+       ret = ww_mutex_base_trylock(&o.base);
         WARN_ON(!ret);
         if (ret)
-               mutex_unlock(&o.base);
+               ww_mutex_base_unlock(&o.base);
         else
                 WARN_ON(1);
         WARN_ON(o.ctx != (void *)~0UL);
  
         /* trylock, failing */
         o.ctx = (void *)~0UL;
-       mutex_lock(&o.base);
-       ret = mutex_trylock(&o.base);
+       ww_mutex_base_lock(&o.base);
+       ret = ww_mutex_base_trylock(&o.base);
         WARN_ON(ret);
-       mutex_unlock(&o.base);
+       ww_mutex_base_unlock(&o.base);
         WARN_ON(o.ctx != (void *)~0UL);
  
         /* nest_lock */
         o.ctx = (void *)~0UL;
-       mutex_lock_nest_lock(&o.base, &t);
-       mutex_unlock(&o.base);
+       ww_mutex_base_lock_nest_lock(&o.base, &t);
+       ww_mutex_base_unlock(&o.base);
         WARN_ON(o.ctx != (void *)~0UL);
  }
  
@@ -1721,7 +1784,7 @@ static void ww_test_two_contexts(void)
  static void ww_test_diff_class(void)
  {
         WWAI(&t);
-#ifdef CONFIG_DEBUG_MUTEXES
+#ifdef DEBUG_WW_MUTEXES
         t.ww_class = NULL;
  #endif
         WWL(&o, &t);
@@ -1785,7 +1848,7 @@ static void ww_test_edeadlk_normal(void)
  {
         int ret;
  
-       mutex_lock(&o2.base);
+       ww_mutex_base_lock(&o2.base);
         o2.ctx = &t2;
         mutex_release(&o2.base.dep_map, _THIS_IP_);
  
@@ -1801,7 +1864,7 @@ static void ww_test_edeadlk_normal(void)
  
         o2.ctx = NULL;
         mutex_acquire(&o2.base.dep_map, 0, 1, _THIS_IP_);
-       mutex_unlock(&o2.base);
+       ww_mutex_base_unlock(&o2.base);
         WWU(&o);
  
         WWL(&o2, &t);
@@ -1811,7 +1874,7 @@ static void ww_test_edeadlk_normal_slow(void)
  {
         int ret;
  
-       mutex_lock(&o2.base);
+       ww_mutex_base_lock(&o2.base);
         mutex_release(&o2.base.dep_map, _THIS_IP_);
         o2.ctx = &t2;
  
@@ -1827,7 +1890,7 @@ static void ww_test_edeadlk_normal_slow(void)
  
         o2.ctx = NULL;
         mutex_acquire(&o2.base.dep_map, 0, 1, _THIS_IP_);
-       mutex_unlock(&o2.base);
+       ww_mutex_base_unlock(&o2.base);
         WWU(&o);
  
         ww_mutex_lock_slow(&o2, &t);
@@ -1837,7 +1900,7 @@ static void ww_test_edeadlk_no_unlock(void)
  {
         int ret;
  
-       mutex_lock(&o2.base);
+       ww_mutex_base_lock(&o2.base);
         o2.ctx = &t2;
         mutex_release(&o2.base.dep_map, _THIS_IP_);
  
@@ -1853,7 +1916,7 @@ static void ww_test_edeadlk_no_unlock(void)
  
         o2.ctx = NULL;
         mutex_acquire(&o2.base.dep_map, 0, 1, _THIS_IP_);
-       mutex_unlock(&o2.base);
+       ww_mutex_base_unlock(&o2.base);
  
         WWL(&o2, &t);
  }
@@ -1862,7 +1925,7 @@ static void ww_test_edeadlk_no_unlock_slow(void)
  {
         int ret;
  
-       mutex_lock(&o2.base);
+       ww_mutex_base_lock(&o2.base);
         mutex_release(&o2.base.dep_map, _THIS_IP_);
         o2.ctx = &t2;
  
@@ -1878,7 +1941,7 @@ static void ww_test_edeadlk_no_unlock_slow(void)
  
         o2.ctx = NULL;
         mutex_acquire(&o2.base.dep_map, 0, 1, _THIS_IP_);
-       mutex_unlock(&o2.base);
+       ww_mutex_base_unlock(&o2.base);
  
         ww_mutex_lock_slow(&o2, &t);
  }
@@ -1887,7 +1950,7 @@ static void ww_test_edeadlk_acquire_more(void)
  {
         int ret;
  
-       mutex_lock(&o2.base);
+       ww_mutex_base_lock(&o2.base);
         mutex_release(&o2.base.dep_map, _THIS_IP_);
         o2.ctx = &t2;
  
@@ -1908,7 +1971,7 @@ static void ww_test_edeadlk_acquire_more_slow(void)
  {
         int ret;
  
-       mutex_lock(&o2.base);
+       ww_mutex_base_lock(&o2.base);
         mutex_release(&o2.base.dep_map, _THIS_IP_);
         o2.ctx = &t2;
  
@@ -1929,11 +1992,11 @@ static void ww_test_edeadlk_acquire_more_edeadlk(void)
  {
         int ret;
  
-       mutex_lock(&o2.base);
+       ww_mutex_base_lock(&o2.base);
         mutex_release(&o2.base.dep_map, _THIS_IP_);
         o2.ctx = &t2;
  
-       mutex_lock(&o3.base);
+       ww_mutex_base_lock(&o3.base);
         mutex_release(&o3.base.dep_map, _THIS_IP_);
         o3.ctx = &t2;
  
@@ -1955,11 +2018,11 @@ static void ww_test_edeadlk_acquire_more_edeadlk_slow(void)
  {
         int ret;
  
-       mutex_lock(&o2.base);
+       ww_mutex_base_lock(&o2.base);
         mutex_release(&o2.base.dep_map, _THIS_IP_);
         o2.ctx = &t2;
  
-       mutex_lock(&o3.base);
+       ww_mutex_base_lock(&o3.base);
         mutex_release(&o3.base.dep_map, _THIS_IP_);
         o3.ctx = &t2;
  
@@ -1980,7 +2043,7 @@ static void ww_test_edeadlk_acquire_wrong(void)
  {
         int ret;
  
-       mutex_lock(&o2.base);
+       ww_mutex_base_lock(&o2.base);
         mutex_release(&o2.base.dep_map, _THIS_IP_);
         o2.ctx = &t2;
  
@@ -2005,7 +2068,7 @@ static void ww_test_edeadlk_acquire_wrong_slow(void)
  {
         int ret;
  
-       mutex_lock(&o2.base);
+       ww_mutex_base_lock(&o2.base);
         mutex_release(&o2.base.dep_map, _THIS_IP_);
         o2.ctx = &t2;
  
@@ -2646,8 +2709,8 @@ static void wait_context_tests(void)
  
  static void local_lock_2(void)
  {
-       local_lock_acquire(&local_A);   /* IRQ-ON */
-       local_lock_release(&local_A);
+       local_lock(&local_A);   /* IRQ-ON */
+       local_unlock(&local_A);
  
         HARDIRQ_ENTER();
         spin_lock(&lock_A);             /* IN-IRQ */
@@ -2656,18 +2719,18 @@ static void local_lock_2(void)
  
         HARDIRQ_DISABLE();
         spin_lock(&lock_A);
-       local_lock_acquire(&local_A);   /* IN-IRQ <-> IRQ-ON cycle, false */
-       local_lock_release(&local_A);
+       local_lock(&local_A);   /* IN-IRQ <-> IRQ-ON cycle, false */
+       local_unlock(&local_A);
         spin_unlock(&lock_A);
         HARDIRQ_ENABLE();
  }
  
  static void local_lock_3A(void)
  {
-       local_lock_acquire(&local_A);   /* IRQ-ON */
+       local_lock(&local_A);   /* IRQ-ON */
         spin_lock(&lock_B);             /* IRQ-ON */
         spin_unlock(&lock_B);
-       local_lock_release(&local_A);
+       local_unlock(&local_A);
  
         HARDIRQ_ENTER();
         spin_lock(&lock_A);             /* IN-IRQ */
@@ -2676,18 +2739,18 @@ static void local_lock_3A(void)
  
         HARDIRQ_DISABLE();
         spin_lock(&lock_A);
-       local_lock_acquire(&local_A);   /* IN-IRQ <-> IRQ-ON cycle only if we count local_lock(), false */
-       local_lock_release(&local_A);
+       local_lock(&local_A);   /* IN-IRQ <-> IRQ-ON cycle only if we count local_lock(), false */
+       local_unlock(&local_A);
         spin_unlock(&lock_A);
         HARDIRQ_ENABLE();
  }
  
  static void local_lock_3B(void)
  {
-       local_lock_acquire(&local_A);   /* IRQ-ON */
+       local_lock(&local_A);   /* IRQ-ON */
         spin_lock(&lock_B);             /* IRQ-ON */
         spin_unlock(&lock_B);
-       local_lock_release(&local_A);
+       local_unlock(&local_A);
  
         HARDIRQ_ENTER();
         spin_lock(&lock_A);             /* IN-IRQ */
@@ -2696,8 +2759,8 @@ static void local_lock_3B(void)
  
         HARDIRQ_DISABLE();
         spin_lock(&lock_A);
-       local_lock_acquire(&local_A);   /* IN-IRQ <-> IRQ-ON cycle only if we count local_lock(), false */
-       local_lock_release(&local_A);
+       local_lock(&local_A);   /* IN-IRQ <-> IRQ-ON cycle only if we count local_lock(), false */
+       local_unlock(&local_A);
         spin_unlock(&lock_A);
         HARDIRQ_ENABLE();
  
@@ -2812,7 +2875,7 @@ void locking_selftest(void)
         printk("------------------------\n");
         printk("| Locking API testsuite:\n");
         printk("----------------------------------------------------------------------------\n");
-       printk("                                 | spin |wlock |rlock |mutex | wsem | rsem |\n");
+       printk("                                 | spin |wlock |rlock |mutex | wsem | rsem |rtmutex\n");
         printk("  --------------------------------------------------------------------------\n");
  
         init_shared_classes();
@@ -2885,12 +2948,11 @@ void locking_selftest(void)
         DO_TESTCASE_6x1RR("rlock W1R2/R2R3/W3W1", W1R2_R2R3_W3W1);
  
         printk("  --------------------------------------------------------------------------\n");
-
         /*
          * irq-context testcases:
          */
         DO_TESTCASE_2x6("irqs-on + irq-safe-A", irqsafe1);
-       DO_TESTCASE_2x3("sirq-safe-A => hirqs-on", irqsafe2A);
+       NON_RT(DO_TESTCASE_2x3("sirq-safe-A => hirqs-on", irqsafe2A));
         DO_TESTCASE_2x6("safe-A + irqs-on", irqsafe2B);
         DO_TESTCASE_6x6("safe-A + unsafe-B #1", irqsafe3);
         DO_TESTCASE_6x6("safe-A + unsafe-B #2", irqsafe4);
diff --git a/lib/nmi_backtrace.c b/lib/nmi_backtrace.c

index 199ab20..0641020 100644 (file)
--- a/lib/nmi_backtrace.c
+++ b/lib/nmi_backtrace.c
@@ -99,7 +99,7 @@ bool nmi_cpu_backtrace(struct pt_regs *regs)
                  * Allow nested NMI backtraces while serializing
                  * against other CPUs.
                  */
-               printk_cpu_lock_irqsave(flags);
+               raw_printk_cpu_lock_irqsave(flags);
                 if (!READ_ONCE(backtrace_idle) && regs && cpu_in_idle(instruction_pointer(regs))) {
                         pr_warn("NMI backtrace for cpu %d skipped: idling at %pS\n",
                                 cpu, (void *)instruction_pointer(regs));
@@ -110,7 +110,7 @@ bool nmi_cpu_backtrace(struct pt_regs *regs)
                         else
                                 dump_stack();
                 }
-               printk_cpu_unlock_irqrestore(flags);
+               raw_printk_cpu_unlock_irqrestore(flags);
                 cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask));
                 return true;
         }
diff --git a/lib/scatterlist.c b/lib/scatterlist.c

index abb3432..d5e82e4 100644 (file)
--- a/lib/scatterlist.c
+++ b/lib/scatterlist.c
@@ -828,8 +828,7 @@ static bool sg_miter_get_next_page(struct sg_mapping_iter *miter)
   *   stops @miter.
   *
   * Context:
- *   Don't care if @miter is stopped, or not proceeded yet.
- *   Otherwise, preemption disabled if the SG_MITER_ATOMIC is set.
+ *   Don't care.
   *
   * Returns:
   *   true if @miter contains the valid mapping.  false if end of sg
@@ -865,8 +864,7 @@ EXPORT_SYMBOL(sg_miter_skip);
   *   @miter->addr and @miter->length point to the current mapping.
   *
   * Context:
- *   Preemption disabled if SG_MITER_ATOMIC.  Preemption must stay disabled
- *   till @miter is stopped.  May sleep if !SG_MITER_ATOMIC.
+ *   May sleep if !SG_MITER_ATOMIC.
   *
   * Returns:
   *   true if @miter contains the next mapping.  false if end of sg
@@ -906,8 +904,7 @@ EXPORT_SYMBOL(sg_miter_next);
   *   need to be released during iteration.
   *
   * Context:
- *   Preemption disabled if the SG_MITER_ATOMIC is set.  Don't care
- *   otherwise.
+ *   Don't care.
   */
  void sg_miter_stop(struct sg_mapping_iter *miter)
  {
@@ -922,7 +919,7 @@ void sg_miter_stop(struct sg_mapping_iter *miter)
                         flush_dcache_page(miter->page);
  
                 if (miter->__flags & SG_MITER_ATOMIC) {
-                       WARN_ON_ONCE(preemptible());
+                       WARN_ON_ONCE(!pagefault_disabled());
                         kunmap_atomic(miter->addr);
                 } else
                         kunmap(miter->page);
diff --git a/localversion-rt b/localversion-rt

new file mode 100644 (file)

index 0000000..c06cc43
--- /dev/null
+++ b/localversion-rt
@@ -0,0 +1 @@
+-rt57
diff --git a/mm/Kconfig b/mm/Kconfig

index 6fd4380..b43c719 100644 (file)
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -396,7 +396,7 @@ config NOMMU_INITIAL_TRIM_EXCESS
  
  config TRANSPARENT_HUGEPAGE
         bool "Transparent Hugepage Support"
-       depends on HAVE_ARCH_TRANSPARENT_HUGEPAGE
+       depends on HAVE_ARCH_TRANSPARENT_HUGEPAGE && !PREEMPT_RT
         select COMPACTION
         select XARRAY_MULTI
         help
diff --git a/mm/memcontrol.c b/mm/memcontrol.c

index dd747cd..e2ea2fb 100644 (file)
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -654,6 +654,35 @@ static u64 flush_next_time;
  
  #define FLUSH_TIME (2UL*HZ)
  
+/*
+ * Accessors to ensure that preemption is disabled on PREEMPT_RT because it can
+ * not rely on this as part of an acquired spinlock_t lock. These functions are
+ * never used in hardirq context on PREEMPT_RT and therefore disabling preemption
+ * is sufficient.
+ */
+static void memcg_stats_lock(void)
+{
+#ifdef CONFIG_PREEMPT_RT
+      preempt_disable();
+#else
+      VM_BUG_ON(!irqs_disabled());
+#endif
+}
+
+static void __memcg_stats_lock(void)
+{
+#ifdef CONFIG_PREEMPT_RT
+      preempt_disable();
+#endif
+}
+
+static void memcg_stats_unlock(void)
+{
+#ifdef CONFIG_PREEMPT_RT
+      preempt_enable();
+#endif
+}
+
  static inline void memcg_rstat_updated(struct mem_cgroup *memcg, int val)
  {
         unsigned int x;
@@ -737,6 +766,27 @@ void __mod_memcg_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx,
         pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
         memcg = pn->memcg;
  
+       /*
+        * The callers from rmap rely on disabled preemption because they never
+        * update their counters from in-interrupt context. For these
+        * counters we check that the update is never performed from an
+        * interrupt context while other callers must have interrupts disabled.
+        */
+       __memcg_stats_lock();
+       if (IS_ENABLED(CONFIG_DEBUG_VM) && !IS_ENABLED(CONFIG_PREEMPT_RT)) {
+               switch (idx) {
+               case NR_ANON_MAPPED:
+               case NR_FILE_MAPPED:
+               case NR_ANON_THPS:
+               case NR_SHMEM_PMDMAPPED:
+               case NR_FILE_PMDMAPPED:
+                       WARN_ON_ONCE(!in_task());
+                       break;
+               default:
+                       WARN_ON_ONCE(!irqs_disabled());
+               }
+       }
+
         /* Update memcg */
         __this_cpu_add(memcg->vmstats_percpu->state[idx], val);
  
@@ -744,6 +794,7 @@ void __mod_memcg_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx,
         __this_cpu_add(pn->lruvec_stats_percpu->state[idx], val);
  
         memcg_rstat_updated(memcg, val);
+       memcg_stats_unlock();
  }
  
  /**
@@ -844,8 +895,10 @@ void __count_memcg_events(struct mem_cgroup *memcg, enum vm_event_item idx,
         if (mem_cgroup_disabled())
                 return;
  
+       memcg_stats_lock();
         __this_cpu_add(memcg->vmstats_percpu->events[idx], count);
         memcg_rstat_updated(memcg, count);
+       memcg_stats_unlock();
  }
  
  static unsigned long memcg_events(struct mem_cgroup *memcg, int event)
@@ -909,6 +962,9 @@ static bool mem_cgroup_event_ratelimit(struct mem_cgroup *memcg,
   */
  static void memcg_check_events(struct mem_cgroup *memcg, struct page *page)
  {
+       if (IS_ENABLED(CONFIG_PREEMPT_RT))
+               return;
+
         /* threshold event is triggered in finer grain than soft limit */
         if (unlikely(mem_cgroup_event_ratelimit(memcg,
                                                 MEM_CGROUP_TARGET_THRESH))) {
@@ -2102,39 +2158,37 @@ void unlock_page_memcg(struct page *page)
  }
  EXPORT_SYMBOL(unlock_page_memcg);
  
-struct obj_stock {
+struct memcg_stock_pcp {
+       local_lock_t stock_lock;
+       struct mem_cgroup *cached; /* this never be root cgroup */
+       unsigned int nr_pages;
+
  #ifdef CONFIG_MEMCG_KMEM
         struct obj_cgroup *cached_objcg;
         struct pglist_data *cached_pgdat;
         unsigned int nr_bytes;
         int nr_slab_reclaimable_b;
         int nr_slab_unreclaimable_b;
-#else
-       int dummy[0];
  #endif
-};
-
-struct memcg_stock_pcp {
-       struct mem_cgroup *cached; /* this never be root cgroup */
-       unsigned int nr_pages;
-       struct obj_stock task_obj;
-       struct obj_stock irq_obj;
  
         struct work_struct work;
         unsigned long flags;
  #define FLUSHING_CACHED_CHARGE 0
  };
-static DEFINE_PER_CPU(struct memcg_stock_pcp, memcg_stock);
+static DEFINE_PER_CPU(struct memcg_stock_pcp, memcg_stock) = {
+       .stock_lock = INIT_LOCAL_LOCK(stock_lock),
+};
  static DEFINE_MUTEX(percpu_charge_mutex);
  
  #ifdef CONFIG_MEMCG_KMEM
-static void drain_obj_stock(struct obj_stock *stock);
+static struct obj_cgroup *drain_obj_stock(struct memcg_stock_pcp *stock);
  static bool obj_stock_flush_required(struct memcg_stock_pcp *stock,
                                      struct mem_cgroup *root_memcg);
  
  #else
-static inline void drain_obj_stock(struct obj_stock *stock)
+static inline struct obj_cgroup *drain_obj_stock(struct memcg_stock_pcp *stock)
  {
+       return NULL;
  }
  static bool obj_stock_flush_required(struct memcg_stock_pcp *stock,
                                      struct mem_cgroup *root_memcg)
@@ -2144,41 +2198,6 @@ static bool obj_stock_flush_required(struct memcg_stock_pcp *stock,
  #endif
  
  /*
- * Most kmem_cache_alloc() calls are from user context. The irq disable/enable
- * sequence used in this case to access content from object stock is slow.
- * To optimize for user context access, there are now two object stocks for
- * task context and interrupt context access respectively.
- *
- * The task context object stock can be accessed by disabling preemption only
- * which is cheap in non-preempt kernel. The interrupt context object stock
- * can only be accessed after disabling interrupt. User context code can
- * access interrupt object stock, but not vice versa.
- */
-static inline struct obj_stock *get_obj_stock(unsigned long *pflags)
-{
-       struct memcg_stock_pcp *stock;
-
-       if (likely(in_task())) {
-               *pflags = 0UL;
-               preempt_disable();
-               stock = this_cpu_ptr(&memcg_stock);
-               return &stock->task_obj;
-       }
-
-       local_irq_save(*pflags);
-       stock = this_cpu_ptr(&memcg_stock);
-       return &stock->irq_obj;
-}
-
-static inline void put_obj_stock(unsigned long flags)
-{
-       if (likely(in_task()))
-               preempt_enable();
-       else
-               local_irq_restore(flags);
-}
-
-/**
   * consume_stock: Try to consume stocked charge on this cpu.
   * @memcg: memcg to consume from.
   * @nr_pages: how many pages to charge.
@@ -2198,7 +2217,7 @@ static bool consume_stock(struct mem_cgroup *memcg, unsigned int nr_pages)
         if (nr_pages > MEMCG_CHARGE_BATCH)
                 return ret;
  
-       local_irq_save(flags);
+       local_lock_irqsave(&memcg_stock.stock_lock, flags);
  
         stock = this_cpu_ptr(&memcg_stock);
         if (memcg == stock->cached && stock->nr_pages >= nr_pages) {
@@ -2206,7 +2225,7 @@ static bool consume_stock(struct mem_cgroup *memcg, unsigned int nr_pages)
                 ret = true;
         }
  
-       local_irq_restore(flags);
+       local_unlock_irqrestore(&memcg_stock.stock_lock, flags);
  
         return ret;
  }
@@ -2235,6 +2254,7 @@ static void drain_stock(struct memcg_stock_pcp *stock)
  static void drain_local_stock(struct work_struct *dummy)
  {
         struct memcg_stock_pcp *stock;
+       struct obj_cgroup *old = NULL;
         unsigned long flags;
  
         /*
@@ -2242,28 +2262,25 @@ static void drain_local_stock(struct work_struct *dummy)
          * drain_stock races is that we always operate on local CPU stock
          * here with IRQ disabled
          */
-       local_irq_save(flags);
+       local_lock_irqsave(&memcg_stock.stock_lock, flags);
  
         stock = this_cpu_ptr(&memcg_stock);
-       drain_obj_stock(&stock->irq_obj);
-       if (in_task())
-               drain_obj_stock(&stock->task_obj);
+       old = drain_obj_stock(stock);
         drain_stock(stock);
         clear_bit(FLUSHING_CACHED_CHARGE, &stock->flags);
  
-       local_irq_restore(flags);
+       local_unlock_irqrestore(&memcg_stock.stock_lock, flags);
+       if (old)
+               obj_cgroup_put(old);
  }
  
  /*
   * Cache charges(val) to local per_cpu area.
   * This will be consumed by consume_stock() function, later.
   */
-static void refill_stock(struct mem_cgroup *memcg, unsigned int nr_pages)
+static void __refill_stock(struct mem_cgroup *memcg, unsigned int nr_pages)
  {
         struct memcg_stock_pcp *stock;
-       unsigned long flags;
-
-       local_irq_save(flags);
  
         stock = this_cpu_ptr(&memcg_stock);
         if (stock->cached != memcg) { /* reset if necessary */
@@ -2275,8 +2292,15 @@ static void refill_stock(struct mem_cgroup *memcg, unsigned int nr_pages)
  
         if (stock->nr_pages > MEMCG_CHARGE_BATCH)
                 drain_stock(stock);
+}
  
-       local_irq_restore(flags);
+static void refill_stock(struct mem_cgroup *memcg, unsigned int nr_pages)
+{
+       unsigned long flags;
+
+       local_lock_irqsave(&memcg_stock.stock_lock, flags);
+       __refill_stock(memcg, nr_pages);
+       local_unlock_irqrestore(&memcg_stock.stock_lock, flags);
  }
  
  /*
@@ -2296,7 +2320,8 @@ static void drain_all_stock(struct mem_cgroup *root_memcg)
          * as well as workers from this path always operate on the local
          * per-cpu data. CPU up doesn't touch memcg_stock at all.
          */
-       curcpu = get_cpu();
+       migrate_disable();
+       curcpu = smp_processor_id();
         for_each_online_cpu(cpu) {
                 struct memcg_stock_pcp *stock = &per_cpu(memcg_stock, cpu);
                 struct mem_cgroup *memcg;
@@ -2319,7 +2344,7 @@ static void drain_all_stock(struct mem_cgroup *root_memcg)
                                 schedule_work_on(cpu, &stock->work);
                 }
         }
-       put_cpu();
+       migrate_enable();
         mutex_unlock(&percpu_charge_mutex);
  }
  
@@ -3084,17 +3109,21 @@ void __memcg_kmem_uncharge_page(struct page *page, int order)
  void mod_objcg_state(struct obj_cgroup *objcg, struct pglist_data *pgdat,
                      enum node_stat_item idx, int nr)
  {
+       struct memcg_stock_pcp *stock;
+       struct obj_cgroup *old = NULL;
         unsigned long flags;
-       struct obj_stock *stock = get_obj_stock(&flags);
         int *bytes;
  
+       local_lock_irqsave(&memcg_stock.stock_lock, flags);
+       stock = this_cpu_ptr(&memcg_stock);
+
         /*
          * Save vmstat data in stock and skip vmstat array update unless
          * accumulating over a page of vmstat data or when pgdat or idx
          * changes.
          */
         if (stock->cached_objcg != objcg) {
-               drain_obj_stock(stock);
+               old = drain_obj_stock(stock);
                 obj_cgroup_get(objcg);
                 stock->nr_bytes = atomic_read(&objcg->nr_charged_bytes)
                                 ? atomic_xchg(&objcg->nr_charged_bytes, 0) : 0;
@@ -3138,38 +3167,53 @@ void mod_objcg_state(struct obj_cgroup *objcg, struct pglist_data *pgdat,
         if (nr)
                 mod_objcg_mlstate(objcg, pgdat, idx, nr);
  
-       put_obj_stock(flags);
+       local_unlock_irqrestore(&memcg_stock.stock_lock, flags);
+       if (old)
+               obj_cgroup_put(old);
  }
  
  static bool consume_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes)
  {
+       struct memcg_stock_pcp *stock;
         unsigned long flags;
-       struct obj_stock *stock = get_obj_stock(&flags);
         bool ret = false;
  
+       local_lock_irqsave(&memcg_stock.stock_lock, flags);
+
+       stock = this_cpu_ptr(&memcg_stock);
         if (objcg == stock->cached_objcg && stock->nr_bytes >= nr_bytes) {
                 stock->nr_bytes -= nr_bytes;
                 ret = true;
         }
  
-       put_obj_stock(flags);
+       local_unlock_irqrestore(&memcg_stock.stock_lock, flags);
  
         return ret;
  }
  
-static void drain_obj_stock(struct obj_stock *stock)
+static struct obj_cgroup *drain_obj_stock(struct memcg_stock_pcp *stock)
  {
         struct obj_cgroup *old = stock->cached_objcg;
  
         if (!old)
-               return;
+               return NULL;
  
         if (stock->nr_bytes) {
                 unsigned int nr_pages = stock->nr_bytes >> PAGE_SHIFT;
                 unsigned int nr_bytes = stock->nr_bytes & (PAGE_SIZE - 1);
  
-               if (nr_pages)
-                       obj_cgroup_uncharge_pages(old, nr_pages);
+               if (nr_pages) {
+                       struct mem_cgroup *memcg;
+
+                       memcg = get_mem_cgroup_from_objcg(old);
+
+                       if (!cgroup_subsys_on_dfl(memory_cgrp_subsys))
+                               page_counter_uncharge(&memcg->kmem, nr_pages);
+
+                       __refill_stock(memcg, nr_pages);
+
+                       css_put(&memcg->css);
+               }
  
                 /*
                  * The leftover is flushed to the centralized per-memcg value.
@@ -3204,8 +3248,12 @@ static void drain_obj_stock(struct obj_stock *stock)
                 stock->cached_pgdat = NULL;
         }
  
-       obj_cgroup_put(old);
         stock->cached_objcg = NULL;
+       /*
+        * The `old' object needs to be released by the caller via
+        * obj_cgroup_put() outside of memcg_stock_pcp::stock_lock.
+        */
+       return old;
  }
  
  static bool obj_stock_flush_required(struct memcg_stock_pcp *stock,
@@ -3213,13 +3261,8 @@ static bool obj_stock_flush_required(struct memcg_stock_pcp *stock,
  {
         struct mem_cgroup *memcg;
  
-       if (in_task() && stock->task_obj.cached_objcg) {
-               memcg = obj_cgroup_memcg(stock->task_obj.cached_objcg);
-               if (memcg && mem_cgroup_is_descendant(memcg, root_memcg))
-                       return true;
-       }
-       if (stock->irq_obj.cached_objcg) {
-               memcg = obj_cgroup_memcg(stock->irq_obj.cached_objcg);
+       if (stock->cached_objcg) {
+               memcg = obj_cgroup_memcg(stock->cached_objcg);
                 if (memcg && mem_cgroup_is_descendant(memcg, root_memcg))
                         return true;
         }
@@ -3230,12 +3273,16 @@ static bool obj_stock_flush_required(struct memcg_stock_pcp *stock,
  static void refill_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes,
                              bool allow_uncharge)
  {
+       struct memcg_stock_pcp *stock;
+       struct obj_cgroup *old = NULL;
         unsigned long flags;
-       struct obj_stock *stock = get_obj_stock(&flags);
         unsigned int nr_pages = 0;
  
+       local_lock_irqsave(&memcg_stock.stock_lock, flags);
+
+       stock = this_cpu_ptr(&memcg_stock);
         if (stock->cached_objcg != objcg) { /* reset if necessary */
-               drain_obj_stock(stock);
+               old = drain_obj_stock(stock);
                 obj_cgroup_get(objcg);
                 stock->cached_objcg = objcg;
                 stock->nr_bytes = atomic_read(&objcg->nr_charged_bytes)
@@ -3249,7 +3296,9 @@ static void refill_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes,
                 stock->nr_bytes &= (PAGE_SIZE - 1);
         }
  
-       put_obj_stock(flags);
+       local_unlock_irqrestore(&memcg_stock.stock_lock, flags);
+       if (old)
+               obj_cgroup_put(old);
  
         if (nr_pages)
                 obj_cgroup_uncharge_pages(objcg, nr_pages);
@@ -3846,8 +3895,12 @@ static ssize_t mem_cgroup_write(struct kernfs_open_file *of,
                 }
                 break;
         case RES_SOFT_LIMIT:
-               memcg->soft_limit = nr_pages;
-               ret = 0;
+               if (IS_ENABLED(CONFIG_PREEMPT_RT)) {
+                       ret = -EOPNOTSUPP;
+               } else {
+                       memcg->soft_limit = nr_pages;
+                       ret = 0;
+               }
                 break;
         }
         return ret ?: nbytes;
@@ -4824,6 +4877,9 @@ static ssize_t memcg_write_event_control(struct kernfs_open_file *of,
         char *endp;
         int ret;
  
+       if (IS_ENABLED(CONFIG_PREEMPT_RT))
+               return -EOPNOTSUPP;
+
         buf = strstrip(buf);
  
         efd = simple_strtoul(buf, &endp, 10);
@@ -6915,7 +6971,6 @@ static void uncharge_page(struct page *page, struct uncharge_gather *ug)
         unsigned long nr_pages;
         struct mem_cgroup *memcg;
         struct obj_cgroup *objcg;
-       bool use_objcg = PageMemcgKmem(page);
  
         VM_BUG_ON_PAGE(PageLRU(page), page);
  
@@ -6924,7 +6979,7 @@ static void uncharge_page(struct page *page, struct uncharge_gather *ug)
          * page memcg or objcg at this point, we have fully
          * exclusive access to the page.
          */
-       if (use_objcg) {
+       if (PageMemcgKmem(page)) {
                 objcg = __page_objcg(page);
                 /*
                  * This get matches the put at the end of the function and
@@ -6952,7 +7007,7 @@ static void uncharge_page(struct page *page, struct uncharge_gather *ug)
  
         nr_pages = compound_nr(page);
  
-       if (use_objcg) {
+       if (PageMemcgKmem(page)) {
                 ug->nr_memory += nr_pages;
                 ug->nr_kmem += nr_pages;
  
@@ -7282,8 +7337,9 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
          * important here to have the interrupts disabled because it is the
          * only synchronisation we have for updating the per-CPU variables.
          */
-       VM_BUG_ON(!irqs_disabled());
+       memcg_stats_lock();
         mem_cgroup_charge_statistics(memcg, page, -nr_entries);
+       memcg_stats_unlock();
         memcg_check_events(memcg, page);
  
         css_put(&memcg->css);
diff --git a/mm/memory.c b/mm/memory.c

index 32abe36..d037e84 100644 (file)
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -5291,7 +5291,7 @@ void __might_fault(const char *file, int line)
                 return;
         if (pagefault_disabled())
                 return;
-       __might_sleep(file, line, 0);
+       __might_sleep(file, line);
  #if defined(CONFIG_DEBUG_ATOMIC_SLEEP)
         if (current->mm)
                 might_lock_read(&current->mm->mmap_lock);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c

index 15191b2..e31bbd2 100644 (file)
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -3171,9 +3171,9 @@ static void drain_local_pages_wq(struct work_struct *work)
          * cpu which is alright but we also have to make sure to not move to
          * a different one.
          */
-       preempt_disable();
+       migrate_disable();
         drain_local_pages(drain->zone);
-       preempt_enable();
+       migrate_enable();
  }
  
  /*
diff --git a/mm/vmalloc.c b/mm/vmalloc.c

index 8375eec..f81f11b 100644 (file)
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -1918,11 +1918,12 @@ static void *new_vmap_block(unsigned int order, gfp_t gfp_mask)
                 return ERR_PTR(err);
         }
  
-       vbq = &get_cpu_var(vmap_block_queue);
+       get_cpu_light();
+       vbq = this_cpu_ptr(&vmap_block_queue);
         spin_lock(&vbq->lock);
         list_add_tail_rcu(&vb->free_list, &vbq->free);
         spin_unlock(&vbq->lock);
-       put_cpu_var(vmap_block_queue);
+       put_cpu_light();
  
         return vaddr;
  }
@@ -2001,7 +2002,8 @@ static void *vb_alloc(unsigned long size, gfp_t gfp_mask)
         order = get_order(size);
  
         rcu_read_lock();
-       vbq = &get_cpu_var(vmap_block_queue);
+       get_cpu_light();
+       vbq = this_cpu_ptr(&vmap_block_queue);
         list_for_each_entry_rcu(vb, &vbq->free, free_list) {
                 unsigned long pages_off;
  
@@ -2024,7 +2026,7 @@ static void *vb_alloc(unsigned long size, gfp_t gfp_mask)
                 break;
         }
  
-       put_cpu_var(vmap_block_queue);
+       put_cpu_light();
         rcu_read_unlock();
  
         /* Allocate new block if nothing was found */
diff --git a/mm/workingset.c b/mm/workingset.c

index 880d882..2a9ed5a 100644 (file)
--- a/mm/workingset.c
+++ b/mm/workingset.c
@@ -433,6 +433,8 @@ static struct list_lru shadow_nodes;
  
  void workingset_update_node(struct xa_node *node)
  {
+       struct address_space *mapping;
+
         /*
          * Track non-empty nodes that contain only shadow entries;
          * unlink those that contain pages or are being freed.
@@ -441,7 +443,8 @@ void workingset_update_node(struct xa_node *node)
          * already where they should be. The list_empty() test is safe
          * as node->private_list is protected by the i_pages lock.
          */
-       VM_WARN_ON_ONCE(!irqs_disabled());  /* For __inc_lruvec_page_state */
+       mapping = container_of(node->array, struct address_space, i_pages);
+       lockdep_assert_held(&mapping->i_pages.xa_lock);
  
         if (node->count && node->count == node->nr_values) {
                 if (list_empty(&node->private_list)) {
diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c

index 439deb8..a664318 100644 (file)
--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c
@@ -57,6 +57,7 @@
  #include <linux/wait.h>
  #include <linux/pagemap.h>
  #include <linux/fs.h>
+#include <linux/local_lock.h>
  
  #define ZSPAGE_MAGIC   0x58
  
@@ -77,6 +78,20 @@
  
  #define ZS_HANDLE_SIZE (sizeof(unsigned long))
  
+#ifdef CONFIG_PREEMPT_RT
+
+struct zsmalloc_handle {
+       unsigned long addr;
+       spinlock_t lock;
+};
+
+#define ZS_HANDLE_ALLOC_SIZE (sizeof(struct zsmalloc_handle))
+
+#else
+
+#define ZS_HANDLE_ALLOC_SIZE (sizeof(unsigned long))
+#endif
+
  /*
   * Object location (<PFN>, <obj_idx>) is encoded as
   * a single (unsigned long) handle value.
@@ -293,6 +308,7 @@ struct zspage {
  };
  
  struct mapping_area {
+       local_lock_t lock;
         char *vm_buf; /* copy buffer for objects that span pages */
         char *vm_addr; /* address of kmap_atomic()'ed pages */
         enum zs_mapmode vm_mm; /* mapping mode */
@@ -322,7 +338,7 @@ static void SetZsPageMovable(struct zs_pool *pool, struct zspage *zspage) {}
  
  static int create_cache(struct zs_pool *pool)
  {
-       pool->handle_cachep = kmem_cache_create("zs_handle", ZS_HANDLE_SIZE,
+       pool->handle_cachep = kmem_cache_create("zs_handle", ZS_HANDLE_ALLOC_SIZE,
                                         0, 0, NULL);
         if (!pool->handle_cachep)
                 return 1;
@@ -346,10 +362,27 @@ static void destroy_cache(struct zs_pool *pool)
  
  static unsigned long cache_alloc_handle(struct zs_pool *pool, gfp_t gfp)
  {
-       return (unsigned long)kmem_cache_alloc(pool->handle_cachep,
-                       gfp & ~(__GFP_HIGHMEM|__GFP_MOVABLE));
+       void *p;
+
+       p = kmem_cache_alloc(pool->handle_cachep,
+                            gfp & ~(__GFP_HIGHMEM|__GFP_MOVABLE));
+#ifdef CONFIG_PREEMPT_RT
+       if (p) {
+               struct zsmalloc_handle *zh = p;
+
+               spin_lock_init(&zh->lock);
+       }
+#endif
+       return (unsigned long)p;
  }
  
+#ifdef CONFIG_PREEMPT_RT
+static struct zsmalloc_handle *zs_get_pure_handle(unsigned long handle)
+{
+       return (void *)(handle & ~((1 << OBJ_TAG_BITS) - 1));
+}
+#endif
+
  static void cache_free_handle(struct zs_pool *pool, unsigned long handle)
  {
         kmem_cache_free(pool->handle_cachep, (void *)handle);
@@ -368,12 +401,18 @@ static void cache_free_zspage(struct zs_pool *pool, struct zspage *zspage)
  
  static void record_obj(unsigned long handle, unsigned long obj)
  {
+#ifdef CONFIG_PREEMPT_RT
+       struct zsmalloc_handle *zh = zs_get_pure_handle(handle);
+
+       WRITE_ONCE(zh->addr, obj);
+#else
         /*
          * lsb of @obj represents handle lock while other bits
          * represent object value the handle is pointing so
          * updating shouldn't do store tearing.
          */
         WRITE_ONCE(*(unsigned long *)handle, obj);
+#endif
  }
  
  /* zpool driver */
@@ -455,7 +494,9 @@ MODULE_ALIAS("zpool-zsmalloc");
  #endif /* CONFIG_ZPOOL */
  
  /* per-cpu VM mapping areas for zspage accesses that cross page boundaries */
-static DEFINE_PER_CPU(struct mapping_area, zs_map_area);
+static DEFINE_PER_CPU(struct mapping_area, zs_map_area) = {
+       .lock   = INIT_LOCAL_LOCK(lock),
+};
  
  static bool is_zspage_isolated(struct zspage *zspage)
  {
@@ -862,7 +903,13 @@ static unsigned long location_to_obj(struct page *page, unsigned int obj_idx)
  
  static unsigned long handle_to_obj(unsigned long handle)
  {
+#ifdef CONFIG_PREEMPT_RT
+       struct zsmalloc_handle *zh = zs_get_pure_handle(handle);
+
+       return zh->addr;
+#else
         return *(unsigned long *)handle;
+#endif
  }
  
  static unsigned long obj_to_head(struct page *page, void *obj)
@@ -876,22 +923,46 @@ static unsigned long obj_to_head(struct page *page, void *obj)
  
  static inline int testpin_tag(unsigned long handle)
  {
+#ifdef CONFIG_PREEMPT_RT
+       struct zsmalloc_handle *zh = zs_get_pure_handle(handle);
+
+       return spin_is_locked(&zh->lock);
+#else
         return bit_spin_is_locked(HANDLE_PIN_BIT, (unsigned long *)handle);
+#endif
  }
  
  static inline int trypin_tag(unsigned long handle)
  {
+#ifdef CONFIG_PREEMPT_RT
+       struct zsmalloc_handle *zh = zs_get_pure_handle(handle);
+
+       return spin_trylock(&zh->lock);
+#else
         return bit_spin_trylock(HANDLE_PIN_BIT, (unsigned long *)handle);
+#endif
  }
  
  static void pin_tag(unsigned long handle) __acquires(bitlock)
  {
+#ifdef CONFIG_PREEMPT_RT
+       struct zsmalloc_handle *zh = zs_get_pure_handle(handle);
+
+       return spin_lock(&zh->lock);
+#else
         bit_spin_lock(HANDLE_PIN_BIT, (unsigned long *)handle);
+#endif
  }
  
  static void unpin_tag(unsigned long handle) __releases(bitlock)
  {
+#ifdef CONFIG_PREEMPT_RT
+       struct zsmalloc_handle *zh = zs_get_pure_handle(handle);
+
+       return spin_unlock(&zh->lock);
+#else
         bit_spin_unlock(HANDLE_PIN_BIT, (unsigned long *)handle);
+#endif
  }
  
  static void reset_page(struct page *page)
@@ -1274,7 +1345,8 @@ void *zs_map_object(struct zs_pool *pool, unsigned long handle,
         class = pool->size_class[class_idx];
         off = (class->size * obj_idx) & ~PAGE_MASK;
  
-       area = &get_cpu_var(zs_map_area);
+       local_lock(&zs_map_area.lock);
+       area = this_cpu_ptr(&zs_map_area);
         area->vm_mm = mm;
         if (off + class->size <= PAGE_SIZE) {
                 /* this object is contained entirely within a page */
@@ -1328,7 +1400,7 @@ void zs_unmap_object(struct zs_pool *pool, unsigned long handle)
  
                 __zs_unmap_object(area, pages, off, class->size);
         }
-       put_cpu_var(zs_map_area);
+       local_unlock(&zs_map_area.lock);
  
         migrate_read_unlock(zspage);
         unpin_tag(handle);
diff --git a/net/Kconfig b/net/Kconfig

index fb13460..074472d 100644 (file)
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -294,7 +294,7 @@ config CGROUP_NET_CLASSID
  
  config NET_RX_BUSY_POLL
         bool
-       default y
+       default y if !PREEMPT_RT
  
  config BQL
         bool
diff --git a/net/core/dev.c b/net/core/dev.c

index 33d6b69..dfa1f16 100644 (file)
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -225,14 +225,14 @@ static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex)
  static inline void rps_lock(struct softnet_data *sd)
  {
  #ifdef CONFIG_RPS
-       spin_lock(&sd->input_pkt_queue.lock);
+       raw_spin_lock(&sd->input_pkt_queue.raw_lock);
  #endif
  }
  
  static inline void rps_unlock(struct softnet_data *sd)
  {
  #ifdef CONFIG_RPS
-       spin_unlock(&sd->input_pkt_queue.lock);
+       raw_spin_unlock(&sd->input_pkt_queue.raw_lock);
  #endif
  }
  
@@ -3044,6 +3044,7 @@ static void __netif_reschedule(struct Qdisc *q)
         sd->output_queue_tailp = &q->next_sched;
         raise_softirq_irqoff(NET_TX_SOFTIRQ);
         local_irq_restore(flags);
+       preempt_check_resched_rt();
  }
  
  void __netif_schedule(struct Qdisc *q)
@@ -3106,6 +3107,7 @@ void __dev_kfree_skb_irq(struct sk_buff *skb, enum skb_free_reason reason)
         __this_cpu_write(softnet_data.completion_queue, skb);
         raise_softirq_irqoff(NET_TX_SOFTIRQ);
         local_irq_restore(flags);
+       preempt_check_resched_rt();
  }
  EXPORT_SYMBOL(__dev_kfree_skb_irq);
  
@@ -3837,7 +3839,11 @@ no_lock_out:
          * This permits qdisc->running owner to get the lock more
          * often and dequeue packets faster.
          */
+#ifdef CONFIG_PREEMPT_RT
+       contended = true;
+#else
         contended = qdisc_is_running(q);
+#endif
         if (unlikely(contended))
                 spin_lock(&q->busylock);
  
@@ -4663,6 +4669,7 @@ drop:
         rps_unlock(sd);
  
         local_irq_restore(flags);
+       preempt_check_resched_rt();
  
         atomic_long_inc(&skb->dev->rx_dropped);
         kfree_skb(skb);
@@ -4903,7 +4910,7 @@ static int netif_rx_internal(struct sk_buff *skb)
                 struct rps_dev_flow voidflow, *rflow = &voidflow;
                 int cpu;
  
-               preempt_disable();
+               migrate_disable();
                 rcu_read_lock();
  
                 cpu = get_rps_cpu(skb->dev, skb, &rflow);
@@ -4913,14 +4920,14 @@ static int netif_rx_internal(struct sk_buff *skb)
                 ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
  
                 rcu_read_unlock();
-               preempt_enable();
+               migrate_enable();
         } else
  #endif
         {
                 unsigned int qtail;
  
-               ret = enqueue_to_backlog(skb, get_cpu(), &qtail);
-               put_cpu();
+               ret = enqueue_to_backlog(skb, get_cpu_light(), &qtail);
+               put_cpu_light();
         }
         return ret;
  }
@@ -4959,11 +4966,9 @@ int netif_rx_ni(struct sk_buff *skb)
  
         trace_netif_rx_ni_entry(skb);
  
-       preempt_disable();
+       local_bh_disable();
         err = netif_rx_internal(skb);
-       if (local_softirq_pending())
-               do_softirq();
-       preempt_enable();
+       local_bh_enable();
         trace_netif_rx_ni_exit(err);
  
         return err;
@@ -6407,12 +6412,14 @@ static void net_rps_action_and_irq_enable(struct softnet_data *sd)
                 sd->rps_ipi_list = NULL;
  
                 local_irq_enable();
+               preempt_check_resched_rt();
  
                 /* Send pending IPI's to kick RPS processing on remote cpus. */
                 net_rps_send_ipi(remsd);
         } else
  #endif
                 local_irq_enable();
+       preempt_check_resched_rt();
  }
  
  static bool sd_has_rps_ipi_waiting(struct softnet_data *sd)
@@ -6490,6 +6497,7 @@ void __napi_schedule(struct napi_struct *n)
         local_irq_save(flags);
         ____napi_schedule(this_cpu_ptr(&softnet_data), n);
         local_irq_restore(flags);
+       preempt_check_resched_rt();
  }
  EXPORT_SYMBOL(__napi_schedule);
  
@@ -11312,6 +11320,7 @@ static int dev_cpu_dead(unsigned int oldcpu)
  
         raise_softirq_irqoff(NET_TX_SOFTIRQ);
         local_irq_enable();
+       preempt_check_resched_rt();
  
  #ifdef CONFIG_RPS
         remsd = oldsd->rps_ipi_list;
@@ -11325,7 +11334,7 @@ static int dev_cpu_dead(unsigned int oldcpu)
                 netif_rx_ni(skb);
                 input_queue_head_incr(oldsd);
         }
-       while ((skb = skb_dequeue(&oldsd->input_pkt_queue))) {
+       while ((skb = __skb_dequeue(&oldsd->input_pkt_queue))) {
                 netif_rx_ni(skb);
                 input_queue_head_incr(oldsd);
         }
@@ -11640,7 +11649,7 @@ static int __init net_dev_init(void)
  
                 INIT_WORK(flush, flush_backlog);
  
-               skb_queue_head_init(&sd->input_pkt_queue);
+               skb_queue_head_init_raw(&sd->input_pkt_queue);
                 skb_queue_head_init(&sd->process_queue);
  #ifdef CONFIG_XFRM_OFFLOAD
                 skb_queue_head_init(&sd->xfrm_backlog);
diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c

index 8e582e2..4fcbdd7 100644 (file)
--- a/net/core/gen_estimator.c
+++ b/net/core/gen_estimator.c
@@ -40,10 +40,10 @@
   */
  
  struct net_rate_estimator {
-       struct gnet_stats_basic_packed  *bstats;
+       struct gnet_stats_basic_sync    *bstats;
         spinlock_t              *stats_lock;
-       seqcount_t              *running;
-       struct gnet_stats_basic_cpu __percpu *cpu_bstats;
+       bool                    running;
+       struct gnet_stats_basic_sync __percpu *cpu_bstats;
         u8                      ewma_log;
         u8                      intvl_log; /* period : (250ms << intvl_log) */
  
@@ -60,13 +60,13 @@ struct net_rate_estimator {
  };
  
  static void est_fetch_counters(struct net_rate_estimator *e,
-                              struct gnet_stats_basic_packed *b)
+                              struct gnet_stats_basic_sync *b)
  {
-       memset(b, 0, sizeof(*b));
+       gnet_stats_basic_sync_init(b);
         if (e->stats_lock)
                 spin_lock(e->stats_lock);
  
-       __gnet_stats_copy_basic(e->running, b, e->cpu_bstats, e->bstats);
+       gnet_stats_add_basic(b, e->cpu_bstats, e->bstats, e->running);
  
         if (e->stats_lock)
                 spin_unlock(e->stats_lock);
@@ -76,14 +76,18 @@ static void est_fetch_counters(struct net_rate_estimator *e,
  static void est_timer(struct timer_list *t)
  {
         struct net_rate_estimator *est = from_timer(est, t, timer);
-       struct gnet_stats_basic_packed b;
+       struct gnet_stats_basic_sync b;
+       u64 b_bytes, b_packets;
         u64 rate, brate;
  
         est_fetch_counters(est, &b);
-       brate = (b.bytes - est->last_bytes) << (10 - est->intvl_log);
+       b_bytes = u64_stats_read(&b.bytes);
+       b_packets = u64_stats_read(&b.packets);
+
+       brate = (b_bytes - est->last_bytes) << (10 - est->intvl_log);
         brate = (brate >> est->ewma_log) - (est->avbps >> est->ewma_log);
  
-       rate = (b.packets - est->last_packets) << (10 - est->intvl_log);
+       rate = (b_packets - est->last_packets) << (10 - est->intvl_log);
         rate = (rate >> est->ewma_log) - (est->avpps >> est->ewma_log);
  
         write_seqcount_begin(&est->seq);
@@ -91,8 +95,8 @@ static void est_timer(struct timer_list *t)
         est->avpps += rate;
         write_seqcount_end(&est->seq);
  
-       est->last_bytes = b.bytes;
-       est->last_packets = b.packets;
+       est->last_bytes = b_bytes;
+       est->last_packets = b_packets;
  
         est->next_jiffies += ((HZ/4) << est->intvl_log);
  
@@ -109,7 +113,9 @@ static void est_timer(struct timer_list *t)
   * @cpu_bstats: bstats per cpu
   * @rate_est: rate estimator statistics
   * @lock: lock for statistics and control path
- * @running: qdisc running seqcount
+ * @running: true if @bstats represents a running qdisc, thus @bstats'
+ *           internal values might change during basic reads. Only used
+ *           if @cpu_bstats is NULL
   * @opt: rate estimator configuration TLV
   *
   * Creates a new rate estimator with &bstats as source and &rate_est
@@ -121,16 +127,16 @@ static void est_timer(struct timer_list *t)
   * Returns 0 on success or a negative error code.
   *
   */
-int gen_new_estimator(struct gnet_stats_basic_packed *bstats,
-                     struct gnet_stats_basic_cpu __percpu *cpu_bstats,
+int gen_new_estimator(struct gnet_stats_basic_sync *bstats,
+                     struct gnet_stats_basic_sync __percpu *cpu_bstats,
                       struct net_rate_estimator __rcu **rate_est,
                       spinlock_t *lock,
-                     seqcount_t *running,
+                     bool running,
                       struct nlattr *opt)
  {
         struct gnet_estimator *parm = nla_data(opt);
         struct net_rate_estimator *old, *est;
-       struct gnet_stats_basic_packed b;
+       struct gnet_stats_basic_sync b;
         int intvl_log;
  
         if (nla_len(opt) < sizeof(*parm))
@@ -164,8 +170,8 @@ int gen_new_estimator(struct gnet_stats_basic_packed *bstats,
         est_fetch_counters(est, &b);
         if (lock)
                 local_bh_enable();
-       est->last_bytes = b.bytes;
-       est->last_packets = b.packets;
+       est->last_bytes = u64_stats_read(&b.bytes);
+       est->last_packets = u64_stats_read(&b.packets);
  
         if (lock)
                 spin_lock_bh(lock);
@@ -214,7 +220,9 @@ EXPORT_SYMBOL(gen_kill_estimator);
   * @cpu_bstats: bstats per cpu
   * @rate_est: rate estimator statistics
   * @lock: lock for statistics and control path
- * @running: qdisc running seqcount (might be NULL)
+ * @running: true if @bstats represents a running qdisc, thus @bstats'
+ *           internal values might change during basic reads. Only used
+ *           if @cpu_bstats is NULL
   * @opt: rate estimator configuration TLV
   *
   * Replaces the configuration of a rate estimator by calling
@@ -222,11 +230,11 @@ EXPORT_SYMBOL(gen_kill_estimator);
   *
   * Returns 0 on success or a negative error code.
   */
-int gen_replace_estimator(struct gnet_stats_basic_packed *bstats,
-                         struct gnet_stats_basic_cpu __percpu *cpu_bstats,
+int gen_replace_estimator(struct gnet_stats_basic_sync *bstats,
+                         struct gnet_stats_basic_sync __percpu *cpu_bstats,
                           struct net_rate_estimator __rcu **rate_est,
                           spinlock_t *lock,
-                         seqcount_t *running, struct nlattr *opt)
+                         bool running, struct nlattr *opt)
  {
         return gen_new_estimator(bstats, cpu_bstats, rate_est,
                                  lock, running, opt);
diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c

index e491b08..a10335b 100644 (file)
--- a/net/core/gen_stats.c
+++ b/net/core/gen_stats.c
@@ -18,7 +18,7 @@
  #include <linux/gen_stats.h>
  #include <net/netlink.h>
  #include <net/gen_stats.h>
-
+#include <net/sch_generic.h>
  
  static inline int
  gnet_stats_copy(struct gnet_dump *d, int type, void *buf, int size, int padattr)
@@ -114,63 +114,112 @@ gnet_stats_start_copy(struct sk_buff *skb, int type, spinlock_t *lock,
  }
  EXPORT_SYMBOL(gnet_stats_start_copy);
  
-static void
-__gnet_stats_copy_basic_cpu(struct gnet_stats_basic_packed *bstats,
-                           struct gnet_stats_basic_cpu __percpu *cpu)
+/* Must not be inlined, due to u64_stats seqcount_t lockdep key */
+void gnet_stats_basic_sync_init(struct gnet_stats_basic_sync *b)
  {
+       u64_stats_set(&b->bytes, 0);
+       u64_stats_set(&b->packets, 0);
+       u64_stats_init(&b->syncp);
+}
+EXPORT_SYMBOL(gnet_stats_basic_sync_init);
+
+static void gnet_stats_add_basic_cpu(struct gnet_stats_basic_sync *bstats,
+                                    struct gnet_stats_basic_sync __percpu *cpu)
+{
+       u64 t_bytes = 0, t_packets = 0;
         int i;
  
         for_each_possible_cpu(i) {
-               struct gnet_stats_basic_cpu *bcpu = per_cpu_ptr(cpu, i);
+               struct gnet_stats_basic_sync *bcpu = per_cpu_ptr(cpu, i);
                 unsigned int start;
                 u64 bytes, packets;
  
                 do {
                         start = u64_stats_fetch_begin_irq(&bcpu->syncp);
-                       bytes = bcpu->bstats.bytes;
-                       packets = bcpu->bstats.packets;
+                       bytes = u64_stats_read(&bcpu->bytes);
+                       packets = u64_stats_read(&bcpu->packets);
                 } while (u64_stats_fetch_retry_irq(&bcpu->syncp, start));
  
-               bstats->bytes += bytes;
-               bstats->packets += packets;
+               t_bytes += bytes;
+               t_packets += packets;
+       }
+       _bstats_update(bstats, t_bytes, t_packets);
+}
+
+void gnet_stats_add_basic(struct gnet_stats_basic_sync *bstats,
+                         struct gnet_stats_basic_sync __percpu *cpu,
+                         struct gnet_stats_basic_sync *b, bool running)
+{
+       unsigned int start;
+       u64 bytes = 0;
+       u64 packets = 0;
+
+       WARN_ON_ONCE((cpu || running) && in_hardirq());
+
+       if (cpu) {
+               gnet_stats_add_basic_cpu(bstats, cpu);
+               return;
         }
+       do {
+               if (running)
+                       start = u64_stats_fetch_begin_irq(&b->syncp);
+               bytes = u64_stats_read(&b->bytes);
+               packets = u64_stats_read(&b->packets);
+       } while (running && u64_stats_fetch_retry_irq(&b->syncp, start));
+
+       _bstats_update(bstats, bytes, packets);
  }
+EXPORT_SYMBOL(gnet_stats_add_basic);
  
-void
-__gnet_stats_copy_basic(const seqcount_t *running,
-                       struct gnet_stats_basic_packed *bstats,
-                       struct gnet_stats_basic_cpu __percpu *cpu,
-                       struct gnet_stats_basic_packed *b)
+static void gnet_stats_read_basic(u64 *ret_bytes, u64 *ret_packets,
+                                 struct gnet_stats_basic_sync __percpu *cpu,
+                                 struct gnet_stats_basic_sync *b, bool running)
  {
-       unsigned int seq;
+       unsigned int start;
  
         if (cpu) {
-               __gnet_stats_copy_basic_cpu(bstats, cpu);
+               u64 t_bytes = 0, t_packets = 0;
+               int i;
+
+               for_each_possible_cpu(i) {
+                       struct gnet_stats_basic_sync *bcpu = per_cpu_ptr(cpu, i);
+                       unsigned int start;
+                       u64 bytes, packets;
+
+                       do {
+                               start = u64_stats_fetch_begin_irq(&bcpu->syncp);
+                               bytes = u64_stats_read(&bcpu->bytes);
+                               packets = u64_stats_read(&bcpu->packets);
+                       } while (u64_stats_fetch_retry_irq(&bcpu->syncp, start));
+
+                       t_bytes += bytes;
+                       t_packets += packets;
+               }
+               *ret_bytes = t_bytes;
+               *ret_packets = t_packets;
                 return;
         }
         do {
                 if (running)
-                       seq = read_seqcount_begin(running);
-               bstats->bytes = b->bytes;
-               bstats->packets = b->packets;
-       } while (running && read_seqcount_retry(running, seq));
+                       start = u64_stats_fetch_begin_irq(&b->syncp);
+               *ret_bytes = u64_stats_read(&b->bytes);
+               *ret_packets = u64_stats_read(&b->packets);
+       } while (running && u64_stats_fetch_retry_irq(&b->syncp, start));
  }
-EXPORT_SYMBOL(__gnet_stats_copy_basic);
  
  static int
-___gnet_stats_copy_basic(const seqcount_t *running,
-                        struct gnet_dump *d,
-                        struct gnet_stats_basic_cpu __percpu *cpu,
-                        struct gnet_stats_basic_packed *b,
-                        int type)
+___gnet_stats_copy_basic(struct gnet_dump *d,
+                        struct gnet_stats_basic_sync __percpu *cpu,
+                        struct gnet_stats_basic_sync *b,
+                        int type, bool running)
  {
-       struct gnet_stats_basic_packed bstats = {0};
+       u64 bstats_bytes, bstats_packets;
  
-       __gnet_stats_copy_basic(running, &bstats, cpu, b);
+       gnet_stats_read_basic(&bstats_bytes, &bstats_packets, cpu, b, running);
  
         if (d->compat_tc_stats && type == TCA_STATS_BASIC) {
-               d->tc_stats.bytes = bstats.bytes;
-               d->tc_stats.packets = bstats.packets;
+               d->tc_stats.bytes = bstats_bytes;
+               d->tc_stats.packets = bstats_packets;
         }
  
         if (d->tail) {
@@ -178,24 +227,28 @@ ___gnet_stats_copy_basic(const seqcount_t *running,
                 int res;
  
                 memset(&sb, 0, sizeof(sb));
-               sb.bytes = bstats.bytes;
-               sb.packets = bstats.packets;
+               sb.bytes = bstats_bytes;
+               sb.packets = bstats_packets;
                 res = gnet_stats_copy(d, type, &sb, sizeof(sb), TCA_STATS_PAD);
-               if (res < 0 || sb.packets == bstats.packets)
+               if (res < 0 || sb.packets == bstats_packets)
                         return res;
                 /* emit 64bit stats only if needed */
-               return gnet_stats_copy(d, TCA_STATS_PKT64, &bstats.packets,
-                                      sizeof(bstats.packets), TCA_STATS_PAD);
+               return gnet_stats_copy(d, TCA_STATS_PKT64, &bstats_packets,
+                                      sizeof(bstats_packets), TCA_STATS_PAD);
         }
         return 0;
  }
  
  /**
   * gnet_stats_copy_basic - copy basic statistics into statistic TLV
- * @running: seqcount_t pointer
   * @d: dumping handle
   * @cpu: copy statistic per cpu
   * @b: basic statistics
+ * @running: true if @b represents a running qdisc, thus @b's
+ *           internal values might change during basic reads.
+ *           Only used if @cpu is NULL
+ *
+ * Context: task; must not be run from IRQ or BH contexts
   *
   * Appends the basic statistics to the top level TLV created by
   * gnet_stats_start_copy().
@@ -204,22 +257,25 @@ ___gnet_stats_copy_basic(const seqcount_t *running,
   * if the room in the socket buffer was not sufficient.
   */
  int
-gnet_stats_copy_basic(const seqcount_t *running,
-                     struct gnet_dump *d,
-                     struct gnet_stats_basic_cpu __percpu *cpu,
-                     struct gnet_stats_basic_packed *b)
+gnet_stats_copy_basic(struct gnet_dump *d,
+                     struct gnet_stats_basic_sync __percpu *cpu,
+                     struct gnet_stats_basic_sync *b,
+                     bool running)
  {
-       return ___gnet_stats_copy_basic(running, d, cpu, b,
-                                       TCA_STATS_BASIC);
+       return ___gnet_stats_copy_basic(d, cpu, b, TCA_STATS_BASIC, running);
  }
  EXPORT_SYMBOL(gnet_stats_copy_basic);
  
  /**
   * gnet_stats_copy_basic_hw - copy basic hw statistics into statistic TLV
- * @running: seqcount_t pointer
   * @d: dumping handle
   * @cpu: copy statistic per cpu
   * @b: basic statistics
+ * @running: true if @b represents a running qdisc, thus @b's
+ *           internal values might change during basic reads.
+ *           Only used if @cpu is NULL
+ *
+ * Context: task; must not be run from IRQ or BH contexts
   *
   * Appends the basic statistics to the top level TLV created by
   * gnet_stats_start_copy().
@@ -228,13 +284,12 @@ EXPORT_SYMBOL(gnet_stats_copy_basic);
   * if the room in the socket buffer was not sufficient.
   */
  int
-gnet_stats_copy_basic_hw(const seqcount_t *running,
-                        struct gnet_dump *d,
-                        struct gnet_stats_basic_cpu __percpu *cpu,
-                        struct gnet_stats_basic_packed *b)
+gnet_stats_copy_basic_hw(struct gnet_dump *d,
+                        struct gnet_stats_basic_sync __percpu *cpu,
+                        struct gnet_stats_basic_sync *b,
+                        bool running)
  {
-       return ___gnet_stats_copy_basic(running, d, cpu, b,
-                                       TCA_STATS_BASIC_HW);
+       return ___gnet_stats_copy_basic(d, cpu, b, TCA_STATS_BASIC_HW, running);
  }
  EXPORT_SYMBOL(gnet_stats_copy_basic_hw);
  
@@ -282,16 +337,15 @@ gnet_stats_copy_rate_est(struct gnet_dump *d,
  }
  EXPORT_SYMBOL(gnet_stats_copy_rate_est);
  
-static void
-__gnet_stats_copy_queue_cpu(struct gnet_stats_queue *qstats,
-                           const struct gnet_stats_queue __percpu *q)
+static void gnet_stats_add_queue_cpu(struct gnet_stats_queue *qstats,
+                                    const struct gnet_stats_queue __percpu *q)
  {
         int i;
  
         for_each_possible_cpu(i) {
                 const struct gnet_stats_queue *qcpu = per_cpu_ptr(q, i);
  
-               qstats->qlen = 0;
+               qstats->qlen += qcpu->backlog;
                 qstats->backlog += qcpu->backlog;
                 qstats->drops += qcpu->drops;
                 qstats->requeues += qcpu->requeues;
@@ -299,24 +353,21 @@ __gnet_stats_copy_queue_cpu(struct gnet_stats_queue *qstats,
         }
  }
  
-void __gnet_stats_copy_queue(struct gnet_stats_queue *qstats,
-                            const struct gnet_stats_queue __percpu *cpu,
-                            const struct gnet_stats_queue *q,
-                            __u32 qlen)
+void gnet_stats_add_queue(struct gnet_stats_queue *qstats,
+                         const struct gnet_stats_queue __percpu *cpu,
+                         const struct gnet_stats_queue *q)
  {
         if (cpu) {
-               __gnet_stats_copy_queue_cpu(qstats, cpu);
+               gnet_stats_add_queue_cpu(qstats, cpu);
         } else {
-               qstats->qlen = q->qlen;
-               qstats->backlog = q->backlog;
-               qstats->drops = q->drops;
-               qstats->requeues = q->requeues;
-               qstats->overlimits = q->overlimits;
+               qstats->qlen += q->qlen;
+               qstats->backlog += q->backlog;
+               qstats->drops += q->drops;
+               qstats->requeues += q->requeues;
+               qstats->overlimits += q->overlimits;
         }
-
-       qstats->qlen = qlen;
  }
-EXPORT_SYMBOL(__gnet_stats_copy_queue);
+EXPORT_SYMBOL(gnet_stats_add_queue);
  
  /**
   * gnet_stats_copy_queue - copy queue statistics into statistics TLV
@@ -339,7 +390,8 @@ gnet_stats_copy_queue(struct gnet_dump *d,
  {
         struct gnet_stats_queue qstats = {0};
  
-       __gnet_stats_copy_queue(&qstats, cpu_q, q, qlen);
+       gnet_stats_add_queue(&qstats, cpu_q, q);
+       qstats.qlen = qlen;
  
         if (d->compat_tc_stats) {
                 d->tc_stats.drops = qstats.drops;
diff --git a/net/netfilter/xt_RATEEST.c b/net/netfilter/xt_RATEEST.c
index 0d5c422..8aec1b5 100644
--- a/net/netfilter/xt_RATEEST.c
+++ b/net/netfilter/xt_RATEEST.c
@@ -94,11 +94,11 @@ static unsigned int
  xt_rateest_tg(struct sk_buff *skb, const struct xt_action_param *par)
  {
         const struct xt_rateest_target_info *info = par->targinfo;
-       struct gnet_stats_basic_packed *stats = &info->est->bstats;
+       struct gnet_stats_basic_sync *stats = &info->est->bstats;
  
         spin_lock_bh(&info->est->lock);
-       stats->bytes += skb->len;
-       stats->packets++;
+       u64_stats_add(&stats->bytes, skb->len);
+       u64_stats_inc(&stats->packets);
         spin_unlock_bh(&info->est->lock);
  
         return XT_CONTINUE;
@@ -143,6 +143,7 @@ static int xt_rateest_tg_checkentry(const struct xt_tgchk_param *par)
         if (!est)
                 goto err1;
  
+       gnet_stats_basic_sync_init(&est->bstats);
         strlcpy(est->name, info->name, sizeof(est->name));
         spin_lock_init(&est->lock);
         est->refcnt             = 1;
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index d775676..94c0571 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -486,16 +486,18 @@ int tcf_idr_create(struct tc_action_net *tn, u32 index, struct nlattr *est,
                 atomic_set(&p->tcfa_bindcnt, 1);
  
         if (cpustats) {
-               p->cpu_bstats = netdev_alloc_pcpu_stats(struct gnet_stats_basic_cpu);
+               p->cpu_bstats = netdev_alloc_pcpu_stats(struct gnet_stats_basic_sync);
                 if (!p->cpu_bstats)
                         goto err1;
-               p->cpu_bstats_hw = netdev_alloc_pcpu_stats(struct gnet_stats_basic_cpu);
+               p->cpu_bstats_hw = netdev_alloc_pcpu_stats(struct gnet_stats_basic_sync);
                 if (!p->cpu_bstats_hw)
                         goto err2;
                 p->cpu_qstats = alloc_percpu(struct gnet_stats_queue);
                 if (!p->cpu_qstats)
                         goto err3;
         }
+       gnet_stats_basic_sync_init(&p->tcfa_bstats);
+       gnet_stats_basic_sync_init(&p->tcfa_bstats_hw);
         spin_lock_init(&p->tcfa_lock);
         p->tcfa_index = index;
         p->tcfa_tm.install = jiffies;
@@ -505,7 +507,7 @@ int tcf_idr_create(struct tc_action_net *tn, u32 index, struct nlattr *est,
         if (est) {
                 err = gen_new_estimator(&p->tcfa_bstats, p->cpu_bstats,
                                         &p->tcfa_rate_est,
-                                       &p->tcfa_lock, NULL, est);
+                                       &p->tcfa_lock, false, est);
                 if (err)
                         goto err4;
         }
@@ -1141,13 +1143,13 @@ void tcf_action_update_stats(struct tc_action *a, u64 bytes, u64 packets,
                              u64 drops, bool hw)
  {
         if (a->cpu_bstats) {
-               _bstats_cpu_update(this_cpu_ptr(a->cpu_bstats), bytes, packets);
+               _bstats_update(this_cpu_ptr(a->cpu_bstats), bytes, packets);
  
                 this_cpu_ptr(a->cpu_qstats)->drops += drops;
  
                 if (hw)
-                       _bstats_cpu_update(this_cpu_ptr(a->cpu_bstats_hw),
-                                          bytes, packets);
+                       _bstats_update(this_cpu_ptr(a->cpu_bstats_hw),
+                                      bytes, packets);
                 return;
         }
  
@@ -1186,9 +1188,10 @@ int tcf_action_copy_stats(struct sk_buff *skb, struct tc_action *p,
         if (err < 0)
                 goto errout;
  
-       if (gnet_stats_copy_basic(NULL, &d, p->cpu_bstats, &p->tcfa_bstats) < 0 ||
-           gnet_stats_copy_basic_hw(NULL, &d, p->cpu_bstats_hw,
-                                    &p->tcfa_bstats_hw) < 0 ||
+       if (gnet_stats_copy_basic(&d, p->cpu_bstats,
+                                 &p->tcfa_bstats, false) < 0 ||
+           gnet_stats_copy_basic_hw(&d, p->cpu_bstats_hw,
+                                    &p->tcfa_bstats_hw, false) < 0 ||
             gnet_stats_copy_rate_est(&d, &p->tcfa_rate_est) < 0 ||
             gnet_stats_copy_queue(&d, p->cpu_qstats,
                                   &p->tcfa_qstats,
diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
index 5c36013..f2bf896 100644
--- a/net/sched/act_bpf.c
+++ b/net/sched/act_bpf.c
@@ -41,7 +41,7 @@ static int tcf_bpf_act(struct sk_buff *skb, const struct tc_action *act,
         int action, filter_res;
  
         tcf_lastuse_update(&prog->tcf_tm);
-       bstats_cpu_update(this_cpu_ptr(prog->common.cpu_bstats), skb);
+       bstats_update(this_cpu_ptr(prog->common.cpu_bstats), skb);
  
         filter = rcu_dereference(prog->filter);
         if (at_ingress) {
diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c
index 7064a36..b757f90 100644
--- a/net/sched/act_ife.c
+++ b/net/sched/act_ife.c
@@ -718,7 +718,7 @@ static int tcf_ife_decode(struct sk_buff *skb, const struct tc_action *a,
         u8 *tlv_data;
         u16 metalen;
  
-       bstats_cpu_update(this_cpu_ptr(ife->common.cpu_bstats), skb);
+       bstats_update(this_cpu_ptr(ife->common.cpu_bstats), skb);
         tcf_lastuse_update(&ife->tcf_tm);
  
         if (skb_at_tc_ingress(skb))
@@ -806,7 +806,7 @@ static int tcf_ife_encode(struct sk_buff *skb, const struct tc_action *a,
                         exceed_mtu = true;
         }
  
-       bstats_cpu_update(this_cpu_ptr(ife->common.cpu_bstats), skb);
+       bstats_update(this_cpu_ptr(ife->common.cpu_bstats), skb);
         tcf_lastuse_update(&ife->tcf_tm);
  
         if (!metalen) {         /* no metadata to send */
diff --git a/net/sched/act_mpls.c b/net/sched/act_mpls.c
index db0ef04..c0730eb 100644
--- a/net/sched/act_mpls.c
+++ b/net/sched/act_mpls.c
@@ -59,7 +59,7 @@ static int tcf_mpls_act(struct sk_buff *skb, const struct tc_action *a,
         int ret, mac_len;
  
         tcf_lastuse_update(&m->tcf_tm);
-       bstats_cpu_update(this_cpu_ptr(m->common.cpu_bstats), skb);
+       bstats_update(this_cpu_ptr(m->common.cpu_bstats), skb);
  
         /* Ensure 'data' points at mac_header prior calling mpls manipulating
          * functions.
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index 5c0a3ea..cbeb999 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -125,7 +125,7 @@ static int tcf_police_init(struct net *net, struct nlattr *nla,
                                             police->common.cpu_bstats,
                                             &police->tcf_rate_est,
                                             &police->tcf_lock,
-                                           NULL, est);
+                                           false, est);
                 if (err)
                         goto failure;
         } else if (tb[TCA_POLICE_AVRATE] &&
@@ -262,7 +262,7 @@ static int tcf_police_act(struct sk_buff *skb, const struct tc_action *a,
         int ret;
  
         tcf_lastuse_update(&police->tcf_tm);
-       bstats_cpu_update(this_cpu_ptr(police->common.cpu_bstats), skb);
+       bstats_update(this_cpu_ptr(police->common.cpu_bstats), skb);
  
         ret = READ_ONCE(police->tcf_action);
         p = rcu_dereference_bh(police->params);
diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c
index 230501e..ce859b0 100644
--- a/net/sched/act_sample.c
+++ b/net/sched/act_sample.c
@@ -163,7 +163,7 @@ static int tcf_sample_act(struct sk_buff *skb, const struct tc_action *a,
         int retval;
  
         tcf_lastuse_update(&s->tcf_tm);
-       bstats_cpu_update(this_cpu_ptr(s->common.cpu_bstats), skb);
+       bstats_update(this_cpu_ptr(s->common.cpu_bstats), skb);
         retval = READ_ONCE(s->tcf_action);
  
         psample_group = rcu_dereference_bh(s->psample_group);
diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
index cbbe186..e617ab4 100644
--- a/net/sched/act_simple.c
+++ b/net/sched/act_simple.c
@@ -36,7 +36,8 @@ static int tcf_simp_act(struct sk_buff *skb, const struct tc_action *a,
          * then it would look like "hello_3" (without quotes)
          */
         pr_info("simple: %s_%llu\n",
-              (char *)d->tcfd_defdata, d->tcf_bstats.packets);
+               (char *)d->tcfd_defdata,
+               u64_stats_read(&d->tcf_bstats.packets));
         spin_unlock(&d->tcf_lock);
         return d->tcf_action;
  }
diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c
index 6054185..d30ecbf 100644
--- a/net/sched/act_skbedit.c
+++ b/net/sched/act_skbedit.c
@@ -31,7 +31,7 @@ static int tcf_skbedit_act(struct sk_buff *skb, const struct tc_action *a,
         int action;
  
         tcf_lastuse_update(&d->tcf_tm);
-       bstats_cpu_update(this_cpu_ptr(d->common.cpu_bstats), skb);
+       bstats_update(this_cpu_ptr(d->common.cpu_bstats), skb);
  
         params = rcu_dereference_bh(d->params);
         action = READ_ONCE(d->tcf_action);
diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c
index ecb9ee6..9b6b52c 100644
--- a/net/sched/act_skbmod.c
+++ b/net/sched/act_skbmod.c
@@ -31,7 +31,7 @@ static int tcf_skbmod_act(struct sk_buff *skb, const struct tc_action *a,
         u64 flags;
  
         tcf_lastuse_update(&d->tcf_tm);
-       bstats_cpu_update(this_cpu_ptr(d->common.cpu_bstats), skb);
+       bstats_update(this_cpu_ptr(d->common.cpu_bstats), skb);
  
         action = READ_ONCE(d->tcf_action);
         if (unlikely(action == TC_ACT_SHOT))
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 02f6200..6e23435 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -884,7 +884,7 @@ static void qdisc_offload_graft_root(struct net_device *dev,
  static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
                          u32 portid, u32 seq, u16 flags, int event)
  {
-       struct gnet_stats_basic_cpu __percpu *cpu_bstats = NULL;
+       struct gnet_stats_basic_sync __percpu *cpu_bstats = NULL;
         struct gnet_stats_queue __percpu *cpu_qstats = NULL;
         struct tcmsg *tcm;
         struct nlmsghdr  *nlh;
@@ -942,8 +942,7 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
                 cpu_qstats = q->cpu_qstats;
         }
  
-       if (gnet_stats_copy_basic(qdisc_root_sleeping_running(q),
-                                 &d, cpu_bstats, &q->bstats) < 0 ||
+       if (gnet_stats_copy_basic(&d, cpu_bstats, &q->bstats, true) < 0 ||
             gnet_stats_copy_rate_est(&d, &q->rate_est) < 0 ||
             gnet_stats_copy_queue(&d, cpu_qstats, &q->qstats, qlen) < 0)
                 goto nla_put_failure;
@@ -1270,26 +1269,17 @@ static struct Qdisc *qdisc_create(struct net_device *dev,
                 rcu_assign_pointer(sch->stab, stab);
         }
         if (tca[TCA_RATE]) {
-               seqcount_t *running;
-
                 err = -EOPNOTSUPP;
                 if (sch->flags & TCQ_F_MQROOT) {
                         NL_SET_ERR_MSG(extack, "Cannot attach rate estimator to a multi-queue root qdisc");
                         goto err_out4;
                 }
  
-               if (sch->parent != TC_H_ROOT &&
-                   !(sch->flags & TCQ_F_INGRESS) &&
-                   (!p || !(p->flags & TCQ_F_MQROOT)))
-                       running = qdisc_root_sleeping_running(sch);
-               else
-                       running = &sch->running;
-
                 err = gen_new_estimator(&sch->bstats,
                                         sch->cpu_bstats,
                                         &sch->rate_est,
                                         NULL,
-                                       running,
+                                       true,
                                         tca[TCA_RATE]);
                 if (err) {
                         NL_SET_ERR_MSG(extack, "Failed to generate new estimator");
@@ -1365,7 +1355,7 @@ static int qdisc_change(struct Qdisc *sch, struct nlattr **tca,
                                       sch->cpu_bstats,
                                       &sch->rate_est,
                                       NULL,
-                                     qdisc_root_sleeping_running(sch),
+                                     true,
                                       tca[TCA_RATE]);
         }
  out:
diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c
index 3373716..28e1897 100644
--- a/net/sched/sch_atm.c
+++ b/net/sched/sch_atm.c
@@ -52,7 +52,7 @@ struct atm_flow_data {
         struct atm_qdisc_data   *parent;        /* parent qdisc */
         struct socket           *sock;          /* for closing */
         int                     ref;            /* reference count */
-       struct gnet_stats_basic_packed  bstats;
+       struct gnet_stats_basic_sync    bstats;
         struct gnet_stats_queue qstats;
         struct list_head        list;
         struct atm_flow_data    *excess;        /* flow for excess traffic;
@@ -551,6 +551,7 @@ static int atm_tc_init(struct Qdisc *sch, struct nlattr *opt,
         pr_debug("atm_tc_init(sch %p,[qdisc %p],opt %p)\n", sch, p, opt);
         INIT_LIST_HEAD(&p->flows);
         INIT_LIST_HEAD(&p->link.list);
+       gnet_stats_basic_sync_init(&p->link.bstats);
         list_add(&p->link.list, &p->flows);
         p->link.q = qdisc_create_dflt(sch->dev_queue,
                                       &pfifo_qdisc_ops, sch->handle, extack);
@@ -654,8 +655,7 @@ atm_tc_dump_class_stats(struct Qdisc *sch, unsigned long arg,
  {
         struct atm_flow_data *flow = (struct atm_flow_data *)arg;
  
-       if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
-                                 d, NULL, &flow->bstats) < 0 ||
+       if (gnet_stats_copy_basic(d, NULL, &flow->bstats, true) < 0 ||
             gnet_stats_copy_queue(d, NULL, &flow->qstats, flow->q->q.qlen) < 0)
                 return -1;
  
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index 46b3dd7..c3a74a2 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -116,7 +116,7 @@ struct cbq_class {
         long                    avgidle;
         long                    deficit;        /* Saved deficit for WRR */
         psched_time_t           penalized;
-       struct gnet_stats_basic_packed bstats;
+       struct gnet_stats_basic_sync bstats;
         struct gnet_stats_queue qstats;
         struct net_rate_estimator __rcu *rate_est;
         struct tc_cbq_xstats    xstats;
@@ -565,8 +565,7 @@ cbq_update(struct cbq_sched_data *q)
                 long avgidle = cl->avgidle;
                 long idle;
  
-               cl->bstats.packets++;
-               cl->bstats.bytes += len;
+               _bstats_update(&cl->bstats, len, 1);
  
                 /*
                  * (now - last) is total time between packet right edges.
@@ -1383,8 +1382,7 @@ cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
         if (cl->undertime != PSCHED_PASTPERFECT)
                 cl->xstats.undertime = cl->undertime - q->now;
  
-       if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
-                                 d, NULL, &cl->bstats) < 0 ||
+       if (gnet_stats_copy_basic(d, NULL, &cl->bstats, true) < 0 ||
             gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
             gnet_stats_copy_queue(d, NULL, &cl->qstats, qlen) < 0)
                 return -1;
@@ -1518,7 +1516,7 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t
                         err = gen_replace_estimator(&cl->bstats, NULL,
                                                     &cl->rate_est,
                                                     NULL,
-                                                   qdisc_root_sleeping_running(sch),
+                                                   true,
                                                     tca[TCA_RATE]);
                         if (err) {
                                 NL_SET_ERR_MSG(extack, "Failed to replace specified rate estimator");
@@ -1610,6 +1608,7 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t
         if (cl == NULL)
                 goto failure;
  
+       gnet_stats_basic_sync_init(&cl->bstats);
         err = tcf_block_get(&cl->block, &cl->filter_list, sch, extack);
         if (err) {
                 kfree(cl);
@@ -1618,9 +1617,7 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t
  
         if (tca[TCA_RATE]) {
                 err = gen_new_estimator(&cl->bstats, NULL, &cl->rate_est,
-                                       NULL,
-                                       qdisc_root_sleeping_running(sch),
-                                       tca[TCA_RATE]);
+                                       NULL, true, tca[TCA_RATE]);
                 if (err) {
                         NL_SET_ERR_MSG(extack, "Couldn't create new estimator");
                         tcf_block_put(cl->block);
diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c
index 80a88e2..4e5b1cf 100644
--- a/net/sched/sch_drr.c
+++ b/net/sched/sch_drr.c
@@ -19,7 +19,7 @@ struct drr_class {
         struct Qdisc_class_common       common;
         unsigned int                    filter_cnt;
  
-       struct gnet_stats_basic_packed          bstats;
+       struct gnet_stats_basic_sync            bstats;
         struct gnet_stats_queue         qstats;
         struct net_rate_estimator __rcu *rate_est;
         struct list_head                alist;
@@ -85,8 +85,7 @@ static int drr_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
                 if (tca[TCA_RATE]) {
                         err = gen_replace_estimator(&cl->bstats, NULL,
                                                     &cl->rate_est,
-                                                   NULL,
-                                                   qdisc_root_sleeping_running(sch),
+                                                   NULL, true,
                                                     tca[TCA_RATE]);
                         if (err) {
                                 NL_SET_ERR_MSG(extack, "Failed to replace estimator");
@@ -106,6 +105,7 @@ static int drr_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
         if (cl == NULL)
                 return -ENOBUFS;
  
+       gnet_stats_basic_sync_init(&cl->bstats);
         cl->common.classid = classid;
         cl->quantum        = quantum;
         cl->qdisc          = qdisc_create_dflt(sch->dev_queue,
@@ -118,9 +118,7 @@ static int drr_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
  
         if (tca[TCA_RATE]) {
                 err = gen_replace_estimator(&cl->bstats, NULL, &cl->rate_est,
-                                           NULL,
-                                           qdisc_root_sleeping_running(sch),
-                                           tca[TCA_RATE]);
+                                           NULL, true, tca[TCA_RATE]);
                 if (err) {
                         NL_SET_ERR_MSG(extack, "Failed to replace estimator");
                         qdisc_put(cl->qdisc);
@@ -267,8 +265,7 @@ static int drr_dump_class_stats(struct Qdisc *sch, unsigned long arg,
         if (qlen)
                 xstats.deficit = cl->deficit;
  
-       if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
-                                 d, NULL, &cl->bstats) < 0 ||
+       if (gnet_stats_copy_basic(d, NULL, &cl->bstats, true) < 0 ||
             gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
             gnet_stats_copy_queue(d, cl_q->cpu_qstats, &cl_q->qstats, qlen) < 0)
                 return -1;
diff --git a/net/sched/sch_ets.c b/net/sched/sch_ets.c
index 175e07b..8de4365 100644
--- a/net/sched/sch_ets.c
+++ b/net/sched/sch_ets.c
@@ -41,7 +41,7 @@ struct ets_class {
         struct Qdisc *qdisc;
         u32 quantum;
         u32 deficit;
-       struct gnet_stats_basic_packed bstats;
+       struct gnet_stats_basic_sync bstats;
         struct gnet_stats_queue qstats;
  };
  
@@ -325,8 +325,7 @@ static int ets_class_dump_stats(struct Qdisc *sch, unsigned long arg,
         struct ets_class *cl = ets_class_from_arg(sch, arg);
         struct Qdisc *cl_q = cl->qdisc;
  
-       if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
-                                 d, NULL, &cl_q->bstats) < 0 ||
+       if (gnet_stats_copy_basic(d, NULL, &cl_q->bstats, true) < 0 ||
             qdisc_qstats_copy(d, cl_q) < 0)
                 return -1;
  
@@ -661,7 +660,6 @@ static int ets_qdisc_change(struct Qdisc *sch, struct nlattr *opt,
  
         q->nbands = nbands;
         for (i = nstrict; i < q->nstrict; i++) {
-               INIT_LIST_HEAD(&q->classes[i].alist);
                 if (q->classes[i].qdisc->q.qlen) {
                         list_add_tail(&q->classes[i].alist, &q->active);
                         q->classes[i].deficit = quanta[i];
@@ -689,7 +687,11 @@ static int ets_qdisc_change(struct Qdisc *sch, struct nlattr *opt,
         ets_offload_change(sch);
         for (i = q->nbands; i < oldbands; i++) {
                 qdisc_put(q->classes[i].qdisc);
-               memset(&q->classes[i], 0, sizeof(q->classes[i]));
+               q->classes[i].qdisc = NULL;
+               q->classes[i].quantum = 0;
+               q->classes[i].deficit = 0;
+               gnet_stats_basic_sync_init(&q->classes[i].bstats);
+               memset(&q->classes[i].qstats, 0, sizeof(q->classes[i].qstats));
         }
         return 0;
  }
@@ -698,7 +700,7 @@ static int ets_qdisc_init(struct Qdisc *sch, struct nlattr *opt,
                           struct netlink_ext_ack *extack)
  {
         struct ets_sched *q = qdisc_priv(sch);
-       int err;
+       int err, i;
  
         if (!opt)
                 return -EINVAL;
@@ -708,6 +710,9 @@ static int ets_qdisc_init(struct Qdisc *sch, struct nlattr *opt,
                 return err;
  
         INIT_LIST_HEAD(&q->active);
+       for (i = 0; i < TCQ_ETS_MAX_BANDS; i++)
+               INIT_LIST_HEAD(&q->classes[i].alist);
+
         return ets_qdisc_change(sch, opt, extack);
  }
  
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 0229978..b979ae2 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -304,8 +304,8 @@ trace:
  
  /*
   * Transmit possibly several skbs, and handle the return status as
- * required. Owning running seqcount bit guarantees that
- * only one CPU can execute this function.
+ * required. Owning qdisc running bit guarantees that only one CPU
+ * can execute this function.
   *
   * Returns to the caller:
   *                             false  - hardware queue frozen backoff
@@ -606,7 +606,6 @@ struct Qdisc noop_qdisc = {
         .ops            =       &noop_qdisc_ops,
         .q.lock         =       __SPIN_LOCK_UNLOCKED(noop_qdisc.q.lock),
         .dev_queue      =       &noop_netdev_queue,
-       .running        =       SEQCNT_ZERO(noop_qdisc.running),
         .busylock       =       __SPIN_LOCK_UNLOCKED(noop_qdisc.busylock),
         .gso_skb = {
                 .next = (struct sk_buff *)&noop_qdisc.gso_skb,
@@ -867,7 +866,6 @@ struct Qdisc_ops pfifo_fast_ops __read_mostly = {
  EXPORT_SYMBOL(pfifo_fast_ops);
  
  static struct lock_class_key qdisc_tx_busylock;
-static struct lock_class_key qdisc_running_key;
  
  struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
                           const struct Qdisc_ops *ops,
@@ -892,11 +890,12 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
         __skb_queue_head_init(&sch->gso_skb);
         __skb_queue_head_init(&sch->skb_bad_txq);
         qdisc_skb_head_init(&sch->q);
+       gnet_stats_basic_sync_init(&sch->bstats);
         spin_lock_init(&sch->q.lock);
  
         if (ops->static_flags & TCQ_F_CPUSTATS) {
                 sch->cpu_bstats =
-                       netdev_alloc_pcpu_stats(struct gnet_stats_basic_cpu);
+                       netdev_alloc_pcpu_stats(struct gnet_stats_basic_sync);
                 if (!sch->cpu_bstats)
                         goto errout1;
  
@@ -916,10 +915,6 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
         lockdep_set_class(&sch->seqlock,
                           dev->qdisc_tx_busylock ?: &qdisc_tx_busylock);
  
-       seqcount_init(&sch->running);
-       lockdep_set_class(&sch->running,
-                         dev->qdisc_running_key ?: &qdisc_running_key);
-
         sch->ops = ops;
         sch->flags = ops->static_flags;
         sch->enqueue = ops->enqueue;
diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c
index 621dc6a..1073c76 100644
--- a/net/sched/sch_gred.c
+++ b/net/sched/sch_gred.c
@@ -56,6 +56,7 @@ struct gred_sched {
         u32             DPs;
         u32             def;
         struct red_vars wred_set;
+       struct tc_gred_qopt_offload *opt;
  };
  
  static inline int gred_wred_mode(struct gred_sched *table)
@@ -311,48 +312,50 @@ static void gred_offload(struct Qdisc *sch, enum tc_gred_command command)
  {
         struct gred_sched *table = qdisc_priv(sch);
         struct net_device *dev = qdisc_dev(sch);
-       struct tc_gred_qopt_offload opt = {
-               .command        = command,
-               .handle         = sch->handle,
-               .parent         = sch->parent,
-       };
+       struct tc_gred_qopt_offload *opt = table->opt;
  
         if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
                 return;
  
+       memset(opt, 0, sizeof(*opt));
+       opt->command = command;
+       opt->handle = sch->handle;
+       opt->parent = sch->parent;
+
         if (command == TC_GRED_REPLACE) {
                 unsigned int i;
  
-               opt.set.grio_on = gred_rio_mode(table);
-               opt.set.wred_on = gred_wred_mode(table);
-               opt.set.dp_cnt = table->DPs;
-               opt.set.dp_def = table->def;
+               opt->set.grio_on = gred_rio_mode(table);
+               opt->set.wred_on = gred_wred_mode(table);
+               opt->set.dp_cnt = table->DPs;
+               opt->set.dp_def = table->def;
  
                 for (i = 0; i < table->DPs; i++) {
                         struct gred_sched_data *q = table->tab[i];
  
                         if (!q)
                                 continue;
-                       opt.set.tab[i].present = true;
-                       opt.set.tab[i].limit = q->limit;
-                       opt.set.tab[i].prio = q->prio;
-                       opt.set.tab[i].min = q->parms.qth_min >> q->parms.Wlog;
-                       opt.set.tab[i].max = q->parms.qth_max >> q->parms.Wlog;
-                       opt.set.tab[i].is_ecn = gred_use_ecn(q);
-                       opt.set.tab[i].is_harddrop = gred_use_harddrop(q);
-                       opt.set.tab[i].probability = q->parms.max_P;
-                       opt.set.tab[i].backlog = &q->backlog;
+                       opt->set.tab[i].present = true;
+                       opt->set.tab[i].limit = q->limit;
+                       opt->set.tab[i].prio = q->prio;
+                       opt->set.tab[i].min = q->parms.qth_min >> q->parms.Wlog;
+                       opt->set.tab[i].max = q->parms.qth_max >> q->parms.Wlog;
+                       opt->set.tab[i].is_ecn = gred_use_ecn(q);
+                       opt->set.tab[i].is_harddrop = gred_use_harddrop(q);
+                       opt->set.tab[i].probability = q->parms.max_P;
+                       opt->set.tab[i].backlog = &q->backlog;
                 }
-               opt.set.qstats = &sch->qstats;
+               opt->set.qstats = &sch->qstats;
         }
  
-       dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_GRED, &opt);
+       dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_GRED, opt);
  }
  
  static int gred_offload_dump_stats(struct Qdisc *sch)
  {
         struct gred_sched *table = qdisc_priv(sch);
         struct tc_gred_qopt_offload *hw_stats;
+       u64 bytes = 0, packets = 0;
         unsigned int i;
         int ret;
  
@@ -364,9 +367,11 @@ static int gred_offload_dump_stats(struct Qdisc *sch)
         hw_stats->handle = sch->handle;
         hw_stats->parent = sch->parent;
  
-       for (i = 0; i < MAX_DPs; i++)
+       for (i = 0; i < MAX_DPs; i++) {
+               gnet_stats_basic_sync_init(&hw_stats->stats.bstats[i]);
                 if (table->tab[i])
                         hw_stats->stats.xstats[i] = &table->tab[i]->stats;
+       }
  
         ret = qdisc_offload_dump_helper(sch, TC_SETUP_QDISC_GRED, hw_stats);
         /* Even if driver returns failure adjust the stats - in case offload
@@ -375,19 +380,19 @@ static int gred_offload_dump_stats(struct Qdisc *sch)
         for (i = 0; i < MAX_DPs; i++) {
                 if (!table->tab[i])
                         continue;
-               table->tab[i]->packetsin += hw_stats->stats.bstats[i].packets;
-               table->tab[i]->bytesin += hw_stats->stats.bstats[i].bytes;
+               table->tab[i]->packetsin += u64_stats_read(&hw_stats->stats.bstats[i].packets);
+               table->tab[i]->bytesin += u64_stats_read(&hw_stats->stats.bstats[i].bytes);
                 table->tab[i]->backlog += hw_stats->stats.qstats[i].backlog;
  
-               _bstats_update(&sch->bstats,
-                              hw_stats->stats.bstats[i].bytes,
-                              hw_stats->stats.bstats[i].packets);
+               bytes += u64_stats_read(&hw_stats->stats.bstats[i].bytes);
+               packets += u64_stats_read(&hw_stats->stats.bstats[i].packets);
                 sch->qstats.qlen += hw_stats->stats.qstats[i].qlen;
                 sch->qstats.backlog += hw_stats->stats.qstats[i].backlog;
                 sch->qstats.drops += hw_stats->stats.qstats[i].drops;
                 sch->qstats.requeues += hw_stats->stats.qstats[i].requeues;
                 sch->qstats.overlimits += hw_stats->stats.qstats[i].overlimits;
         }
+       _bstats_update(&sch->bstats, bytes, packets);
  
         kfree(hw_stats);
         return ret;
@@ -728,6 +733,7 @@ err_unlock_free:
  static int gred_init(struct Qdisc *sch, struct nlattr *opt,
                      struct netlink_ext_ack *extack)
  {
+       struct gred_sched *table = qdisc_priv(sch);
         struct nlattr *tb[TCA_GRED_MAX + 1];
         int err;
  
@@ -751,6 +757,12 @@ static int gred_init(struct Qdisc *sch, struct nlattr *opt,
                 sch->limit = qdisc_dev(sch)->tx_queue_len
                              * psched_mtu(qdisc_dev(sch));
  
+       if (qdisc_dev(sch)->netdev_ops->ndo_setup_tc) {
+               table->opt = kzalloc(sizeof(*table->opt), GFP_KERNEL);
+               if (!table->opt)
+                       return -ENOMEM;
+       }
+
         return gred_change_table_def(sch, tb[TCA_GRED_DPS], extack);
  }
  
@@ -907,6 +919,7 @@ static void gred_destroy(struct Qdisc *sch)
                         gred_destroy_vq(table->tab[i]);
         }
         gred_offload(sch, TC_GRED_DESTROY);
+       kfree(table->opt);
  }
  
  static struct Qdisc_ops gred_qdisc_ops __read_mostly = {
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c

index c802a02..03efc40 100644 (file)
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -111,7 +111,7 @@ enum hfsc_class_flags {
  struct hfsc_class {
         struct Qdisc_class_common cl_common;
  
-       struct gnet_stats_basic_packed bstats;
+       struct gnet_stats_basic_sync bstats;
         struct gnet_stats_queue qstats;
         struct net_rate_estimator __rcu *rate_est;
         struct tcf_proto __rcu *filter_list; /* filter list */
@@ -965,7 +965,7 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
                         err = gen_replace_estimator(&cl->bstats, NULL,
                                                     &cl->rate_est,
                                                     NULL,
-                                                   qdisc_root_sleeping_running(sch),
+                                                   true,
                                                     tca[TCA_RATE]);
                         if (err)
                                 return err;
@@ -1033,9 +1033,7 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
  
         if (tca[TCA_RATE]) {
                 err = gen_new_estimator(&cl->bstats, NULL, &cl->rate_est,
-                                       NULL,
-                                       qdisc_root_sleeping_running(sch),
-                                       tca[TCA_RATE]);
+                                       NULL, true, tca[TCA_RATE]);
                 if (err) {
                         tcf_block_put(cl->block);
                         kfree(cl);
@@ -1328,7 +1326,7 @@ hfsc_dump_class_stats(struct Qdisc *sch, unsigned long arg,
         xstats.work    = cl->cl_total;
         xstats.rtwork  = cl->cl_cumul;
  
-       if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), d, NULL, &cl->bstats) < 0 ||
+       if (gnet_stats_copy_basic(d, NULL, &cl->bstats, true) < 0 ||
             gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
             gnet_stats_copy_queue(d, NULL, &cl->qstats, qlen) < 0)
                 return -1;
@@ -1406,6 +1404,7 @@ hfsc_init_qdisc(struct Qdisc *sch, struct nlattr *opt,
         if (err)
                 return err;
  
+       gnet_stats_basic_sync_init(&q->root.bstats);
         q->root.cl_common.classid = sch->handle;
         q->root.sched   = q;
         q->root.qdisc = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c

index 45b92e4..177aacc 100644 (file)
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -113,8 +113,8 @@ struct htb_class {
         /*
          * Written often fields
          */
-       struct gnet_stats_basic_packed bstats;
-       struct gnet_stats_basic_packed bstats_bias;
+       struct gnet_stats_basic_sync bstats;
+       struct gnet_stats_basic_sync bstats_bias;
         struct tc_htb_xstats    xstats; /* our special stats */
  
         /* token bucket parameters */
@@ -1306,10 +1306,11 @@ nla_put_failure:
  static void htb_offload_aggregate_stats(struct htb_sched *q,
                                         struct htb_class *cl)
  {
+       u64 bytes = 0, packets = 0;
         struct htb_class *c;
         unsigned int i;
  
-       memset(&cl->bstats, 0, sizeof(cl->bstats));
+       gnet_stats_basic_sync_init(&cl->bstats);
  
         for (i = 0; i < q->clhash.hashsize; i++) {
                 hlist_for_each_entry(c, &q->clhash.hash[i], common.hnode) {
@@ -1321,14 +1322,15 @@ static void htb_offload_aggregate_stats(struct htb_sched *q,
                         if (p != cl)
                                 continue;
  
-                       cl->bstats.bytes += c->bstats_bias.bytes;
-                       cl->bstats.packets += c->bstats_bias.packets;
+                       bytes += u64_stats_read(&c->bstats_bias.bytes);
+                       packets += u64_stats_read(&c->bstats_bias.packets);
                         if (c->level == 0) {
-                               cl->bstats.bytes += c->leaf.q->bstats.bytes;
-                               cl->bstats.packets += c->leaf.q->bstats.packets;
+                               bytes += u64_stats_read(&c->leaf.q->bstats.bytes);
+                               packets += u64_stats_read(&c->leaf.q->bstats.packets);
                         }
                 }
         }
+       _bstats_update(&cl->bstats, bytes, packets);
  }
  
  static int
@@ -1355,16 +1357,16 @@ htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d)
                         if (cl->leaf.q)
                                 cl->bstats = cl->leaf.q->bstats;
                         else
-                               memset(&cl->bstats, 0, sizeof(cl->bstats));
-                       cl->bstats.bytes += cl->bstats_bias.bytes;
-                       cl->bstats.packets += cl->bstats_bias.packets;
+                               gnet_stats_basic_sync_init(&cl->bstats);
+                       _bstats_update(&cl->bstats,
+                                      u64_stats_read(&cl->bstats_bias.bytes),
+                                      u64_stats_read(&cl->bstats_bias.packets));
                 } else {
                         htb_offload_aggregate_stats(q, cl);
                 }
         }
  
-       if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
-                                 d, NULL, &cl->bstats) < 0 ||
+       if (gnet_stats_copy_basic(d, NULL, &cl->bstats, true) < 0 ||
             gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
             gnet_stats_copy_queue(d, NULL, &qs, qlen) < 0)
                 return -1;
@@ -1579,8 +1581,9 @@ static int htb_destroy_class_offload(struct Qdisc *sch, struct htb_class *cl,
         }
  
         if (cl->parent) {
-               cl->parent->bstats_bias.bytes += q->bstats.bytes;
-               cl->parent->bstats_bias.packets += q->bstats.packets;
+               _bstats_update(&cl->parent->bstats_bias,
+                              u64_stats_read(&q->bstats.bytes),
+                              u64_stats_read(&q->bstats.packets));
         }
  
         offload_opt = (struct tc_htb_qopt_offload) {
@@ -1872,6 +1875,9 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
                 if (!cl)
                         goto failure;
  
+               gnet_stats_basic_sync_init(&cl->bstats);
+               gnet_stats_basic_sync_init(&cl->bstats_bias);
+
                 err = tcf_block_get(&cl->block, &cl->filter_list, sch, extack);
                 if (err) {
                         kfree(cl);
@@ -1881,7 +1887,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
                         err = gen_new_estimator(&cl->bstats, NULL,
                                                 &cl->rate_est,
                                                 NULL,
-                                               qdisc_root_sleeping_running(sch),
+                                               true,
                                                 tca[TCA_RATE] ? : &est.nla);
                         if (err)
                                 goto err_block_put;
@@ -1945,8 +1951,9 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
                                 htb_graft_helper(dev_queue, old_q);
                                 goto err_kill_estimator;
                         }
-                       parent->bstats_bias.bytes += old_q->bstats.bytes;
-                       parent->bstats_bias.packets += old_q->bstats.packets;
+                       _bstats_update(&parent->bstats_bias,
+                                      u64_stats_read(&old_q->bstats.bytes),
+                                      u64_stats_read(&old_q->bstats.packets));
                         qdisc_put(old_q);
                 }
                 new_q = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops,
@@ -2006,7 +2013,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
                         err = gen_replace_estimator(&cl->bstats, NULL,
                                                     &cl->rate_est,
                                                     NULL,
-                                                   qdisc_root_sleeping_running(sch),
+                                                   true,
                                                     tca[TCA_RATE]);
                         if (err)
                                 return err;
diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c

index db18d8a..24c5d97 100644 (file)
--- a/net/sched/sch_mq.c
+++ b/net/sched/sch_mq.c
@@ -153,10 +153,9 @@ static int mq_dump(struct Qdisc *sch, struct sk_buff *skb)
         struct net_device *dev = qdisc_dev(sch);
         struct Qdisc *qdisc;
         unsigned int ntx;
-       __u32 qlen = 0;
  
         sch->q.qlen = 0;
-       memset(&sch->bstats, 0, sizeof(sch->bstats));
+       gnet_stats_basic_sync_init(&sch->bstats);
         memset(&sch->qstats, 0, sizeof(sch->qstats));
  
         /* MQ supports lockless qdiscs. However, statistics accounting needs
@@ -168,25 +167,11 @@ static int mq_dump(struct Qdisc *sch, struct sk_buff *skb)
                 qdisc = netdev_get_tx_queue(dev, ntx)->qdisc_sleeping;
                 spin_lock_bh(qdisc_lock(qdisc));
  
-               if (qdisc_is_percpu_stats(qdisc)) {
-                       qlen = qdisc_qlen_sum(qdisc);
-                       __gnet_stats_copy_basic(NULL, &sch->bstats,
-                                               qdisc->cpu_bstats,
-                                               &qdisc->bstats);
-                       __gnet_stats_copy_queue(&sch->qstats,
-                                               qdisc->cpu_qstats,
-                                               &qdisc->qstats, qlen);
-                       sch->q.qlen             += qlen;
-               } else {
-                       sch->q.qlen             += qdisc->q.qlen;
-                       sch->bstats.bytes       += qdisc->bstats.bytes;
-                       sch->bstats.packets     += qdisc->bstats.packets;
-                       sch->qstats.qlen        += qdisc->qstats.qlen;
-                       sch->qstats.backlog     += qdisc->qstats.backlog;
-                       sch->qstats.drops       += qdisc->qstats.drops;
-                       sch->qstats.requeues    += qdisc->qstats.requeues;
-                       sch->qstats.overlimits  += qdisc->qstats.overlimits;
-               }
+               gnet_stats_add_basic(&sch->bstats, qdisc->cpu_bstats,
+                                    &qdisc->bstats, false);
+               gnet_stats_add_queue(&sch->qstats, qdisc->cpu_qstats,
+                                    &qdisc->qstats);
+               sch->q.qlen += qdisc_qlen(qdisc);
  
                 spin_unlock_bh(qdisc_lock(qdisc));
         }
@@ -269,8 +254,7 @@ static int mq_dump_class_stats(struct Qdisc *sch, unsigned long cl,
         struct netdev_queue *dev_queue = mq_queue_get(sch, cl);
  
         sch = dev_queue->qdisc_sleeping;
-       if (gnet_stats_copy_basic(&sch->running, d, sch->cpu_bstats,
-                                 &sch->bstats) < 0 ||
+       if (gnet_stats_copy_basic(d, sch->cpu_bstats, &sch->bstats, true) < 0 ||
             qdisc_qstats_copy(d, sch) < 0)
                 return -1;
         return 0;
diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c

index 50e15ad..42d4101 100644 (file)
--- a/net/sched/sch_mqprio.c
+++ b/net/sched/sch_mqprio.c
@@ -412,7 +412,7 @@ static int mqprio_dump(struct Qdisc *sch, struct sk_buff *skb)
         unsigned int ntx, tc;
  
         sch->q.qlen = 0;
-       memset(&sch->bstats, 0, sizeof(sch->bstats));
+       gnet_stats_basic_sync_init(&sch->bstats);
         memset(&sch->qstats, 0, sizeof(sch->qstats));
  
         /* MQ supports lockless qdiscs. However, statistics accounting needs
@@ -424,25 +424,11 @@ static int mqprio_dump(struct Qdisc *sch, struct sk_buff *skb)
                 qdisc = netdev_get_tx_queue(dev, ntx)->qdisc_sleeping;
                 spin_lock_bh(qdisc_lock(qdisc));
  
-               if (qdisc_is_percpu_stats(qdisc)) {
-                       __u32 qlen = qdisc_qlen_sum(qdisc);
-
-                       __gnet_stats_copy_basic(NULL, &sch->bstats,
-                                               qdisc->cpu_bstats,
-                                               &qdisc->bstats);
-                       __gnet_stats_copy_queue(&sch->qstats,
-                                               qdisc->cpu_qstats,
-                                               &qdisc->qstats, qlen);
-                       sch->q.qlen             += qlen;
-               } else {
-                       sch->q.qlen             += qdisc->q.qlen;
-                       sch->bstats.bytes       += qdisc->bstats.bytes;
-                       sch->bstats.packets     += qdisc->bstats.packets;
-                       sch->qstats.backlog     += qdisc->qstats.backlog;
-                       sch->qstats.drops       += qdisc->qstats.drops;
-                       sch->qstats.requeues    += qdisc->qstats.requeues;
-                       sch->qstats.overlimits  += qdisc->qstats.overlimits;
-               }
+               gnet_stats_add_basic(&sch->bstats, qdisc->cpu_bstats,
+                                    &qdisc->bstats, false);
+               gnet_stats_add_queue(&sch->qstats, qdisc->cpu_qstats,
+                                    &qdisc->qstats);
+               sch->q.qlen += qdisc_qlen(qdisc);
  
                 spin_unlock_bh(qdisc_lock(qdisc));
         }
@@ -534,12 +520,13 @@ static int mqprio_dump_class_stats(struct Qdisc *sch, unsigned long cl,
  {
         if (cl >= TC_H_MIN_PRIORITY) {
                 int i;
-               __u32 qlen = 0;
+               __u32 qlen;
                 struct gnet_stats_queue qstats = {0};
-               struct gnet_stats_basic_packed bstats = {0};
+               struct gnet_stats_basic_sync bstats;
                 struct net_device *dev = qdisc_dev(sch);
                 struct netdev_tc_txq tc = dev->tc_to_txq[cl & TC_BITMASK];
  
+               gnet_stats_basic_sync_init(&bstats);
                 /* Drop lock here it will be reclaimed before touching
                  * statistics this is required because the d->lock we
                  * hold here is the look on dev_queue->qdisc_sleeping
@@ -554,40 +541,28 @@ static int mqprio_dump_class_stats(struct Qdisc *sch, unsigned long cl,
  
                         spin_lock_bh(qdisc_lock(qdisc));
  
-                       if (qdisc_is_percpu_stats(qdisc)) {
-                               qlen = qdisc_qlen_sum(qdisc);
-
-                               __gnet_stats_copy_basic(NULL, &bstats,
-                                                       qdisc->cpu_bstats,
-                                                       &qdisc->bstats);
-                               __gnet_stats_copy_queue(&qstats,
-                                                       qdisc->cpu_qstats,
-                                                       &qdisc->qstats,
-                                                       qlen);
-                       } else {
-                               qlen            += qdisc->q.qlen;
-                               bstats.bytes    += qdisc->bstats.bytes;
-                               bstats.packets  += qdisc->bstats.packets;
-                               qstats.backlog  += qdisc->qstats.backlog;
-                               qstats.drops    += qdisc->qstats.drops;
-                               qstats.requeues += qdisc->qstats.requeues;
-                               qstats.overlimits += qdisc->qstats.overlimits;
-                       }
+                       gnet_stats_add_basic(&bstats, qdisc->cpu_bstats,
+                                            &qdisc->bstats, false);
+                       gnet_stats_add_queue(&qstats, qdisc->cpu_qstats,
+                                            &qdisc->qstats);
+                       sch->q.qlen += qdisc_qlen(qdisc);
+
                         spin_unlock_bh(qdisc_lock(qdisc));
                 }
+               qlen = qdisc_qlen(sch) + qstats.qlen;
  
                 /* Reclaim root sleeping lock before completing stats */
                 if (d->lock)
                         spin_lock_bh(d->lock);
-               if (gnet_stats_copy_basic(NULL, d, NULL, &bstats) < 0 ||
+               if (gnet_stats_copy_basic(d, NULL, &bstats, false) < 0 ||
                     gnet_stats_copy_queue(d, NULL, &qstats, qlen) < 0)
                         return -1;
         } else {
                 struct netdev_queue *dev_queue = mqprio_queue_get(sch, cl);
  
                 sch = dev_queue->qdisc_sleeping;
-               if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), d,
-                                         sch->cpu_bstats, &sch->bstats) < 0 ||
+               if (gnet_stats_copy_basic(d, sch->cpu_bstats,
+                                         &sch->bstats, true) < 0 ||
                     qdisc_qstats_copy(d, sch) < 0)
                         return -1;
         }
diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c

index 8b99f07..f28050c 100644 (file)
--- a/net/sched/sch_multiq.c
+++ b/net/sched/sch_multiq.c
@@ -337,8 +337,7 @@ static int multiq_dump_class_stats(struct Qdisc *sch, unsigned long cl,
         struct Qdisc *cl_q;
  
         cl_q = q->queues[cl - 1];
-       if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
-                                 d, cl_q->cpu_bstats, &cl_q->bstats) < 0 ||
+       if (gnet_stats_copy_basic(d, cl_q->cpu_bstats, &cl_q->bstats, true) < 0 ||
             qdisc_qstats_copy(d, cl_q) < 0)
                 return -1;
  
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c

index 2e0b1e7..c03a11d 100644 (file)
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -359,8 +359,8 @@ static int prio_dump_class_stats(struct Qdisc *sch, unsigned long cl,
         struct Qdisc *cl_q;
  
         cl_q = q->queues[cl - 1];
-       if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
-                                 d, cl_q->cpu_bstats, &cl_q->bstats) < 0 ||
+       if (gnet_stats_copy_basic(d, cl_q->cpu_bstats,
+                                 &cl_q->bstats, true) < 0 ||
             qdisc_qstats_copy(d, cl_q) < 0)
                 return -1;
  
diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c

index 50e51c1..13246a9 100644 (file)
--- a/net/sched/sch_qfq.c
+++ b/net/sched/sch_qfq.c
@@ -131,7 +131,7 @@ struct qfq_class {
  
         unsigned int filter_cnt;
  
-       struct gnet_stats_basic_packed bstats;
+       struct gnet_stats_basic_sync bstats;
         struct gnet_stats_queue qstats;
         struct net_rate_estimator __rcu *rate_est;
         struct Qdisc *qdisc;
@@ -451,7 +451,7 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
                         err = gen_replace_estimator(&cl->bstats, NULL,
                                                     &cl->rate_est,
                                                     NULL,
-                                                   qdisc_root_sleeping_running(sch),
+                                                   true,
                                                     tca[TCA_RATE]);
                         if (err)
                                 return err;
@@ -465,6 +465,7 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
         if (cl == NULL)
                 return -ENOBUFS;
  
+       gnet_stats_basic_sync_init(&cl->bstats);
         cl->common.classid = classid;
         cl->deficit = lmax;
  
@@ -477,7 +478,7 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
                 err = gen_new_estimator(&cl->bstats, NULL,
                                         &cl->rate_est,
                                         NULL,
-                                       qdisc_root_sleeping_running(sch),
+                                       true,
                                         tca[TCA_RATE]);
                 if (err)
                         goto destroy_class;
@@ -639,8 +640,7 @@ static int qfq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
         xstats.weight = cl->agg->class_weight;
         xstats.lmax = cl->agg->lmax;
  
-       if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
-                                 d, NULL, &cl->bstats) < 0 ||
+       if (gnet_stats_copy_basic(d, NULL, &cl->bstats, true) < 0 ||
             gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
             qdisc_qstats_copy(d, cl->qdisc) < 0)
                 return -1;
@@ -1234,8 +1234,7 @@ static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch,
                 return err;
         }
  
-       cl->bstats.bytes += len;
-       cl->bstats.packets += gso_segs;
+       _bstats_update(&cl->bstats, len, gso_segs);
         sch->qstats.backlog += len;
         ++sch->q.qlen;
  
diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c

index 135ea8b..695e8a4 100644 (file)
--- a/net/sched/sch_taprio.c
+++ b/net/sched/sch_taprio.c
@@ -1989,7 +1989,7 @@ static int taprio_dump_class_stats(struct Qdisc *sch, unsigned long cl,
         struct netdev_queue *dev_queue = taprio_queue_get(sch, cl);
  
         sch = dev_queue->qdisc_sleeping;
-       if (gnet_stats_copy_basic(&sch->running, d, NULL, &sch->bstats) < 0 ||
+       if (gnet_stats_copy_basic(d, NULL, &sch->bstats, true) < 0 ||
             qdisc_qstats_copy(d, sch) < 0)
                 return -1;
         return 0;
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c

index 935bba0..962e797 100644 (file)
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -441,7 +441,7 @@ void svc_xprt_do_enqueue(struct svc_xprt *xprt)
         if (test_and_set_bit(XPT_BUSY, &xprt->xpt_flags))
                 return;
  
-       cpu = get_cpu();
+       cpu = get_cpu_light();
         pool = svc_pool_for_cpu(xprt->xpt_server, cpu);
  
         atomic_long_inc(&pool->sp_stats.packets);
@@ -465,7 +465,7 @@ void svc_xprt_do_enqueue(struct svc_xprt *xprt)
         rqstp = NULL;
  out_unlock:
         rcu_read_unlock();
-       put_cpu();
+       put_cpu_light();
         trace_svc_xprt_do_enqueue(xprt, rqstp);
  }
  EXPORT_SYMBOL_GPL(svc_xprt_do_enqueue);
diff --git a/samples/kfifo/bytestream-example.c b/samples/kfifo/bytestream-example.c

index 5a90aa5..642d074 100644 (file)
--- a/samples/kfifo/bytestream-example.c
+++ b/samples/kfifo/bytestream-example.c
@@ -22,10 +22,10 @@
  #define        PROC_FIFO       "bytestream-fifo"
  
  /* lock for procfs read access */
-static DEFINE_MUTEX(read_lock);
+static DEFINE_MUTEX(read_access);
  
  /* lock for procfs write access */
-static DEFINE_MUTEX(write_lock);
+static DEFINE_MUTEX(write_access);
  
  /*
   * define DYNAMIC in this example for a dynamically allocated fifo.
@@ -116,12 +116,12 @@ static ssize_t fifo_write(struct file *file, const char __user *buf,
         int ret;
         unsigned int copied;
  
-       if (mutex_lock_interruptible(&write_lock))
+       if (mutex_lock_interruptible(&write_access))
                 return -ERESTARTSYS;
  
         ret = kfifo_from_user(&test, buf, count, &copied);
  
-       mutex_unlock(&write_lock);
+       mutex_unlock(&write_access);
         if (ret)
                 return ret;
  
@@ -134,12 +134,12 @@ static ssize_t fifo_read(struct file *file, char __user *buf,
         int ret;
         unsigned int copied;
  
-       if (mutex_lock_interruptible(&read_lock))
+       if (mutex_lock_interruptible(&read_access))
                 return -ERESTARTSYS;
  
         ret = kfifo_to_user(&test, buf, count, &copied);
  
-       mutex_unlock(&read_lock);
+       mutex_unlock(&read_access);
         if (ret)
                 return ret;
  
diff --git a/samples/kfifo/inttype-example.c b/samples/kfifo/inttype-example.c

index e5403d8..c61482b 100644 (file)
--- a/samples/kfifo/inttype-example.c
+++ b/samples/kfifo/inttype-example.c
@@ -22,10 +22,10 @@
  #define        PROC_FIFO       "int-fifo"
  
  /* lock for procfs read access */
-static DEFINE_MUTEX(read_lock);
+static DEFINE_MUTEX(read_access);
  
  /* lock for procfs write access */
-static DEFINE_MUTEX(write_lock);
+static DEFINE_MUTEX(write_access);
  
  /*
   * define DYNAMIC in this example for a dynamically allocated fifo.
@@ -109,12 +109,12 @@ static ssize_t fifo_write(struct file *file, const char __user *buf,
         int ret;
         unsigned int copied;
  
-       if (mutex_lock_interruptible(&write_lock))
+       if (mutex_lock_interruptible(&write_access))
                 return -ERESTARTSYS;
  
         ret = kfifo_from_user(&test, buf, count, &copied);
  
-       mutex_unlock(&write_lock);
+       mutex_unlock(&write_access);
         if (ret)
                 return ret;
  
@@ -127,12 +127,12 @@ static ssize_t fifo_read(struct file *file, char __user *buf,
         int ret;
         unsigned int copied;
  
-       if (mutex_lock_interruptible(&read_lock))
+       if (mutex_lock_interruptible(&read_access))
                 return -ERESTARTSYS;
  
         ret = kfifo_to_user(&test, buf, count, &copied);
  
-       mutex_unlock(&read_lock);
+       mutex_unlock(&read_access);
         if (ret)
                 return ret;
  
diff --git a/samples/kfifo/record-example.c b/samples/kfifo/record-example.c

index f64f3d6..e4087b2 100644 (file)
--- a/samples/kfifo/record-example.c
+++ b/samples/kfifo/record-example.c
@@ -22,10 +22,10 @@
  #define        PROC_FIFO       "record-fifo"
  
  /* lock for procfs read access */
-static DEFINE_MUTEX(read_lock);
+static DEFINE_MUTEX(read_access);
  
  /* lock for procfs write access */
-static DEFINE_MUTEX(write_lock);
+static DEFINE_MUTEX(write_access);
  
  /*
   * define DYNAMIC in this example for a dynamically allocated fifo.
@@ -123,12 +123,12 @@ static ssize_t fifo_write(struct file *file, const char __user *buf,
         int ret;
         unsigned int copied;
  
-       if (mutex_lock_interruptible(&write_lock))
+       if (mutex_lock_interruptible(&write_access))
                 return -ERESTARTSYS;
  
         ret = kfifo_from_user(&test, buf, count, &copied);
  
-       mutex_unlock(&write_lock);
+       mutex_unlock(&write_access);
         if (ret)
                 return ret;
  
@@ -141,12 +141,12 @@ static ssize_t fifo_read(struct file *file, char __user *buf,
         int ret;
         unsigned int copied;
  
-       if (mutex_lock_interruptible(&read_lock))
+       if (mutex_lock_interruptible(&read_access))
                 return -ERESTARTSYS;
  
         ret = kfifo_to_user(&test, buf, count, &copied);
  
-       mutex_unlock(&read_lock);
+       mutex_unlock(&read_access);
         if (ret)
                 return ret;
  
diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c

index 3f3f56f..5dbcdc5 100644 (file)
--- a/security/smack/smack_lsm.c
+++ b/security/smack/smack_lsm.c
@@ -51,8 +51,10 @@
  #define SMK_RECEIVING  1
  #define SMK_SENDING    2
  
+#ifdef SMACK_IPV6_PORT_LABELING
  static DEFINE_MUTEX(smack_ipv6_lock);
  static LIST_HEAD(smk_ipv6_port_list);
+#endif
  struct kmem_cache *smack_rule_cache;
  int smack_enabled __initdata;
  
@@ -2603,7 +2605,6 @@ static void smk_ipv6_port_label(struct socket *sock, struct sockaddr *address)
         mutex_unlock(&smack_ipv6_lock);
         return;
  }
-#endif
  
  /**
   * smk_ipv6_port_check - check Smack port access
@@ -2666,6 +2667,7 @@ static int smk_ipv6_port_check(struct sock *sk, struct sockaddr_in6 *address,
  
         return smk_ipv6_check(skp, object, address, act);
  }
+#endif
  
  /**
   * smack_inode_setsecurity - set smack xattrs
@@ -2852,8 +2854,9 @@ static int smack_socket_connect(struct socket *sock, struct sockaddr *sap,
                         rc = smk_ipv6_check(ssp->smk_out, rsp, sip,
                                             SMK_CONNECTING);
                 }
-               if (__is_defined(SMACK_IPV6_PORT_LABELING))
-                       rc = smk_ipv6_port_check(sock->sk, sip, SMK_CONNECTING);
+#ifdef SMACK_IPV6_PORT_LABELING
+               rc = smk_ipv6_port_check(sock->sk, sip, SMK_CONNECTING);
+#endif
  
                 return rc;
         }
diff --git a/sound/soc/mediatek/common/mtk-afe-fe-dai.c b/sound/soc/mediatek/common/mtk-afe-fe-dai.c

index e95c7c0..4f2c237 100644 (file)
--- a/sound/soc/mediatek/common/mtk-afe-fe-dai.c
+++ b/sound/soc/mediatek/common/mtk-afe-fe-dai.c
@@ -288,7 +288,6 @@ const struct snd_soc_dai_ops mtk_afe_fe_ops = {
  };
  EXPORT_SYMBOL_GPL(mtk_afe_fe_ops);
  
-static DEFINE_MUTEX(irqs_lock);
  int mtk_dynamic_irq_acquire(struct mtk_base_afe *afe)
  {
         int i;
author	Thomas Gleixner <tglx@linutronix.de>
	Tue, 21 Sep 2021 21:12:50 +0000 (23:12 +0200)
committer	Hoegeun Kwon <hoegeun.kwon@samsung.com>
	Thu, 3 Aug 2023 08:55:09 +0000 (17:55 +0900)
Documentation/admin-guide/cgroup-v1/memory.rst		patch \| blob \| history
Documentation/dev-tools/kcov.rst		patch \| blob \| history
arch/alpha/include/asm/spinlock_types.h		patch \| blob \| history
arch/arm/Kconfig		patch \| blob \| history
arch/arm/include/asm/spinlock_types.h		patch \| blob \| history
arch/arm/include/asm/thread_info.h		patch \| blob \| history
arch/arm/kernel/asm-offsets.c		patch \| blob \| history
arch/arm/kernel/entry-armv.S		patch \| blob \| history
arch/arm/kernel/signal.c		patch \| blob \| history
arch/arm/mm/fault.c		patch \| blob \| history
arch/arm64/Kconfig		patch \| blob \| history
arch/arm64/include/asm/pgtable.h		patch \| blob \| history
arch/arm64/include/asm/preempt.h		patch \| blob \| history
arch/arm64/include/asm/signal.h		patch \| blob \| history
arch/arm64/include/asm/spinlock_types.h		patch \| blob \| history
arch/arm64/include/asm/thread_info.h		patch \| blob \| history
arch/arm64/kernel/asm-offsets.c		patch \| blob \| history
arch/arm64/kernel/fpsimd.c		patch \| blob \| history
arch/arm64/kernel/signal.c		patch \| blob \| history
arch/arm64/kvm/arm.c		patch \| blob \| history
arch/csky/include/asm/spinlock_types.h		patch \| blob \| history
arch/hexagon/include/asm/spinlock_types.h		patch \| blob \| history
arch/ia64/include/asm/spinlock_types.h		patch \| blob \| history
arch/powerpc/Kconfig		patch \| blob \| history
arch/powerpc/include/asm/simple_spinlock_types.h		patch \| blob \| history
arch/powerpc/include/asm/smp.h		patch \| blob \| history
arch/powerpc/include/asm/spinlock_types.h		patch \| blob \| history
arch/powerpc/include/asm/stackprotector.h		patch \| blob \| history
arch/powerpc/include/asm/thread_info.h		patch \| blob \| history
arch/powerpc/kernel/interrupt.c		patch \| blob \| history
arch/powerpc/kernel/irq.c		patch \| blob \| history
arch/powerpc/kernel/kgdb.c		patch \| blob \| history
arch/powerpc/kernel/smp.c		patch \| blob \| history
arch/powerpc/kernel/traps.c		patch \| blob \| history
arch/powerpc/kvm/Kconfig		patch \| blob \| history
arch/powerpc/platforms/pseries/iommu.c		patch \| blob \| history
arch/riscv/include/asm/spinlock_types.h		patch \| blob \| history
arch/s390/include/asm/spinlock_types.h		patch \| blob \| history
arch/sh/include/asm/spinlock_types.h		patch \| blob \| history
arch/sh/kernel/irq.c		patch \| blob \| history
arch/sparc/kernel/irq_64.c		patch \| blob \| history
arch/x86/Kconfig		patch \| blob \| history
arch/x86/include/asm/irq_stack.h		patch \| blob \| history
arch/x86/include/asm/preempt.h		patch \| blob \| history
arch/x86/include/asm/signal.h		patch \| blob \| history
arch/x86/include/asm/stackprotector.h		patch \| blob \| history
arch/x86/include/asm/thread_info.h		patch \| blob \| history
arch/x86/kernel/irq_32.c		patch \| blob \| history
arch/x86/kernel/kgdb.c		patch \| blob \| history
arch/x86/kvm/x86.c		patch \| blob \| history
arch/xtensa/include/asm/spinlock_types.h		patch \| blob \| history
block/blk-mq.c		patch \| blob \| history
crypto/testmgr.c		patch \| blob \| history
drivers/block/zram/zram_drv.c		patch \| blob \| history
drivers/block/zram/zram_drv.h		patch \| blob \| history
drivers/char/tpm/tpm_tis.c		patch \| blob \| history
drivers/firmware/efi/efi.c		patch \| blob \| history
drivers/gpu/drm/i915/display/intel_crtc.c		patch \| blob \| history
drivers/gpu/drm/i915/gt/intel_breadcrumbs.c		patch \| blob \| history
drivers/gpu/drm/i915/gt/intel_context.h		patch \| blob \| history
drivers/gpu/drm/i915/gt/intel_context_types.h		patch \| blob \| history
drivers/gpu/drm/i915/gt/intel_engine_pm.c		patch \| blob \| history
drivers/gpu/drm/i915/gt/intel_execlists_submission.c		patch \| blob \| history
drivers/gpu/drm/i915/i915_irq.c		patch \| blob \| history
drivers/gpu/drm/i915/i915_request.c		patch \| blob \| history
drivers/gpu/drm/i915/i915_request.h		patch \| blob \| history
drivers/gpu/drm/i915/i915_trace.h		patch \| blob \| history
drivers/gpu/drm/i915/i915_utils.h		patch \| blob \| history
drivers/i2c/busses/i2c-cht-wc.c		patch \| blob \| history
drivers/i2c/i2c-core-base.c		patch \| blob \| history
drivers/leds/trigger/Kconfig		patch \| blob \| history
drivers/md/raid5.c		patch \| blob \| history
drivers/md/raid5.h		patch \| blob \| history
drivers/mfd/ezx-pcap.c		patch \| blob \| history
drivers/misc/hi6421v600-irq.c		patch \| blob \| history
drivers/net/ethernet/netronome/nfp/abm/qdisc.c		patch \| blob \| history
drivers/net/usb/lan78xx.c		patch \| blob \| history
drivers/scsi/fcoe/fcoe.c		patch \| blob \| history
drivers/scsi/fcoe/fcoe_ctlr.c		patch \| blob \| history
drivers/scsi/libfc/fc_exch.c		patch \| blob \| history
drivers/staging/greybus/gpio.c		patch \| blob \| history
drivers/tty/serial/8250/8250.h		patch \| blob \| history
drivers/tty/serial/8250/8250_core.c		patch \| blob \| history
drivers/tty/serial/8250/8250_fsl.c		patch \| blob \| history
drivers/tty/serial/8250/8250_ingenic.c		patch \| blob \| history
drivers/tty/serial/8250/8250_mtk.c		patch \| blob \| history
drivers/tty/serial/8250/8250_port.c		patch \| blob \| history
drivers/tty/serial/amba-pl011.c		patch \| blob \| history
drivers/tty/serial/omap-serial.c		patch \| blob \| history
drivers/virt/acrn/irqfd.c		patch \| blob \| history
fs/afs/dir_silly.c		patch \| blob \| history
fs/cifs/readdir.c		patch \| blob \| history
fs/dcache.c		patch \| blob \| history
fs/fscache/internal.h		patch \| blob \| history
fs/fscache/main.c		patch \| blob \| history
fs/fscache/object.c		patch \| blob \| history
fs/fuse/readdir.c		patch \| blob \| history
fs/namei.c		patch \| blob \| history
fs/namespace.c		patch \| blob \| history
fs/nfs/dir.c		patch \| blob \| history
fs/nfs/unlink.c		patch \| blob \| history
fs/proc/base.c		patch \| blob \| history
fs/proc/proc_sysctl.c		patch \| blob \| history
include/asm-generic/softirq_stack.h		patch \| blob \| history
include/linux/console.h		patch \| blob \| history
include/linux/dcache.h		patch \| blob \| history
include/linux/entry-common.h		patch \| blob \| history
include/linux/irq_work.h		patch \| blob \| history
include/linux/irqdesc.h		patch \| blob \| history
include/linux/irqflags.h		patch \| blob \| history
include/linux/kernel.h		patch \| blob \| history
include/linux/kgdb.h		patch \| blob \| history
include/linux/mm_types.h		patch \| blob \| history
include/linux/netdevice.h		patch \| blob \| history
include/linux/nfs_xdr.h		patch \| blob \| history
include/linux/preempt.h		patch \| blob \| history
include/linux/printk.h		patch \| blob \| history
include/linux/ratelimit_types.h		patch \| blob \| history
include/linux/rcupdate.h		patch \| blob \| history
include/linux/rtmutex.h		patch \| blob \| history
include/linux/sched.h		patch \| blob \| history
include/linux/sched/mm.h		patch \| blob \| history
include/linux/serial_8250.h		patch \| blob \| history
include/linux/skbuff.h		patch \| blob \| history
include/linux/smp.h		patch \| blob \| history
include/linux/spinlock_types_up.h		patch \| blob \| history
include/linux/thread_info.h		patch \| blob \| history
include/linux/trace_events.h		patch \| blob \| history
include/linux/u64_stats_sync.h		patch \| blob \| history
include/net/act_api.h		patch \| blob \| history
include/net/gen_stats.h		patch \| blob \| history
include/net/netfilter/xt_rateest.h		patch \| blob \| history
include/net/pkt_cls.h		patch \| blob \| history
include/net/sch_generic.h		patch \| blob \| history
init/Kconfig		patch \| blob \| history
init/main.c		patch \| blob \| history
kernel/Kconfig.preempt		patch \| blob \| history
kernel/cgroup/rstat.c		patch \| blob \| history
kernel/debug/debug_core.c		patch \| blob \| history
kernel/debug/kdb/kdb_io.c		patch \| blob \| history
kernel/entry/common.c		patch \| blob \| history
kernel/exit.c		patch \| blob \| history
kernel/fork.c		patch \| blob \| history
kernel/irq/irqdesc.c		patch \| blob \| history
kernel/irq/manage.c		patch \| blob \| history
kernel/irq/spurious.c		patch \| blob \| history
kernel/irq_work.c		patch \| blob \| history
kernel/kcov.c		patch \| blob \| history
kernel/kprobes.c		patch \| blob \| history
kernel/ksysfs.c		patch \| blob \| history
kernel/kthread.c		patch \| blob \| history
kernel/locking/lockdep.c		patch \| blob \| history
kernel/locking/rtmutex.c		patch \| blob \| history
kernel/locking/rtmutex_api.c		patch \| blob \| history
kernel/locking/spinlock_rt.c		patch \| blob \| history
kernel/panic.c		patch \| blob \| history
kernel/printk/printk.c		patch \| blob \| history
kernel/ptrace.c		patch \| blob \| history
kernel/rcu/tasks.h		patch \| blob \| history
kernel/rcu/tree.c		patch \| blob \| history
kernel/sched/core.c		patch \| blob \| history
kernel/sched/fair.c		patch \| blob \| history
kernel/sched/features.h		patch \| blob \| history
kernel/sched/sched.h		patch \| blob \| history
kernel/sched/swait.c		patch \| blob \| history
kernel/sched/topology.c		patch \| blob \| history
kernel/signal.c		patch \| blob \| history
kernel/smp.c		patch \| blob \| history
kernel/trace/trace.c		patch \| blob \| history
kernel/trace/trace_events.c		patch \| blob \| history
kernel/trace/trace_output.c		patch \| blob \| history
lib/bug.c		patch \| blob \| history
lib/dump_stack.c		patch \| blob \| history
lib/irq_poll.c		patch \| blob \| history
lib/locking-selftest.c		patch \| blob \| history
lib/nmi_backtrace.c		patch \| blob \| history
lib/scatterlist.c		patch \| blob \| history
localversion-rt	[new file with mode: 0644]	patch \| blob
mm/Kconfig		patch \| blob \| history
mm/memcontrol.c		patch \| blob \| history
mm/memory.c		patch \| blob \| history
mm/page_alloc.c		patch \| blob \| history
mm/vmalloc.c		patch \| blob \| history
mm/workingset.c		patch \| blob \| history
mm/zsmalloc.c		patch \| blob \| history
net/Kconfig		patch \| blob \| history
net/core/dev.c		patch \| blob \| history
net/core/gen_estimator.c		patch \| blob \| history
net/core/gen_stats.c		patch \| blob \| history
net/netfilter/xt_RATEEST.c		patch \| blob \| history
net/sched/act_api.c		patch \| blob \| history
net/sched/act_bpf.c		patch \| blob \| history
net/sched/act_ife.c		patch \| blob \| history
net/sched/act_mpls.c		patch \| blob \| history
net/sched/act_police.c		patch \| blob \| history
net/sched/act_sample.c		patch \| blob \| history
net/sched/act_simple.c		patch \| blob \| history
net/sched/act_skbedit.c		patch \| blob \| history
net/sched/act_skbmod.c		patch \| blob \| history
net/sched/sch_api.c		patch \| blob \| history
net/sched/sch_atm.c		patch \| blob \| history
net/sched/sch_cbq.c		patch \| blob \| history
net/sched/sch_drr.c		patch \| blob \| history
net/sched/sch_ets.c		patch \| blob \| history
net/sched/sch_generic.c		patch \| blob \| history
net/sched/sch_gred.c		patch \| blob \| history
net/sched/sch_hfsc.c		patch \| blob \| history
net/sched/sch_htb.c		patch \| blob \| history
net/sched/sch_mq.c		patch \| blob \| history
net/sched/sch_mqprio.c		patch \| blob \| history
net/sched/sch_multiq.c		patch \| blob \| history
net/sched/sch_prio.c		patch \| blob \| history
net/sched/sch_qfq.c		patch \| blob \| history
net/sched/sch_taprio.c		patch \| blob \| history
net/sunrpc/svc_xprt.c		patch \| blob \| history
samples/kfifo/bytestream-example.c		patch \| blob \| history
samples/kfifo/inttype-example.c		patch \| blob \| history
samples/kfifo/record-example.c		patch \| blob \| history
security/smack/smack_lsm.c		patch \| blob \| history
sound/soc/mediatek/common/mtk-afe-fe-dai.c		patch \| blob \| history