mm, vmstat: allow WQ concurrency to discover memory reclaim doesn't make any progress

author Michal Hocko <mhocko@suse.com>

Fri, 11 Dec 2015 21:40:32 +0000 (13:40 -0800)

committer Linus Torvalds <torvalds@linux-foundation.org>

Sat, 12 Dec 2015 18:15:34 +0000 (10:15 -0800)
author Michal Hocko <mhocko@suse.com>
Fri, 11 Dec 2015 21:40:32 +0000 (13:40 -0800)
committer Linus Torvalds <torvalds@linux-foundation.org>
Sat, 12 Dec 2015 18:15:34 +0000 (10:15 -0800)
diff --git a/mm/backing-dev.c b/mm/backing-dev.c

index 8ed2ffd..7340353 100644 (file)
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -957,8 +957,9 @@ EXPORT_SYMBOL(congestion_wait);
   * jiffies for either a BDI to exit congestion of the given @sync queue
   * or a write to complete.
   *
- * In the absence of zone congestion, cond_resched() is called to yield
- * the processor if necessary but otherwise does not sleep.
+ * In the absence of zone congestion, a short sleep or a cond_resched is
+ * performed to yield the processor and to allow other subsystems to make
+ * a forward progress.
   *
   * The return value is 0 if the sleep is for the full timeout. Otherwise,
   * it is the number of jiffies that were still remaining when the function
@@ -978,7 +979,19 @@ long wait_iff_congested(struct zone *zone, int sync, long timeout)
          */
         if (atomic_read(&nr_wb_congested[sync]) == 0 ||
             !test_bit(ZONE_CONGESTED, &zone->flags)) {
-               cond_resched();
+
+               /*
+                * Memory allocation/reclaim might be called from a WQ
+                * context and the current implementation of the WQ
+                * concurrency control doesn't recognize that a particular
+                * WQ is congested if the worker thread is looping without
+                * ever sleeping. Therefore we have to do a short sleep
+                * here rather than calling cond_resched().
+                */
+               if (current->flags & PF_WQ_WORKER)
+                       schedule_timeout(1);
+               else
+                       cond_resched();
  
                 /* In case we scheduled, work out time remaining */
                 ret = timeout - (jiffies - start);
diff --git a/mm/vmstat.c b/mm/vmstat.c

index 2ec3434..0d5712b 100644 (file)
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1379,6 +1379,7 @@ static const struct file_operations proc_vmstat_file_operations = {
  #endif /* CONFIG_PROC_FS */
  
  #ifdef CONFIG_SMP
+static struct workqueue_struct *vmstat_wq;
  static DEFINE_PER_CPU(struct delayed_work, vmstat_work);
  int sysctl_stat_interval __read_mostly = HZ;
  static cpumask_var_t cpu_stat_off;
@@ -1391,7 +1392,7 @@ static void vmstat_update(struct work_struct *w)
                  * to occur in the future. Keep on running the
                  * update worker thread.
                  */
-               schedule_delayed_work_on(smp_processor_id(),
+               queue_delayed_work_on(smp_processor_id(), vmstat_wq,
                         this_cpu_ptr(&vmstat_work),
                         round_jiffies_relative(sysctl_stat_interval));
         } else {
@@ -1460,7 +1461,7 @@ static void vmstat_shepherd(struct work_struct *w)
                 if (need_update(cpu) &&
                         cpumask_test_and_clear_cpu(cpu, cpu_stat_off))
  
-                       schedule_delayed_work_on(cpu,
+                       queue_delayed_work_on(cpu, vmstat_wq,
                                 &per_cpu(vmstat_work, cpu), 0);
  
         put_online_cpus();
@@ -1549,6 +1550,7 @@ static int __init setup_vmstat(void)
  
         start_shepherd_timer();
         cpu_notifier_register_done();
+       vmstat_wq = alloc_workqueue("vmstat", WQ_FREEZABLE|WQ_MEM_RECLAIM, 0);
  #endif
  #ifdef CONFIG_PROC_FS
         proc_create("buddyinfo", S_IRUGO, NULL, &fragmentation_file_operations);
author	Michal Hocko <mhocko@suse.com>
	Fri, 11 Dec 2015 21:40:32 +0000 (13:40 -0800)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Sat, 12 Dec 2015 18:15:34 +0000 (10:15 -0800)
mm/backing-dev.c		patch | blob | history
mm/vmstat.c		patch | blob | history