mm: vmscan: Reduce throttling due to a failure to make progress

author Mel Gorman <mgorman@techsingularity.net>

Thu, 2 Dec 2021 15:06:14 +0000 (15:06 +0000)

committer Linus Torvalds <torvalds@linux-foundation.org>

Fri, 31 Dec 2021 19:17:07 +0000 (11:17 -0800)
author Mel Gorman <mgorman@techsingularity.net>
Thu, 2 Dec 2021 15:06:14 +0000 (15:06 +0000)
committer Linus Torvalds <torvalds@linux-foundation.org>
Fri, 31 Dec 2021 19:17:07 +0000 (11:17 -0800)
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h

index 58e744b..936dc0b 100644 (file)
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -277,6 +277,7 @@ enum vmscan_throttle_state {
         VMSCAN_THROTTLE_WRITEBACK,
         VMSCAN_THROTTLE_ISOLATED,
         VMSCAN_THROTTLE_NOPROGRESS,
+       VMSCAN_THROTTLE_CONGESTED,
         NR_VMSCAN_THROTTLE,
  };
  
diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h

index f25a614..ca2e900 100644 (file)
--- a/include/trace/events/vmscan.h
+++ b/include/trace/events/vmscan.h
@@ -30,12 +30,14 @@
  #define _VMSCAN_THROTTLE_WRITEBACK     (1 << VMSCAN_THROTTLE_WRITEBACK)
  #define _VMSCAN_THROTTLE_ISOLATED      (1 << VMSCAN_THROTTLE_ISOLATED)
  #define _VMSCAN_THROTTLE_NOPROGRESS    (1 << VMSCAN_THROTTLE_NOPROGRESS)
+#define _VMSCAN_THROTTLE_CONGESTED     (1 << VMSCAN_THROTTLE_CONGESTED)
  
  #define show_throttle_flags(flags)                                             \
         (flags) ? __print_flags(flags, "|",                                     \
                 {_VMSCAN_THROTTLE_WRITEBACK,    "VMSCAN_THROTTLE_WRITEBACK"},   \
                 {_VMSCAN_THROTTLE_ISOLATED,     "VMSCAN_THROTTLE_ISOLATED"},    \
-               {_VMSCAN_THROTTLE_NOPROGRESS,   "VMSCAN_THROTTLE_NOPROGRESS"}   \
+               {_VMSCAN_THROTTLE_NOPROGRESS,   "VMSCAN_THROTTLE_NOPROGRESS"},  \
+               {_VMSCAN_THROTTLE_CONGESTED,    "VMSCAN_THROTTLE_CONGESTED"}    \
                 ) : "VMSCAN_THROTTLE_NONE"
  
  
diff --git a/mm/vmscan.c b/mm/vmscan.c

index fb95846..4c4d5f6 100644 (file)
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1021,6 +1021,39 @@ static void handle_write_error(struct address_space *mapping,
         unlock_page(page);
  }
  
+static bool skip_throttle_noprogress(pg_data_t *pgdat)
+{
+       int reclaimable = 0, write_pending = 0;
+       int i;
+
+       /*
+        * If kswapd is disabled, reschedule if necessary but do not
+        * throttle as the system is likely near OOM.
+        */
+       if (pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES)
+               return true;
+
+       /*
+        * If there are a lot of dirty/writeback pages then do not
+        * throttle as throttling will occur when the pages cycle
+        * towards the end of the LRU if still under writeback.
+        */
+       for (i = 0; i < MAX_NR_ZONES; i++) {
+               struct zone *zone = pgdat->node_zones + i;
+
+               if (!populated_zone(zone))
+                       continue;
+
+               reclaimable += zone_reclaimable_pages(zone);
+               write_pending += zone_page_state_snapshot(zone,
+                                                 NR_ZONE_WRITE_PENDING);
+       }
+       if (2 * write_pending <= reclaimable)
+               return true;
+
+       return false;
+}
+
  void reclaim_throttle(pg_data_t *pgdat, enum vmscan_throttle_state reason)
  {
         wait_queue_head_t *wqh = &pgdat->reclaim_wait[reason];
@@ -1056,8 +1089,16 @@ void reclaim_throttle(pg_data_t *pgdat, enum vmscan_throttle_state reason)
                 }
  
                 break;
+       case VMSCAN_THROTTLE_CONGESTED:
+               fallthrough;
         case VMSCAN_THROTTLE_NOPROGRESS:
-               timeout = HZ/2;
+               if (skip_throttle_noprogress(pgdat)) {
+                       cond_resched();
+                       return;
+               }
+
+               timeout = 1;
+
                 break;
         case VMSCAN_THROTTLE_ISOLATED:
                 timeout = HZ/50;
@@ -3321,7 +3362,7 @@ again:
         if (!current_is_kswapd() && current_may_throttle() &&
             !sc->hibernation_mode &&
             test_bit(LRUVEC_CONGESTED, &target_lruvec->flags))
-               reclaim_throttle(pgdat, VMSCAN_THROTTLE_WRITEBACK);
+               reclaim_throttle(pgdat, VMSCAN_THROTTLE_CONGESTED);
  
         if (should_continue_reclaim(pgdat, sc->nr_reclaimed - nr_reclaimed,
                                     sc))
@@ -3386,16 +3427,16 @@ static void consider_reclaim_throttle(pg_data_t *pgdat, struct scan_control *sc)
         }
  
         /*
-        * Do not throttle kswapd on NOPROGRESS as it will throttle on
-        * VMSCAN_THROTTLE_WRITEBACK if there are too many pages under
-        * writeback and marked for immediate reclaim at the tail of
-        * the LRU.
+        * Do not throttle kswapd or cgroup reclaim on NOPROGRESS as it will
+        * throttle on VMSCAN_THROTTLE_WRITEBACK if there are too many pages
+        * under writeback and marked for immediate reclaim at the tail of the
+        * LRU.
          */
-       if (current_is_kswapd())
+       if (current_is_kswapd() || cgroup_reclaim(sc))
                 return;
  
         /* Throttle if making no progress at high priorities. */
-       if (sc->priority < DEF_PRIORITY - 2)
+       if (sc->priority == 1 && !sc->nr_reclaimed)
                 reclaim_throttle(pgdat, VMSCAN_THROTTLE_NOPROGRESS);
  }
  
@@ -3415,6 +3456,7 @@ static void shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
         unsigned long nr_soft_scanned;
         gfp_t orig_mask;
         pg_data_t *last_pgdat = NULL;
+       pg_data_t *first_pgdat = NULL;
  
         /*
          * If the number of buffer_heads in the machine exceeds the maximum
@@ -3478,14 +3520,18 @@ static void shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
                         /* need some check for avoid more shrink_zone() */
                 }
  
+               if (!first_pgdat)
+                       first_pgdat = zone->zone_pgdat;
+
                 /* See comment about same check for global reclaim above */
                 if (zone->zone_pgdat == last_pgdat)
                         continue;
                 last_pgdat = zone->zone_pgdat;
                 shrink_node(zone->zone_pgdat, sc);
-               consider_reclaim_throttle(zone->zone_pgdat, sc);
         }
  
+       consider_reclaim_throttle(first_pgdat, sc);
+
         /*
          * Restore to original mask to avoid the impact on the caller if we
          * promoted it to __GFP_HIGHMEM.
author	Mel Gorman <mgorman@techsingularity.net>
	Thu, 2 Dec 2021 15:06:14 +0000 (15:06 +0000)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Fri, 31 Dec 2021 19:17:07 +0000 (11:17 -0800)
include/linux/mmzone.h		patch \| blob \| history
include/trace/events/vmscan.h		patch \| blob \| history
mm/vmscan.c		patch \| blob \| history