x86: add tizen_qemu_x86_defconfig & tizen_qemu_x86_64_defconfig

[platform/kernel/linux-rpi.git] / mm / vmscan.c
diff --git a/mm/vmscan.c b/mm/vmscan.c

index eeae2f6..201acea 100644 (file)
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -41,6 +41,7 @@
  #include <linux/kthread.h>
  #include <linux/freezer.h>
  #include <linux/memcontrol.h>
+#include <linux/migrate.h>
  #include <linux/delayacct.h>
  #include <linux/sysctl.h>
  #include <linux/oom.h>
@@ -121,6 +122,9 @@ struct scan_control {
         /* The file pages on the current node are dangerously low */
         unsigned int file_is_tiny:1;
  
+       /* Always discard instead of demoting to lower tier memory */
+       unsigned int no_demotion:1;
+
         /* Allocation order */
         s8 order;
  
@@ -518,6 +522,48 @@ static long add_nr_deferred(long nr, struct shrinker *shrinker,
         return atomic_long_add_return(nr, &shrinker->nr_deferred[nid]);
  }
  
+static bool can_demote(int nid, struct scan_control *sc)
+{      /* Return true if reclaim on node @nid may demote pages rather than discard them. */
+       if (!numa_demotion_enabled)
+               return false;
+       if (sc) {       /* @sc may be NULL (some callers pass none); per-scan checks are skipped then */
+               if (sc->no_demotion)
+                       return false;
+               /* It is pointless to do demotion in memcg reclaim */
+               if (cgroup_reclaim(sc))
+                       return false;
+       }
+       if (next_demotion_node(nid) == NUMA_NO_NODE)    /* no demotion target node for @nid */
+               return false;
+
+       return true;
+}
+
+static inline bool can_reclaim_anon_pages(struct mem_cgroup *memcg,
+                                         int nid,
+                                         struct scan_control *sc)
+{      /* Return true if anon pages on node @nid are reclaimable, via swap or else via demotion. */
+       if (memcg == NULL) {
+               /*
+                * For non-memcg reclaim, is there
+                * space in any swap device?
+                */
+               if (get_nr_swap_pages() > 0)
+                       return true;
+       } else {
+               /* Is the memcg below its swap limit? */
+               if (mem_cgroup_get_nr_swap_pages(memcg) > 0)
+                       return true;
+       }
+
+       /*
+        * No swap space is available, so anon pages cannot be swapped out.
+        *
+        * Can they be reclaimed from this node via demotion instead?
+        */
+       return can_demote(nid, sc);     /* @sc may be NULL; can_demote() checks for that */
+}
+
  /*
   * This misses isolated pages which are not accounted for to save counters.
   * As the data only determines if reclaim or compaction continues, it is
@@ -529,7 +575,7 @@ unsigned long zone_reclaimable_pages(struct zone *zone)
  
         nr = zone_page_state_snapshot(zone, NR_ZONE_INACTIVE_FILE) +
                 zone_page_state_snapshot(zone, NR_ZONE_ACTIVE_FILE);
-       if (get_nr_swap_pages() > 0)
+       if (can_reclaim_anon_pages(NULL, zone_to_nid(zone), NULL))
                 nr += zone_page_state_snapshot(zone, NR_ZONE_INACTIVE_ANON) +
                         zone_page_state_snapshot(zone, NR_ZONE_ACTIVE_ANON);
  
@@ -893,6 +939,7 @@ out:
  void drop_slab_node(int nid)
  {
         unsigned long freed;
+       int shift = 0;
  
         do {
                 struct mem_cgroup *memcg = NULL;
@@ -905,7 +952,7 @@ void drop_slab_node(int nid)
                 do {
                         freed += shrink_slab(GFP_KERNEL, nid, memcg, 0);
                 } while ((memcg = mem_cgroup_iter(NULL, memcg, NULL)) != NULL);
-       } while (freed > 10);
+       } while ((freed >> shift++) > 1);
  }
  
  void drop_slab(void)
@@ -1052,14 +1099,13 @@ static pageout_t pageout(struct page *page, struct address_space *mapping)
  static int __remove_mapping(struct address_space *mapping, struct page *page,
                             bool reclaimed, struct mem_cgroup *target_memcg)
  {
-       unsigned long flags;
         int refcount;
         void *shadow = NULL;
  
         BUG_ON(!PageLocked(page));
         BUG_ON(mapping != page_mapping(page));
  
-       xa_lock_irqsave(&mapping->i_pages, flags);
+       xa_lock_irq(&mapping->i_pages);
         /*
          * The non racy check for a busy page.
          *
@@ -1100,7 +1146,7 @@ static int __remove_mapping(struct address_space *mapping, struct page *page,
                 if (reclaimed && !mapping_exiting(mapping))
                         shadow = workingset_eviction(page, target_memcg);
                 __delete_from_swap_cache(page, swap, shadow);
-               xa_unlock_irqrestore(&mapping->i_pages, flags);
+               xa_unlock_irq(&mapping->i_pages);
                 put_swap_page(page, swap);
         } else {
                 void (*freepage)(struct page *);
@@ -1126,7 +1172,7 @@ static int __remove_mapping(struct address_space *mapping, struct page *page,
                     !mapping_exiting(mapping) && !dax_mapping(mapping))
                         shadow = workingset_eviction(page, target_memcg);
                 __delete_from_page_cache(page, shadow);
-               xa_unlock_irqrestore(&mapping->i_pages, flags);
+               xa_unlock_irq(&mapping->i_pages);
  
                 if (freepage != NULL)
                         freepage(page);
@@ -1135,7 +1181,7 @@ static int __remove_mapping(struct address_space *mapping, struct page *page,
         return 1;
  
  cannot_free:
-       xa_unlock_irqrestore(&mapping->i_pages, flags);
+       xa_unlock_irq(&mapping->i_pages);
         return 0;
  }
  
@@ -1264,6 +1310,54 @@ static void page_check_dirty_writeback(struct page *page,
                 mapping->a_ops->is_dirty_writeback(page, dirty, writeback);
  }
  
+static struct page *alloc_demote_page(struct page *page, unsigned long node)
+{      /* Allocation callback handed to migrate_pages() by demote_page_list(); @node is the target node id. */
+       struct migration_target_control mtc = {
+               /*
+                * Allocate from 'node', or fail quickly and quietly.
+                * If the allocation fails, 'page' will likely just be
+                * discarded instead of migrated.
+                */
+               .gfp_mask = (GFP_HIGHUSER_MOVABLE & ~__GFP_RECLAIM) |
+                           __GFP_THISNODE  | __GFP_NOWARN |
+                           __GFP_NOMEMALLOC | GFP_NOWAIT,
+               .nid = node
+       };
+
+       return alloc_migration_target(page, (unsigned long)&mtc);       /* mtc is passed by address, cast to unsigned long */
+}
+
+/*
+ * Take pages on @demote_pages and attempt to demote them to
+ * another node.  Pages which are not demoted are left on
+ * @demote_pages.  Returns the count migrate_pages() reports as succeeded.
+ */
+static unsigned int demote_page_list(struct list_head *demote_pages,
+                                    struct pglist_data *pgdat)
+{
+       int target_nid = next_demotion_node(pgdat->node_id);
+       unsigned int nr_succeeded;      /* no initializer; presumably always written by migrate_pages() - TODO confirm */
+       int err;        /* NOTE(review): assigned below but never read */
+
+       if (list_empty(demote_pages))
+               return 0;
+
+       if (target_nid == NUMA_NO_NODE) /* this node has no demotion target */
+               return 0;
+
+       /* Demotion ignores all cpuset and mempolicy settings */
+       err = migrate_pages(demote_pages, alloc_demote_page, NULL,
+                           target_nid, MIGRATE_ASYNC, MR_DEMOTION,
+                           &nr_succeeded);
+
+       if (current_is_kswapd())        /* account to the kswapd or the direct-reclaim demotion counter */
+               __count_vm_events(PGDEMOTE_KSWAPD, nr_succeeded);
+       else
+               __count_vm_events(PGDEMOTE_DIRECT, nr_succeeded);
+
+       return nr_succeeded;
+}
+
  /*
   * shrink_page_list() returns the number of reclaimed pages
   */
@@ -1275,12 +1369,16 @@ static unsigned int shrink_page_list(struct list_head *page_list,
  {
         LIST_HEAD(ret_pages);
         LIST_HEAD(free_pages);
+       LIST_HEAD(demote_pages);
         unsigned int nr_reclaimed = 0;
         unsigned int pgactivate = 0;
+       bool do_demote_pass;
  
         memset(stat, 0, sizeof(*stat));
         cond_resched();
+       do_demote_pass = can_demote(pgdat->node_id, sc);
  
+retry:
         while (!list_empty(page_list)) {
                 struct address_space *mapping;
                 struct page *page;
@@ -1430,6 +1528,17 @@ static unsigned int shrink_page_list(struct list_head *page_list,
                 }
  
                 /*
+                * Before reclaiming the page, try to relocate
+                * its contents to another node.
+                */
+               if (do_demote_pass &&
+                   (thp_migration_supported() || !PageTransHuge(page))) {
+                       list_add(&page->lru, &demote_pages);
+                       unlock_page(page);
+                       continue;
+               }
+
+               /*
                  * Anonymous process memory has backing store?
                  * Try to allocate it some swap space here.
                  * Lazyfree page could be freed directly
@@ -1624,11 +1733,14 @@ static unsigned int shrink_page_list(struct list_head *page_list,
                         /* follow __remove_mapping for reference */
                         if (!page_ref_freeze(page, 1))
                                 goto keep_locked;
-                       if (PageDirty(page)) {
-                               page_ref_unfreeze(page, 1);
-                               goto keep_locked;
-                       }
-
+                       /*
+                        * The page has only one reference left, which is
+                        * from the isolation. After the caller puts the
+                        * page back on lru and drops the reference, the
+                        * page will be freed anyway. It doesn't matter
+                        * which lru it goes. So we don't bother checking
+                        * PageDirty here.
+                        */
                         count_vm_event(PGLAZYFREED);
                         count_memcg_page_event(page, PGLAZYFREED);
                 } else if (!mapping || !__remove_mapping(mapping, page, true,
@@ -1680,6 +1792,17 @@ keep:
                 list_add(&page->lru, &ret_pages);
                 VM_BUG_ON_PAGE(PageLRU(page) || PageUnevictable(page), page);
         }
+       /* 'page_list' is always empty here */
+
+       /* Migrate pages selected for demotion */
+       nr_reclaimed += demote_page_list(&demote_pages, pgdat);
+       /* Pages that could not be demoted are still in @demote_pages */
+       if (!list_empty(&demote_pages)) {
+               /* Pages which failed to be demoted go back on @page_list for retry: */
+               list_splice_init(&demote_pages, page_list);
+               do_demote_pass = false;
+               goto retry;
+       }
  
         pgactivate = stat->nr_activate[0] + stat->nr_activate[1];
  
@@ -1698,7 +1821,6 @@ unsigned int reclaim_clean_pages_from_list(struct zone *zone,
  {
         struct scan_control sc = {
                 .gfp_mask = GFP_KERNEL,
-               .priority = DEF_PRIORITY,
                 .may_unmap = 1,
         };
         struct reclaim_stat stat;
@@ -1744,69 +1866,6 @@ unsigned int reclaim_clean_pages_from_list(struct zone *zone,
  }
  
  /*
- * Attempt to remove the specified page from its LRU.  Only take this page
- * if it is of the appropriate PageActive status.  Pages which are being
- * freed elsewhere are also ignored.
- *
- * page:       page to consider
- * mode:       one of the LRU isolation modes defined above
- *
- * returns true on success, false on failure.
- */
-bool __isolate_lru_page_prepare(struct page *page, isolate_mode_t mode)
-{
-       /* Only take pages on the LRU. */
-       if (!PageLRU(page))
-               return false;
-
-       /* Compaction should not handle unevictable pages but CMA can do so */
-       if (PageUnevictable(page) && !(mode & ISOLATE_UNEVICTABLE))
-               return false;
-
-       /*
-        * To minimise LRU disruption, the caller can indicate that it only
-        * wants to isolate pages it will be able to operate on without
-        * blocking - clean pages for the most part.
-        *
-        * ISOLATE_ASYNC_MIGRATE is used to indicate that it only wants to pages
-        * that it is possible to migrate without blocking
-        */
-       if (mode & ISOLATE_ASYNC_MIGRATE) {
-               /* All the caller can do on PageWriteback is block */
-               if (PageWriteback(page))
-                       return false;
-
-               if (PageDirty(page)) {
-                       struct address_space *mapping;
-                       bool migrate_dirty;
-
-                       /*
-                        * Only pages without mappings or that have a
-                        * ->migratepage callback are possible to migrate
-                        * without blocking. However, we can be racing with
-                        * truncation so it's necessary to lock the page
-                        * to stabilise the mapping as truncation holds
-                        * the page lock until after the page is removed
-                        * from the page cache.
-                        */
-                       if (!trylock_page(page))
-                               return false;
-
-                       mapping = page_mapping(page);
-                       migrate_dirty = !mapping || mapping->a_ops->migratepage;
-                       unlock_page(page);
-                       if (!migrate_dirty)
-                               return false;
-               }
-       }
-
-       if ((mode & ISOLATE_UNMAPPED) && page_mapped(page))
-               return false;
-
-       return true;
-}
-
-/*
   * Update LRU sizes after isolating pages. The LRU size updates must
   * be complete before mem_cgroup_update_lru_size due to a sanity check.
   */
@@ -1857,11 +1916,11 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
         unsigned long skipped = 0;
         unsigned long scan, total_scan, nr_pages;
         LIST_HEAD(pages_skipped);
-       isolate_mode_t mode = (sc->may_unmap ? 0 : ISOLATE_UNMAPPED);
  
         total_scan = 0;
         scan = 0;
         while (scan < nr_to_scan && !list_empty(src)) {
+               struct list_head *move_to = src;
                 struct page *page;
  
                 page = lru_to_page(src);
@@ -1871,9 +1930,9 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
                 total_scan += nr_pages;
  
                 if (page_zonenum(page) > sc->reclaim_idx) {
-                       list_move(&page->lru, &pages_skipped);
                         nr_skipped[page_zonenum(page)] += nr_pages;
-                       continue;
+                       move_to = &pages_skipped;
+                       goto move;
                 }
  
                 /*
@@ -1881,37 +1940,34 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
                  * return with no isolated pages if the LRU mostly contains
                  * ineligible pages.  This causes the VM to not reclaim any
                  * pages, triggering a premature OOM.
-                *
-                * Account all tail pages of THP.  This would not cause
-                * premature OOM since __isolate_lru_page() returns -EBUSY
-                * only when the page is being freed somewhere else.
+                * Account all tail pages of THP.
                  */
                 scan += nr_pages;
-               if (!__isolate_lru_page_prepare(page, mode)) {
-                       /* It is being freed elsewhere */
-                       list_move(&page->lru, src);
-                       continue;
-               }
+
+               if (!PageLRU(page))
+                       goto move;
+               if (!sc->may_unmap && page_mapped(page))
+                       goto move;
+
                 /*
                  * Be careful not to clear PageLRU until after we're
                  * sure the page is not being freed elsewhere -- the
                  * page release code relies on it.
                  */
-               if (unlikely(!get_page_unless_zero(page))) {
-                       list_move(&page->lru, src);
-                       continue;
-               }
+               if (unlikely(!get_page_unless_zero(page)))
+                       goto move;
  
                 if (!TestClearPageLRU(page)) {
                         /* Another thread is already isolating this page */
                         put_page(page);
-                       list_move(&page->lru, src);
-                       continue;
+                       goto move;
                 }
  
                 nr_taken += nr_pages;
                 nr_zone_taken[page_zonenum(page)] += nr_pages;
-               list_move(&page->lru, dst);
+               move_to = dst;
+move:
+               list_move(&page->lru, move_to);
         }
  
         /*
@@ -1935,7 +1991,8 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
         }
         *nr_scanned = total_scan;
         trace_mm_vmscan_lru_isolate(sc->reclaim_idx, sc->order, nr_to_scan,
-                                   total_scan, skipped, nr_taken, mode, lru);
+                                   total_scan, skipped, nr_taken,
+                                   sc->may_unmap ? 0 : ISOLATE_UNMAPPED, lru);
         update_lru_sizes(lruvec, lru, nr_zone_taken);
         return nr_taken;
  }
@@ -2323,10 +2380,10 @@ unsigned long reclaim_pages(struct list_head *page_list)
         unsigned int noreclaim_flag;
         struct scan_control sc = {
                 .gfp_mask = GFP_KERNEL,
-               .priority = DEF_PRIORITY,
                 .may_writepage = 1,
                 .may_unmap = 1,
                 .may_swap = 1,
+               .no_demotion = 1,
         };
  
         noreclaim_flag = memalloc_noreclaim_save();
@@ -2452,6 +2509,7 @@ enum scan_balance {
  static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc,
                            unsigned long *nr)
  {
+       struct pglist_data *pgdat = lruvec_pgdat(lruvec);
         struct mem_cgroup *memcg = lruvec_memcg(lruvec);
         unsigned long anon_cost, file_cost, total_cost;
         int swappiness = mem_cgroup_swappiness(memcg);
@@ -2462,7 +2520,7 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc,
         enum lru_list lru;
  
         /* If we have no swap space, do not bother scanning anon pages. */
-       if (!sc->may_swap || mem_cgroup_get_nr_swap_pages(memcg) <= 0) {
+       if (!sc->may_swap || !can_reclaim_anon_pages(memcg, pgdat->node_id, sc)) {
                 scan_balance = SCAN_FILE;
                 goto out;
         }
@@ -2592,7 +2650,7 @@ out:
                         cgroup_size = max(cgroup_size, protection);
  
                         scan = lruvec_size - lruvec_size * protection /
-                               cgroup_size;
+                               (cgroup_size + 1);
  
                         /*
                          * Minimally target SWAP_CLUSTER_MAX pages to keep
@@ -2645,6 +2703,21 @@ out:
         }
  }
  
+/*
+ * Return true if aging the anon LRU of @pgdat is worthwhile: it is a
+ * waste if there is ultimately no way (swap or demotion) to reclaim it.
+ */
+static bool can_age_anon_pages(struct pglist_data *pgdat,
+                              struct scan_control *sc)
+{
+       /* Aging the anon LRU is valuable if swap is present: */
+       if (total_swap_pages > 0)
+               return true;
+
+       /* Also valuable if anon pages can be demoted: */
+       return can_demote(pgdat->node_id, sc);
+}
+
  static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
  {
         unsigned long nr[NR_LRU_LISTS];
@@ -2653,8 +2726,8 @@ static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
         enum lru_list lru;
         unsigned long nr_reclaimed = 0;
         unsigned long nr_to_reclaim = sc->nr_to_reclaim;
+       bool proportional_reclaim;
         struct blk_plug plug;
-       bool scan_adjusted;
  
         get_scan_count(lruvec, sc, nr);
  
@@ -2672,8 +2745,8 @@ static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
          * abort proportional reclaim if either the file or anon lru has already
          * dropped to zero at the first pass.
          */
-       scan_adjusted = (!cgroup_reclaim(sc) && !current_is_kswapd() &&
-                        sc->priority == DEF_PRIORITY);
+       proportional_reclaim = (!cgroup_reclaim(sc) && !current_is_kswapd() &&
+                               sc->priority == DEF_PRIORITY);
  
         blk_start_plug(&plug);
         while (nr[LRU_INACTIVE_ANON] || nr[LRU_ACTIVE_FILE] ||
@@ -2693,7 +2766,7 @@ static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
  
                 cond_resched();
  
-               if (nr_reclaimed < nr_to_reclaim || scan_adjusted)
+               if (nr_reclaimed < nr_to_reclaim || proportional_reclaim)
                         continue;
  
                 /*
@@ -2744,8 +2817,6 @@ static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
                 nr_scanned = targets[lru] - nr[lru];
                 nr[lru] = targets[lru] * (100 - percentage) / 100;
                 nr[lru] -= min(nr[lru], nr_scanned);
-
-               scan_adjusted = true;
         }
         blk_finish_plug(&plug);
         sc->nr_reclaimed += nr_reclaimed;
@@ -2754,7 +2825,8 @@ static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
          * Even if we did not try to evict anon pages at all, we want to
          * rebalance the anon lru active/inactive ratio.
          */
-       if (total_swap_pages && inactive_is_low(lruvec, LRU_INACTIVE_ANON))
+       if (can_age_anon_pages(lruvec_pgdat(lruvec), sc) &&
+           inactive_is_low(lruvec, LRU_INACTIVE_ANON))
                 shrink_active_list(SWAP_CLUSTER_MAX, lruvec,
                                    sc, LRU_ACTIVE_ANON);
  }
@@ -2824,7 +2896,7 @@ static inline bool should_continue_reclaim(struct pglist_data *pgdat,
          */
         pages_for_compaction = compact_gap(sc->order);
         inactive_lru_pages = node_page_state(pgdat, NR_INACTIVE_FILE);
-       if (get_nr_swap_pages() > 0)
+       if (can_reclaim_anon_pages(NULL, pgdat->node_id, sc))
                 inactive_lru_pages += node_page_state(pgdat, NR_INACTIVE_ANON);
  
         return inactive_lru_pages > pages_for_compaction;
@@ -2898,6 +2970,12 @@ static void shrink_node(pg_data_t *pgdat, struct scan_control *sc)
         target_lruvec = mem_cgroup_lruvec(sc->target_mem_cgroup, pgdat);
  
  again:
+       /*
+        * Flush the memory cgroup stats, so that we read accurate per-memcg
+        * lruvec stats for heuristics.
+        */
+       mem_cgroup_flush_stats();
+
         memset(&sc->nr, 0, sizeof(sc->nr));
  
         nr_reclaimed = sc->nr_reclaimed;
@@ -3434,18 +3512,14 @@ static bool throttle_direct_reclaim(gfp_t gfp_mask, struct zonelist *zonelist,
          * blocked waiting on the same lock. Instead, throttle for up to a
          * second before continuing.
          */
-       if (!(gfp_mask & __GFP_FS)) {
+       if (!(gfp_mask & __GFP_FS))
                 wait_event_interruptible_timeout(pgdat->pfmemalloc_wait,
                         allow_direct_reclaim(pgdat), HZ);
+       else
+               /* Throttle until kswapd wakes the process */
+               wait_event_killable(zone->zone_pgdat->pfmemalloc_wait,
+                       allow_direct_reclaim(pgdat));
  
-               goto check_pending;
-       }
-
-       /* Throttle until kswapd wakes the process */
-       wait_event_killable(zone->zone_pgdat->pfmemalloc_wait,
-               allow_direct_reclaim(pgdat));
-
-check_pending:
         if (fatal_signal_pending(current))
                 return true;
  
@@ -3583,7 +3657,7 @@ static void age_active_anon(struct pglist_data *pgdat,
         struct mem_cgroup *memcg;
         struct lruvec *lruvec;
  
-       if (!total_swap_pages)
+       if (!can_age_anon_pages(pgdat, sc))
                 return;
  
         lruvec = mem_cgroup_lruvec(NULL, pgdat);
@@ -3812,7 +3886,7 @@ static int balance_pgdat(pg_data_t *pgdat, int order, int highest_zoneidx)
  
         set_task_reclaim_state(current, &sc.reclaim_state);
         psi_memstall_enter(&pflags);
-       __fs_reclaim_acquire();
+       __fs_reclaim_acquire(_THIS_IP_);
  
         count_vm_event(PAGEOUTRUN);
  
@@ -3938,9 +4012,9 @@ restart:
                         wake_up_all(&pgdat->pfmemalloc_wait);
  
                 /* Check if kswapd should be suspending */
-               __fs_reclaim_release();
+               __fs_reclaim_release(_THIS_IP_);
                 ret = try_to_freeze();
-               __fs_reclaim_acquire();
+               __fs_reclaim_acquire(_THIS_IP_);
                 if (ret || kthread_should_stop())
                         break;
  
@@ -3992,7 +4066,7 @@ out:
         }
  
         snapshot_refaults(NULL, pgdat);
-       __fs_reclaim_release();
+       __fs_reclaim_release(_THIS_IP_);
         psi_memstall_leave(&pflags);
         set_task_reclaim_state(current, NULL);
  
@@ -4290,23 +4364,20 @@ unsigned long shrink_all_memory(unsigned long nr_to_reclaim)
   * This kswapd start function will be called by init and node-hot-add.
   * On node-hot-add, kswapd will moved to proper cpus if cpus are hot-added.
   */
-int kswapd_run(int nid)
+void kswapd_run(int nid)
  {
         pg_data_t *pgdat = NODE_DATA(nid);
-       int ret = 0;
  
         if (pgdat->kswapd)
-               return 0;
+               return;
  
         pgdat->kswapd = kthread_run(kswapd, pgdat, "kswapd%d", nid);
         if (IS_ERR(pgdat->kswapd)) {
                 /* failure at boot is fatal */
                 BUG_ON(system_state < SYSTEM_RUNNING);
                 pr_err("Failed to start kswapd on node %d\n", nid);
-               ret = PTR_ERR(pgdat->kswapd);
                 pgdat->kswapd = NULL;
         }
-       return ret;
  }
  
  /*