mm: madvise: fix MADV_WILLNEED on shmem swapouts

[platform/adaptation/renesas_rcar/renesas_kernel.git] / mm / page_alloc.c
diff --git a/mm/page_alloc.c b/mm/page_alloc.c

index 7e7f947..7b2611a 100644 (file)
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1869,7 +1869,7 @@ static void __paginginit init_zone_allows_reclaim(int nid)
  {
         int i;
  
-       for_each_online_node(i)
+       for_each_node_state(i, N_MEMORY)
                 if (node_distance(nid, i) <= RECLAIM_DISTANCE)
                         node_set(i, NODE_DATA(nid)->reclaim_nodes);
                 else
@@ -1957,7 +1957,7 @@ zonelist_scan:
                 if (alloc_flags & ALLOC_FAIR) {
                         if (!zone_local(preferred_zone, zone))
                                 continue;
-                       if (zone_page_state(zone, NR_ALLOC_BATCH) <= 0)
+                       if (atomic_long_read(&zone->vm_stat[NR_ALLOC_BATCH]) <= 0)
                                 continue;
                 }
                 /*
@@ -2196,6 +2196,14 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
         }
  
         /*
+        * Notify the PM freezer that the OOM killer might be about to kill
+        * and wake up a task. This is done before we know whether anything
+        * will actually be killed, but false positives are acceptable.
+        * See freeze_processes().
+        */
+       note_oom_kill();
+
+       /*
          * Go through the zonelist yet one more time, keep very high watermark
          * here, this is only to catch a parallel oom killing, we must fail if
          * we're still under heavy pressure.
@@ -2434,7 +2442,7 @@ static inline int
  gfp_to_alloc_flags(gfp_t gfp_mask)
  {
         int alloc_flags = ALLOC_WMARK_MIN | ALLOC_CPUSET;
-       const gfp_t wait = gfp_mask & __GFP_WAIT;
+       const bool atomic = !(gfp_mask & (__GFP_WAIT | __GFP_NO_KSWAPD));
  
         /* __GFP_HIGH is assumed to be the same as ALLOC_HIGH to save a branch. */
         BUILD_BUG_ON(__GFP_HIGH != (__force gfp_t) ALLOC_HIGH);
@@ -2443,20 +2451,20 @@ gfp_to_alloc_flags(gfp_t gfp_mask)
          * The caller may dip into page reserves a bit more if the caller
          * cannot run direct reclaim, or if the caller has realtime scheduling
          * policy or is asking for __GFP_HIGH memory.  GFP_ATOMIC requests will
-        * set both ALLOC_HARDER (!wait) and ALLOC_HIGH (__GFP_HIGH).
+        * set both ALLOC_HARDER (atomic == true) and ALLOC_HIGH (__GFP_HIGH).
          */
         alloc_flags |= (__force int) (gfp_mask & __GFP_HIGH);
  
-       if (!wait) {
+       if (atomic) {
                 /*
-                * Not worth trying to allocate harder for
-                * __GFP_NOMEMALLOC even if it can't schedule.
+                * Not worth trying to allocate harder for __GFP_NOMEMALLOC even
+                * if it can't schedule.
                  */
-               if  (!(gfp_mask & __GFP_NOMEMALLOC))
+               if (!(gfp_mask & __GFP_NOMEMALLOC))
                         alloc_flags |= ALLOC_HARDER;
                 /*
-                * Ignore cpuset if GFP_ATOMIC (!wait) rather than fail alloc.
-                * See also cpuset_zone_allowed() comment in kernel/cpuset.c.
+                * Ignore cpuset mems for GFP_ATOMIC rather than fail, see the
+                * comment for __cpuset_node_allowed_softwall().
                  */
                 alloc_flags &= ~ALLOC_CPUSET;
         } else if (unlikely(rt_task(current)) && !in_interrupt())
@@ -2736,7 +2744,7 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
                 return NULL;
  
  retry_cpuset:
-       cpuset_mems_cookie = get_mems_allowed();
+       cpuset_mems_cookie = read_mems_allowed_begin();
  
         /* The preferred zone is used for statistics later */
         first_zones_zonelist(zonelist, high_zoneidx,
@@ -2791,7 +2799,7 @@ out:
          * the mask is being updated. If a page allocation is about to fail,
          * check if the cpuset changed during allocation and if so, retry.
          */
-       if (unlikely(!put_mems_allowed(cpuset_mems_cookie) && !page))
+       if (unlikely(!page && read_mems_allowed_retry(cpuset_mems_cookie)))
                 goto retry_cpuset;
  
         memcg_kmem_commit_charge(page, memcg, order);
@@ -3059,9 +3067,9 @@ bool skip_free_areas_node(unsigned int flags, int nid)
                 goto out;
  
         do {
-               cpuset_mems_cookie = get_mems_allowed();
+               cpuset_mems_cookie = read_mems_allowed_begin();
                 ret = !node_isset(nid, cpuset_current_mems_allowed);
-       } while (!put_mems_allowed(cpuset_mems_cookie));
+       } while (read_mems_allowed_retry(cpuset_mems_cookie));
  out:
         return ret;
  }
@@ -4933,7 +4941,8 @@ void __paginginit free_area_init_node(int nid, unsigned long *zones_size,
  
         pgdat->node_id = nid;
         pgdat->node_start_pfn = node_start_pfn;
-       init_zone_allows_reclaim(nid);
+       if (node_state(nid, N_MEMORY))
+               init_zone_allows_reclaim(nid);
  #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
         get_pfn_range_for_nid(nid, &start_pfn, &end_pfn);
  #endif
@@ -5661,9 +5670,8 @@ static void __setup_per_zone_wmarks(void)
                 zone->watermark[WMARK_HIGH] = min_wmark_pages(zone) + (tmp >> 1);
  
                 __mod_zone_page_state(zone, NR_ALLOC_BATCH,
-                                     high_wmark_pages(zone) -
-                                     low_wmark_pages(zone) -
-                                     zone_page_state(zone, NR_ALLOC_BATCH));
+                       high_wmark_pages(zone) - low_wmark_pages(zone) -
+                       atomic_long_read(&zone->vm_stat[NR_ALLOC_BATCH]));
  
                 setup_zone_migrate_reserve(zone);
                 spin_unlock_irqrestore(&zone->lock, flags);