diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 533e214..7387a67 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -205,7 +205,7 @@ static char * const zone_names[MAX_NR_ZONES] = {
 };
 
 int min_free_kbytes = 1024;
-int user_min_free_kbytes;
+int user_min_free_kbytes = -1;
 
 static unsigned long __meminitdata nr_kernel_pages;
 static unsigned long __meminitdata nr_all_pages;
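
Note on the hunk above: initializing user_min_free_kbytes to -1 gives the rest of the kernel a sentinel for "min_free_kbytes was never written via /proc/sys/vm/min_free_kbytes", so automatic tuning can avoid clobbering an explicit admin setting. A minimal consumer sketch (hypothetical helper name and policy, not taken from this patch):

/*
 * Hypothetical consumer of the -1 sentinel: only auto-raise
 * min_free_kbytes when the admin has never written the sysctl.
 */
static void maybe_raise_min_free_kbytes(int recommended_min)
{
        if (recommended_min <= min_free_kbytes)
                return;

        if (user_min_free_kbytes == -1) {
                /* no explicit user value: safe to auto-tune */
                min_free_kbytes = recommended_min;
                setup_per_zone_wmarks();
        } else {
                pr_info("leaving min_free_kbytes at user-set %d (recommended %d)\n",
                        min_free_kbytes, recommended_min);
        }
}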
@@ -295,7 +295,7 @@ static inline int bad_range(struct zone *zone, struct page *page)
 }
 #endif
 
-static void bad_page(struct page *page)
+static void bad_page(struct page *page, char *reason, unsigned long bad_flags)
 {
        static unsigned long resume;
        static unsigned long nr_shown;
@@ -329,7 +329,7 @@ static void bad_page(struct page *page)
 
        printk(KERN_ALERT "BUG: Bad page state in process %s  pfn:%05lx\n",
                current->comm, page_to_pfn(page));
-       dump_page(page);
+       dump_page_badflags(page, reason, bad_flags);
 
        print_modules();
        dump_stack();
@@ -369,9 +369,11 @@ void prep_compound_page(struct page *page, unsigned long order)
        __SetPageHead(page);
        for (i = 1; i < nr_pages; i++) {
                struct page *p = page + i;
-               __SetPageTail(p);
                set_page_count(p, 0);
                p->first_page = page;
+               /* Make sure p->first_page is always valid for PageTail() */
+               smp_wmb();
+               __SetPageTail(p);
        }
 }
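
The smp_wmb() added above publishes p->first_page before the PageTail bit becomes visible, so any reader that observes PageTail(p) can safely follow first_page. A minimal reader-side sketch pairing with that barrier (hypothetical helper, not part of this patch):

/*
 * Hypothetical reader pairing with the smp_wmb() in prep_compound_page():
 * once PageTail() is seen, first_page is guaranteed to be valid.
 */
static struct page *compound_head_sketch(struct page *p)
{
        if (PageTail(p)) {
                /* don't let the first_page load be hoisted above the flag test */
                smp_rmb();
                return p->first_page;
        }
        return p;
}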
 
@@ -383,7 +385,7 @@ static int destroy_compound_page(struct page *page, unsigned long order)
        int bad = 0;
 
        if (unlikely(compound_order(page) != order)) {
-               bad_page(page);
+               bad_page(page, "wrong compound order", 0);
                bad++;
        }
 
@@ -392,8 +394,11 @@ static int destroy_compound_page(struct page *page, unsigned long order)
        for (i = 1; i < nr_pages; i++) {
                struct page *p = page + i;
 
-               if (unlikely(!PageTail(p) || (p->first_page != page))) {
-                       bad_page(page);
+               if (unlikely(!PageTail(p))) {
+                       bad_page(page, "PageTail not set", 0);
+                       bad++;
+               } else if (unlikely(p->first_page != page)) {
+                       bad_page(page, "first_page not consistent", 0);
                        bad++;
                }
                __ClearPageTail(p);
@@ -506,12 +511,12 @@ static inline int page_is_buddy(struct page *page, struct page *buddy,
                return 0;
 
        if (page_is_guard(buddy) && page_order(buddy) == order) {
-               VM_BUG_ON(page_count(buddy) != 0);
+               VM_BUG_ON_PAGE(page_count(buddy) != 0, buddy);
                return 1;
        }
 
        if (PageBuddy(buddy) && page_order(buddy) == order) {
-               VM_BUG_ON(page_count(buddy) != 0);
+               VM_BUG_ON_PAGE(page_count(buddy) != 0, buddy);
                return 1;
        }
        return 0;
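
VM_BUG_ON_PAGE() differs from VM_BUG_ON() in that it dumps the offending struct page before hitting BUG(), which is what makes these buddy assertions debuggable from a crash log. Roughly, and only as an illustration (the real macro lives in include/linux/mmdebug.h and may differ), it behaves like:

/* Illustrative only; the real definition depends on CONFIG_DEBUG_VM. */
#ifdef CONFIG_DEBUG_VM
#define VM_BUG_ON_PAGE(cond, page)                                      \
        do {                                                            \
                if (unlikely(cond)) {                                   \
                        dump_page(page, "VM_BUG_ON_PAGE(" #cond ")");   \
                        BUG();                                          \
                }                                                       \
        } while (0)
#else
#define VM_BUG_ON_PAGE(cond, page) do { } while (0)
#endif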
@@ -561,8 +566,8 @@ static inline void __free_one_page(struct page *page,
 
        page_idx = page_to_pfn(page) & ((1 << MAX_ORDER) - 1);
 
-       VM_BUG_ON(page_idx & ((1 << order) - 1));
-       VM_BUG_ON(bad_range(zone, page));
+       VM_BUG_ON_PAGE(page_idx & ((1 << order) - 1), page);
+       VM_BUG_ON_PAGE(bad_range(zone, page), page);
 
        while (order < MAX_ORDER-1) {
                buddy_idx = __find_buddy_index(page_idx, order);
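
For reference, the buddy lookup used here amounts to flipping bit 'order' of the page index, so a block and its buddy differ only in that bit. A tiny worked example with illustrative values:

/*
 * __find_buddy_index(page_idx, order) is page_idx ^ (1 << order).
 */
static unsigned long buddy_index_example(void)
{
        unsigned long page_idx = 12;    /* 0b1100, aligned to order 2 */
        unsigned int order = 2;

        /* 12 ^ 4 == 8: blocks {8..11} and {12..15} are buddies */
        return page_idx ^ (1UL << order);
}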
@@ -618,12 +623,23 @@ out:
 
 static inline int free_pages_check(struct page *page)
 {
-       if (unlikely(page_mapcount(page) |
-               (page->mapping != NULL)  |
-               (atomic_read(&page->_count) != 0) |
-               (page->flags & PAGE_FLAGS_CHECK_AT_FREE) |
-               (mem_cgroup_bad_page_check(page)))) {
-               bad_page(page);
+       char *bad_reason = NULL;
+       unsigned long bad_flags = 0;
+
+       if (unlikely(page_mapcount(page)))
+               bad_reason = "nonzero mapcount";
+       if (unlikely(page->mapping != NULL))
+               bad_reason = "non-NULL mapping";
+       if (unlikely(atomic_read(&page->_count) != 0))
+               bad_reason = "nonzero _count";
+       if (unlikely(page->flags & PAGE_FLAGS_CHECK_AT_FREE)) {
+               bad_reason = "PAGE_FLAGS_CHECK_AT_FREE flag(s) set";
+               bad_flags = PAGE_FLAGS_CHECK_AT_FREE;
+       }
+       if (unlikely(mem_cgroup_bad_page_check(page)))
+               bad_reason = "cgroup check failed";
+       if (unlikely(bad_reason)) {
+               bad_page(page, bad_reason, bad_flags);
                return 1;
        }
        page_cpupid_reset_last(page);
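
Splitting the single OR'ed test into per-condition checks lets bad_page() name the violated invariant, and bad_flags tells dump_page_badflags() which flag bits to highlight. A hedged sketch of an additional caller using the same reason/mask convention (hypothetical check, not part of this patch):

/*
 * Hypothetical in-file check using the same reason/bad_flags convention:
 * report a page that still carries flags from a forbidden mask.
 */
static void check_flags_clear(struct page *page, unsigned long forbidden)
{
        if (unlikely(page->flags & forbidden))
                bad_page(page, "unexpected page flags set", forbidden);
}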
@@ -813,7 +829,7 @@ static inline void expand(struct zone *zone, struct page *page,
                area--;
                high--;
                size >>= 1;
-               VM_BUG_ON(bad_range(zone, &page[size]));
+               VM_BUG_ON_PAGE(bad_range(zone, &page[size]), &page[size]);
 
 #ifdef CONFIG_DEBUG_PAGEALLOC
                if (high < debug_guardpage_minorder()) {
@@ -843,12 +859,23 @@ static inline void expand(struct zone *zone, struct page *page,
  */
 static inline int check_new_page(struct page *page)
 {
-       if (unlikely(page_mapcount(page) |
-               (page->mapping != NULL)  |
-               (atomic_read(&page->_count) != 0)  |
-               (page->flags & PAGE_FLAGS_CHECK_AT_PREP) |
-               (mem_cgroup_bad_page_check(page)))) {
-               bad_page(page);
+       char *bad_reason = NULL;
+       unsigned long bad_flags = 0;
+
+       if (unlikely(page_mapcount(page)))
+               bad_reason = "nonzero mapcount";
+       if (unlikely(page->mapping != NULL))
+               bad_reason = "non-NULL mapping";
+       if (unlikely(atomic_read(&page->_count) != 0))
+               bad_reason = "nonzero _count";
+       if (unlikely(page->flags & PAGE_FLAGS_CHECK_AT_PREP)) {
+               bad_reason = "PAGE_FLAGS_CHECK_AT_PREP flag set";
+               bad_flags = PAGE_FLAGS_CHECK_AT_PREP;
+       }
+       if (unlikely(mem_cgroup_bad_page_check(page)))
+               bad_reason = "cgroup check failed";
+       if (unlikely(bad_reason)) {
+               bad_page(page, bad_reason, bad_flags);
                return 1;
        }
        return 0;
@@ -955,7 +982,7 @@ int move_freepages(struct zone *zone,
 
        for (page = start_page; page <= end_page;) {
                /* Make sure we are not inadvertently changing nodes */
-               VM_BUG_ON(page_to_nid(page) != zone_to_nid(zone));
+               VM_BUG_ON_PAGE(page_to_nid(page) != zone_to_nid(zone), page);
 
                if (!pfn_valid_within(page_to_pfn(page))) {
                        page++;
@@ -1404,8 +1431,8 @@ void split_page(struct page *page, unsigned int order)
 {
        int i;
 
-       VM_BUG_ON(PageCompound(page));
-       VM_BUG_ON(!page_count(page));
+       VM_BUG_ON_PAGE(PageCompound(page), page);
+       VM_BUG_ON_PAGE(!page_count(page), page);
 
 #ifdef CONFIG_KMEMCHECK
        /*
@@ -1548,11 +1575,12 @@ again:
        }
 
        __mod_zone_page_state(zone, NR_ALLOC_BATCH, -(1 << order));
+
        __count_zone_vm_events(PGALLOC, zone, 1 << order);
        zone_statistics(preferred_zone, zone, gfp_flags);
        local_irq_restore(flags);
 
-       VM_BUG_ON(bad_range(zone, page));
+       VM_BUG_ON_PAGE(bad_range(zone, page), page);
        if (prep_new_page(page, order, gfp_flags))
                goto again;
        return page;
@@ -1912,19 +1940,12 @@ zonelist_scan:
                 * zone size to ensure fair page aging.  The zone a
                 * page was allocated in should have no effect on the
                 * time the page has in memory before being reclaimed.
-                *
-                * Try to stay in local zones in the fastpath.  If
-                * that fails, the slowpath is entered, which will do
-                * another pass starting with the local zones, but
-                * ultimately fall back to remote zones that do not
-                * partake in the fairness round-robin cycle of this
-                * zonelist.
                 */
-               if (alloc_flags & ALLOC_WMARK_LOW) {
-                       if (zone_page_state(zone, NR_ALLOC_BATCH) <= 0)
-                               continue;
+               if (alloc_flags & ALLOC_FAIR) {
                        if (!zone_local(preferred_zone, zone))
                                continue;
+                       if (zone_page_state(zone, NR_ALLOC_BATCH) <= 0)
+                               continue;
                }
                /*
                 * When allocating a page cache page for writing, we
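
In the reordered fairness check above, the cheap locality test now runs before the NR_ALLOC_BATCH read, and ALLOC_FAIR (rather than ALLOC_WMARK_LOW) gates the whole fairness pass. Here "local" means the zone belongs to the preferred zone's node; a hedged sketch of that predicate (the in-tree zone_local() helper may be implemented differently):

/*
 * Sketch of the locality predicate assumed by the fairness pass:
 * two zones are "local" to each other when they sit on the same node.
 */
static bool zone_local_sketch(struct zone *preferred_zone, struct zone *zone)
{
        return zone_to_nid(preferred_zone) == zone_to_nid(zone);
}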
@@ -2362,32 +2383,40 @@ __alloc_pages_high_priority(gfp_t gfp_mask, unsigned int order,
        return page;
 }
 
-static void prepare_slowpath(gfp_t gfp_mask, unsigned int order,
-                            struct zonelist *zonelist,
-                            enum zone_type high_zoneidx,
-                            struct zone *preferred_zone)
+static void reset_alloc_batches(struct zonelist *zonelist,
+                               enum zone_type high_zoneidx,
+                               struct zone *preferred_zone)
 {
        struct zoneref *z;
        struct zone *zone;
 
        for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {
-               if (!(gfp_mask & __GFP_NO_KSWAPD))
-                       wakeup_kswapd(zone, order, zone_idx(preferred_zone));
                /*
                 * Only reset the batches of zones that were actually
-                * considered in the fast path, we don't want to
-                * thrash fairness information for zones that are not
+                * considered in the fairness pass, we don't want to
+                * trash fairness information for zones that are not
                 * actually part of this zonelist's round-robin cycle.
                 */
                if (!zone_local(preferred_zone, zone))
                        continue;
                mod_zone_page_state(zone, NR_ALLOC_BATCH,
-                                   high_wmark_pages(zone) -
-                                   low_wmark_pages(zone) -
-                                   zone_page_state(zone, NR_ALLOC_BATCH));
+                       high_wmark_pages(zone) - low_wmark_pages(zone) -
+                       atomic_long_read(&zone->vm_stat[NR_ALLOC_BATCH]));
        }
 }
 
+static void wake_all_kswapds(unsigned int order,
+                            struct zonelist *zonelist,
+                            enum zone_type high_zoneidx,
+                            struct zone *preferred_zone)
+{
+       struct zoneref *z;
+       struct zone *zone;
+
+       for_each_zone_zonelist(zone, z, zonelist, high_zoneidx)
+               wakeup_kswapd(zone, order, zone_idx(preferred_zone));
+}
+
 static inline int
 gfp_to_alloc_flags(gfp_t gfp_mask)
 {
@@ -2477,12 +2506,12 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
         * over allocated.
         */
        if (IS_ENABLED(CONFIG_NUMA) &&
-                       (gfp_mask & GFP_THISNODE) == GFP_THISNODE)
+           (gfp_mask & GFP_THISNODE) == GFP_THISNODE)
                goto nopage;
 
 restart:
-       prepare_slowpath(gfp_mask, order, zonelist,
-                        high_zoneidx, preferred_zone);
+       if (!(gfp_mask & __GFP_NO_KSWAPD))
+               wake_all_kswapds(order, zonelist, high_zoneidx, preferred_zone);
 
        /*
         * OK, we're below the kswapd watermark and have kicked background
@@ -2666,7 +2695,7 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
        struct page *page = NULL;
        int migratetype = allocflags_to_migratetype(gfp_mask);
        unsigned int cpuset_mems_cookie;
-       int alloc_flags = ALLOC_WMARK_LOW|ALLOC_CPUSET;
+       int alloc_flags = ALLOC_WMARK_LOW|ALLOC_CPUSET|ALLOC_FAIR;
        struct mem_cgroup *memcg = NULL;
 
        gfp_mask &= gfp_allowed_mask;
@@ -2707,12 +2736,29 @@ retry_cpuset:
        if (allocflags_to_migratetype(gfp_mask) == MIGRATE_MOVABLE)
                alloc_flags |= ALLOC_CMA;
 #endif
+retry:
        /* First allocation attempt */
        page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, nodemask, order,
                        zonelist, high_zoneidx, alloc_flags,
                        preferred_zone, migratetype);
        if (unlikely(!page)) {
                /*
+                * The first pass makes sure allocations are spread
+                * fairly within the local node.  However, the local
+                * node might have free pages left after the fairness
+                * batches are exhausted, and remote zones haven't
+                * even been considered yet.  Try once more without
+                * fairness, and include remote zones now, before
+                * entering the slowpath and waking kswapd: prefer
+                * spilling to a remote zone over swapping locally.
+                */
+               if (alloc_flags & ALLOC_FAIR) {
+                       reset_alloc_batches(zonelist, high_zoneidx,
+                                           preferred_zone);
+                       alloc_flags &= ~ALLOC_FAIR;
+                       goto retry;
+               }
+               /*
                 * Runtime PM, block IO and its error handling path
                 * can deadlock because I/O on the device might not
                 * complete.
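
The retry above implements a two-pass policy: the first pass (ALLOC_FAIR set) only takes pages from local zones whose NR_ALLOC_BATCH is still positive; if that fails, the batches are refilled, ALLOC_FAIR is dropped, and the second pass may use any zone before the slowpath is entered. A small standalone model of that batch/retry cycle (userspace sketch with made-up numbers, not kernel code):

/*
 * Userspace model of the fairness batches (illustrative only): each
 * local zone gets a batch of pages, the fair pass drains the first zone
 * with batch left, and an empty set of batches triggers a reset plus an
 * unfair retry, mirroring the ALLOC_FAIR logic above.
 */
#include <stdio.h>

#define NZONES 2

static long batch[NZONES];

static void reset_batches(void)
{
        for (int i = 0; i < NZONES; i++)
                batch[i] = 4;   /* stand-in for high_wmark - low_wmark */
}

static int alloc_fair(void)
{
        for (int i = 0; i < NZONES; i++) {
                if (batch[i] > 0) {
                        batch[i]--;     /* NR_ALLOC_BATCH decrement */
                        return i;
                }
        }
        return -1;              /* all local batches exhausted */
}

int main(void)
{
        reset_batches();
        for (int n = 0; n < 10; n++) {
                int z = alloc_fair();
                if (z < 0) {
                        reset_batches();        /* reset_alloc_batches() */
                        z = alloc_fair();       /* retry without fairness */
                        printf("alloc %2d: zone %d after batch reset\n", n, z);
                } else {
                        printf("alloc %2d: zone %d (fair pass)\n", n, z);
                }
        }
        return 0;
}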
@@ -5729,7 +5775,12 @@ module_init(init_per_zone_wmark_min)
 int min_free_kbytes_sysctl_handler(ctl_table *table, int write,
        void __user *buffer, size_t *length, loff_t *ppos)
 {
-       proc_dointvec(table, write, buffer, length, ppos);
+       int rc;
+
+       rc = proc_dointvec_minmax(table, write, buffer, length, ppos);
+       if (rc)
+               return rc;
+
        if (write) {
                user_min_free_kbytes = min_free_kbytes;
                setup_per_zone_wmarks();
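
Checking the return value of proc_dointvec_minmax() means a rejected write no longer triggers a watermark recalculation, but the range check only applies if the sysctl table entry supplies bounds. A hedged sketch of such an entry (field values are illustrative; the real entry lives in kernel/sysctl.c):

/* Illustrative ctl_table entry; the in-tree one may use different bounds. */
static int zero;

static struct ctl_table vm_table_sketch[] = {
        {
                .procname       = "min_free_kbytes",
                .data           = &min_free_kbytes,
                .maxlen         = sizeof(min_free_kbytes),
                .mode           = 0644,
                .proc_handler   = min_free_kbytes_sysctl_handler,
                .extra1         = &zero,        /* reject negative writes */
        },
        { }
};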
@@ -5996,7 +6047,7 @@ void set_pageblock_flags_group(struct page *page, unsigned long flags,
        pfn = page_to_pfn(page);
        bitmap = get_pageblock_bitmap(zone, pfn);
        bitidx = pfn_to_bitidx(zone, pfn);
-       VM_BUG_ON(!zone_spans_pfn(zone, pfn));
+       VM_BUG_ON_PAGE(!zone_spans_pfn(zone, pfn), page);
 
        for (; start_bitidx <= end_bitidx; start_bitidx++, value <<= 1)
                if (flags & value)
@@ -6494,12 +6545,24 @@ static void dump_page_flags(unsigned long flags)
        printk(")\n");
 }
 
-void dump_page(struct page *page)
+void dump_page_badflags(struct page *page, char *reason, unsigned long badflags)
 {
        printk(KERN_ALERT
               "page:%p count:%d mapcount:%d mapping:%p index:%#lx\n",
                page, atomic_read(&page->_count), page_mapcount(page),
                page->mapping, page->index);
        dump_page_flags(page->flags);
+       if (reason)
+               pr_alert("page dumped because: %s\n", reason);
+       if (page->flags & badflags) {
+               pr_alert("bad because of flags:\n");
+               dump_page_flags(page->flags & badflags);
+       }
        mem_cgroup_print_bad_page(page);
 }
+
+void dump_page(struct page *page, char *reason)
+{
+       dump_page_badflags(page, reason, 0);
+}
+EXPORT_SYMBOL_GPL(dump_page);
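
With the reworked interface, external callers pass a human-readable reason to dump_page() and get the flag-aware dump for free, while dump_page_badflags() remains available when a specific flag mask should be highlighted. A short hypothetical usage sketch:

/*
 * Hypothetical caller: dump a page that failed a sanity check, with no
 * particular flag mask highlighted (badflags == 0).
 */
static void report_unexpected_page(struct page *page)
{
        if (WARN_ON(page_mapcount(page)))
                dump_page(page, "unexpected nonzero mapcount");
}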