#include <linux/ftrace_event.h>
#include <linux/memcontrol.h>
#include <linux/prefetch.h>
+#include <linux/page-debug-flags.h>
#include <asm/tlbflush.h>
#include <asm/div64.h>
unsigned long totalram_pages __read_mostly;
unsigned long totalreserve_pages __read_mostly;
+/*
+ * When calculating the number of globally allowed dirty pages, there
+ * is a certain number of per-zone reserves that should not be
+ * considered dirtyable memory. This is the sum of those reserves
+ * over all existing zones that contribute dirtyable memory.
+ */
+unsigned long dirty_balance_reserve __read_mostly;
+
int percpu_pagelist_fraction;
gfp_t gfp_allowed_mask __read_mostly = GFP_BOOT_MASK;
clear_highpage(page + i);
}
+#ifdef CONFIG_DEBUG_PAGEALLOC
+unsigned int _debug_guardpage_minorder;
+
+static int __init debug_guardpage_minorder_setup(char *buf)
+{
+ unsigned long res;
+
+ if (kstrtoul(buf, 10, &res) < 0 || res > MAX_ORDER / 2) {
+ printk(KERN_ERR "Bad debug_guardpage_minorder value\n");
+ return 0;
+ }
+ _debug_guardpage_minorder = res;
+ printk(KERN_INFO "Setting debug_guardpage_minorder to %lu\n", res);
+ return 0;
+}
+__setup("debug_guardpage_minorder=", debug_guardpage_minorder_setup);
+
+static inline void set_page_guard_flag(struct page *page)
+{
+ __set_bit(PAGE_DEBUG_FLAG_GUARD, &page->debug_flags);
+}
+
+static inline void clear_page_guard_flag(struct page *page)
+{
+ __clear_bit(PAGE_DEBUG_FLAG_GUARD, &page->debug_flags);
+}
+#else
+static inline void set_page_guard_flag(struct page *page) { }
+static inline void clear_page_guard_flag(struct page *page) { }
+#endif
+
static inline void set_page_order(struct page *page, int order)
{
set_page_private(page, order);
if (page_zone_id(page) != page_zone_id(buddy))
return 0;
+ if (page_is_guard(buddy) && page_order(buddy) == order) {
+ VM_BUG_ON(page_count(buddy) != 0);
+ return 1;
+ }
+
if (PageBuddy(buddy) && page_order(buddy) == order) {
VM_BUG_ON(page_count(buddy) != 0);
return 1;
buddy = page + (buddy_idx - page_idx);
if (!page_is_buddy(page, buddy, order))
break;
-
- /* Our buddy is free, merge with it and move up one order. */
- list_del(&buddy->lru);
- zone->free_area[order].nr_free--;
- rmv_page_order(buddy);
+ /*
+ * Our buddy is free or it is CONFIG_DEBUG_PAGEALLOC guard page,
+ * merge with it and move up one order.
+ */
+ if (page_is_guard(buddy)) {
+ clear_page_guard_flag(buddy);
+ set_page_private(page, 0);
+ __mod_zone_page_state(zone, NR_FREE_PAGES, 1 << order);
+ } else {
+ list_del(&buddy->lru);
+ zone->free_area[order].nr_free--;
+ rmv_page_order(buddy);
+ }
combined_idx = buddy_idx & page_idx;
page = page + (combined_idx - page_idx);
page_idx = combined_idx;
high--;
size >>= 1;
VM_BUG_ON(bad_range(zone, &page[size]));
+
+#ifdef CONFIG_DEBUG_PAGEALLOC
+ if (high < debug_guardpage_minorder()) {
+ /*
+ * Mark as guard pages (or page), that will allow to
+ * merge back to allocator when buddy will be freed.
+ * Corresponding page table entries will not be touched,
+ * pages will stay not present in virtual address space
+ */
+ INIT_LIST_HEAD(&page[size].lru);
+ set_page_guard_flag(&page[size]);
+ set_page_private(&page[size], high);
+ /* Guard pages are not available for any usage */
+ __mod_zone_page_state(zone, NR_FREE_PAGES, -(1 << high));
+ continue;
+ }
+#endif
list_add(&page[size].lru, &area->free_list[migratetype]);
area->nr_free++;
set_page_order(&page[size], high);
long min = mark;
int o;
- free_pages -= (1 << order) + 1;
+ free_pages -= (1 << order) - 1;
if (alloc_flags & ALLOC_HIGH)
min -= min / 2;
if (alloc_flags & ALLOC_HARDER)
if ((alloc_flags & ALLOC_CPUSET) &&
!cpuset_zone_allowed_softwall(zone, gfp_mask))
continue;
+ /*
+ * When allocating a page cache page for writing, we
+ * want to get it from a zone that is within its dirty
+ * limit, such that no single zone holds more than its
+ * proportional share of globally allowed dirty pages.
+ * The dirty limits take into account the zone's
+ * lowmem reserves and high watermark so that kswapd
+ * should be able to balance it without having to
+ * write pages from its LRU list.
+ *
+ * This may look like it could increase pressure on
+ * lower zones by failing allocations in higher zones
+ * before they are full. But the pages that do spill
+ * over are limited as the lower zones are protected
+ * by this very same mechanism. It should not become
+ * a practical burden to them.
+ *
+ * XXX: For now, allow allocations to potentially
+ * exceed the per-zone dirty limit in the slowpath
+ * (ALLOC_WMARK_LOW unset) before going into reclaim,
+ * which is important when on a NUMA setup the allowed
+ * zones are together not big enough to reach the
+ * global limit. The proper fix for these situations
+ * will require awareness of zones in the
+ * dirty-throttling and the flusher threads.
+ */
+ if ((alloc_flags & ALLOC_WMARK_LOW) &&
+ (gfp_mask & __GFP_WRITE) && !zone_dirty_ok(zone))
+ goto this_zone_full;
BUILD_BUG_ON(ALLOC_NO_WATERMARKS < NR_WMARK);
if (!(alloc_flags & ALLOC_NO_WATERMARKS)) {
{
unsigned int filter = SHOW_MEM_FILTER_NODES;
- if ((gfp_mask & __GFP_NOWARN) || !__ratelimit(&nopage_rs))
+ if ((gfp_mask & __GFP_NOWARN) || !__ratelimit(&nopage_rs) ||
+ debug_guardpage_minorder() > 0)
return;
/*
if (max > zone->present_pages)
max = zone->present_pages;
reserve_pages += max;
+ /*
+ * Lowmem reserves are not available to
+ * GFP_HIGHUSER page cache allocations and
+ * kswapd tries to balance zones to their high
+ * watermark. As a result, neither should be
+ * regarded as dirtyable memory, to prevent a
+ * situation where reclaim has to clean pages
+ * in order to balance the zones.
+ */
+ zone->dirty_balance_reserve = max;
}
}
+ dirty_balance_reserve = reserve_pages;
totalreserve_pages = reserve_pages;
}