X-Git-Url: http://review.tizen.org/git/?a=blobdiff_plain;f=mm%2Fpage_alloc.c;h=f885eb82715950515ec708a4cde5d654ab53bdf7;hb=2bb921e526656556e68f99f5f15a4a1bf2691844;hp=b100255dedda6e48c9cba2d15bc1163b36292a3c;hpb=3d7e5fc37f91c3ad4974262e173d9ba36139652a;p=platform%2Fadaptation%2Frenesas_rcar%2Frenesas_kernel.git diff --git a/mm/page_alloc.c b/mm/page_alloc.c index b100255..f885eb8 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -721,7 +721,8 @@ static bool free_pages_prepare(struct page *page, unsigned int order) return false; if (!PageHighMem(page)) { - debug_check_no_locks_freed(page_address(page),PAGE_SIZE<managed_pages += 1 << order; + page_zone(page)->managed_pages += nr_pages; set_page_refcounted(page); __free_pages(page, order); } #ifdef CONFIG_CMA -/* Free whole pageblock and set it's migration type to MIGRATE_CMA. */ +/* Free whole pageblock and set its migration type to MIGRATE_CMA. */ void __init init_cma_reserved_pageblock(struct page *page) { unsigned i = pageblock_nr_pages; @@ -885,7 +886,7 @@ struct page *__rmqueue_smallest(struct zone *zone, unsigned int order, int migratetype) { unsigned int current_order; - struct free_area * area; + struct free_area *area; struct page *page; /* Find a page of the appropriate size in the preferred list */ @@ -1007,14 +1008,60 @@ static void change_pageblock_range(struct page *pageblock_page, } } +/* + * If breaking a large block of pages, move all free pages to the preferred + * allocation list. If falling back for a reclaimable kernel allocation, be + * more aggressive about taking ownership of free pages. + * + * On the other hand, never change migration type of MIGRATE_CMA pageblocks + * nor move CMA pages to different free lists. We don't want unmovable pages + * to be allocated from MIGRATE_CMA areas. + * + * Returns the new migratetype of the pageblock (or the same old migratetype + * if it was unchanged). + */ +static int try_to_steal_freepages(struct zone *zone, struct page *page, + int start_type, int fallback_type) +{ + int current_order = page_order(page); + + if (is_migrate_cma(fallback_type)) + return fallback_type; + + /* Take ownership for orders >= pageblock_order */ + if (current_order >= pageblock_order) { + change_pageblock_range(page, current_order, start_type); + return start_type; + } + + if (current_order >= pageblock_order / 2 || + start_type == MIGRATE_RECLAIMABLE || + page_group_by_mobility_disabled) { + int pages; + + pages = move_freepages_block(zone, page, start_type); + + /* Claim the whole block if over half of it is free */ + if (pages >= (1 << (pageblock_order-1)) || + page_group_by_mobility_disabled) { + + set_pageblock_migratetype(page, start_type); + return start_type; + } + + } + + return fallback_type; +} + /* Remove an element from the buddy allocator from the fallback list */ static inline struct page * __rmqueue_fallback(struct zone *zone, int order, int start_migratetype) { - struct free_area * area; + struct free_area *area; int current_order; struct page *page; - int migratetype, i; + int migratetype, new_type, i; /* Find the largest possible block of pages in the other list */ for (current_order = MAX_ORDER-1; current_order >= order; @@ -1034,51 +1081,29 @@ __rmqueue_fallback(struct zone *zone, int order, int start_migratetype) struct page, lru); area->nr_free--; - /* - * If breaking a large block of pages, move all free - * pages to the preferred allocation list. If falling - * back for a reclaimable kernel allocation, be more - * aggressive about taking ownership of free pages - * - * On the other hand, never change migration - * type of MIGRATE_CMA pageblocks nor move CMA - * pages on different free lists. We don't - * want unmovable pages to be allocated from - * MIGRATE_CMA areas. - */ - if (!is_migrate_cma(migratetype) && - (current_order >= pageblock_order / 2 || - start_migratetype == MIGRATE_RECLAIMABLE || - page_group_by_mobility_disabled)) { - int pages; - pages = move_freepages_block(zone, page, - start_migratetype); - - /* Claim the whole block if over half of it is free */ - if (pages >= (1 << (pageblock_order-1)) || - page_group_by_mobility_disabled) - set_pageblock_migratetype(page, - start_migratetype); - - migratetype = start_migratetype; - } + new_type = try_to_steal_freepages(zone, page, + start_migratetype, + migratetype); /* Remove the page from the freelists */ list_del(&page->lru); rmv_page_order(page); - /* Take ownership for orders >= pageblock_order */ - if (current_order >= pageblock_order && - !is_migrate_cma(migratetype)) - change_pageblock_range(page, current_order, - start_migratetype); - + /* + * Borrow the excess buddy pages as well, irrespective + * of whether we stole freepages, or took ownership of + * the pageblock or not. + * + * Exception: When borrowing from MIGRATE_CMA, release + * the excess buddy pages to CMA itself. + */ expand(zone, page, order, current_order, area, is_migrate_cma(migratetype) ? migratetype : start_migratetype); - trace_mm_page_alloc_extfrag(page, order, current_order, - start_migratetype, migratetype); + trace_mm_page_alloc_extfrag(page, order, + current_order, start_migratetype, migratetype, + new_type == start_migratetype); return page; } @@ -1526,6 +1551,7 @@ again: get_pageblock_migratetype(page)); } + __mod_zone_page_state(zone, NR_ALLOC_BATCH, -(1 << order)); __count_zone_vm_events(PGALLOC, zone, 1 << order); zone_statistics(preferred_zone, zone, gfp_flags); local_irq_restore(flags); @@ -1792,6 +1818,11 @@ static void zlc_clear_zones_full(struct zonelist *zonelist) bitmap_zero(zlc->fullzones, MAX_ZONES_PER_ZONELIST); } +static bool zone_local(struct zone *local_zone, struct zone *zone) +{ + return node_distance(local_zone->node, zone->node) == LOCAL_DISTANCE; +} + static bool zone_allows_reclaim(struct zone *local_zone, struct zone *zone) { return node_isset(local_zone->node, zone->zone_pgdat->reclaim_nodes); @@ -1829,6 +1860,11 @@ static void zlc_clear_zones_full(struct zonelist *zonelist) { } +static bool zone_local(struct zone *local_zone, struct zone *zone) +{ + return true; +} + static bool zone_allows_reclaim(struct zone *local_zone, struct zone *zone) { return true; @@ -1864,12 +1900,37 @@ zonelist_scan: */ for_each_zone_zonelist_nodemask(zone, z, zonelist, high_zoneidx, nodemask) { + unsigned long mark; + if (IS_ENABLED(CONFIG_NUMA) && zlc_active && !zlc_zone_worth_trying(zonelist, z, allowednodes)) continue; if ((alloc_flags & ALLOC_CPUSET) && !cpuset_zone_allowed_softwall(zone, gfp_mask)) continue; + BUILD_BUG_ON(ALLOC_NO_WATERMARKS < NR_WMARK); + if (unlikely(alloc_flags & ALLOC_NO_WATERMARKS)) + goto try_this_zone; + /* + * Distribute pages in proportion to the individual + * zone size to ensure fair page aging. The zone a + * page was allocated in should have no effect on the + * time the page has in memory before being reclaimed. + * + * When zone_reclaim_mode is enabled, try to stay in + * local zones in the fastpath. If that fails, the + * slowpath is entered, which will do another pass + * starting with the local zones, but ultimately fall + * back to remote zones that do not partake in the + * fairness round-robin cycle of this zonelist. + */ + if (alloc_flags & ALLOC_WMARK_LOW) { + if (zone_page_state(zone, NR_ALLOC_BATCH) <= 0) + continue; + if (zone_reclaim_mode && + !zone_local(preferred_zone, zone)) + continue; + } /* * When allocating a page cache page for writing, we * want to get it from a zone that is within its dirty @@ -1900,16 +1961,11 @@ zonelist_scan: (gfp_mask & __GFP_WRITE) && !zone_dirty_ok(zone)) goto this_zone_full; - BUILD_BUG_ON(ALLOC_NO_WATERMARKS < NR_WMARK); - if (!(alloc_flags & ALLOC_NO_WATERMARKS)) { - unsigned long mark; + mark = zone->watermark[alloc_flags & ALLOC_WMARK_MASK]; + if (!zone_watermark_ok(zone, order, mark, + classzone_idx, alloc_flags)) { int ret; - mark = zone->watermark[alloc_flags & ALLOC_WMARK_MASK]; - if (zone_watermark_ok(zone, order, mark, - classzone_idx, alloc_flags)) - goto try_this_zone; - if (IS_ENABLED(CONFIG_NUMA) && !did_zlc_setup && nr_online_nodes > 1) { /* @@ -2321,16 +2377,30 @@ __alloc_pages_high_priority(gfp_t gfp_mask, unsigned int order, return page; } -static inline -void wake_all_kswapd(unsigned int order, struct zonelist *zonelist, - enum zone_type high_zoneidx, - enum zone_type classzone_idx) +static void prepare_slowpath(gfp_t gfp_mask, unsigned int order, + struct zonelist *zonelist, + enum zone_type high_zoneidx, + struct zone *preferred_zone) { struct zoneref *z; struct zone *zone; - for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) - wakeup_kswapd(zone, order, classzone_idx); + for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) { + if (!(gfp_mask & __GFP_NO_KSWAPD)) + wakeup_kswapd(zone, order, zone_idx(preferred_zone)); + /* + * Only reset the batches of zones that were actually + * considered in the fast path, we don't want to + * thrash fairness information for zones that are not + * actually part of this zonelist's round-robin cycle. + */ + if (zone_reclaim_mode && !zone_local(preferred_zone, zone)) + continue; + mod_zone_page_state(zone, NR_ALLOC_BATCH, + high_wmark_pages(zone) - + low_wmark_pages(zone) - + zone_page_state(zone, NR_ALLOC_BATCH)); + } } static inline int @@ -2426,9 +2496,8 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order, goto nopage; restart: - if (!(gfp_mask & __GFP_NO_KSWAPD)) - wake_all_kswapd(order, zonelist, high_zoneidx, - zone_idx(preferred_zone)); + prepare_slowpath(gfp_mask, order, zonelist, + high_zoneidx, preferred_zone); /* * OK, we're below the kswapd watermark and have kicked background @@ -3104,7 +3173,7 @@ void show_free_areas(unsigned int filter) } for_each_populated_zone(zone) { - unsigned long nr[MAX_ORDER], flags, order, total = 0; + unsigned long nr[MAX_ORDER], flags, order, total = 0; unsigned char types[MAX_ORDER]; if (skip_free_areas_node(filter, zone_to_nid(zone))) @@ -3416,11 +3485,11 @@ static void build_zonelists_in_zone_order(pg_data_t *pgdat, int nr_nodes) static int default_zonelist_order(void) { int nid, zone_type; - unsigned long low_kmem_size,total_size; + unsigned long low_kmem_size, total_size; struct zone *z; int average_size; /* - * ZONE_DMA and ZONE_DMA32 can be very small area in the system. + * ZONE_DMA and ZONE_DMA32 can be very small area in the system. * If they are really small and used heavily, the system can fall * into OOM very easily. * This function detect ZONE_DMA/DMA32 size and configures zone order. @@ -3452,9 +3521,9 @@ static int default_zonelist_order(void) return ZONELIST_ORDER_NODE; /* * look into each node's config. - * If there is a node whose DMA/DMA32 memory is very big area on - * local memory, NODE_ORDER may be suitable. - */ + * If there is a node whose DMA/DMA32 memory is very big area on + * local memory, NODE_ORDER may be suitable. + */ average_size = total_size / (nodes_weight(node_states[N_MEMORY]) + 1); for_each_online_node(nid) { @@ -4180,7 +4249,7 @@ int zone_wait_table_init(struct zone *zone, unsigned long zone_size_pages) if (!zone->wait_table) return -ENOMEM; - for(i = 0; i < zone->wait_table_hash_nr_entries; ++i) + for (i = 0; i < zone->wait_table_hash_nr_entries; ++i) init_waitqueue_head(zone->wait_table + i); return 0; @@ -4586,7 +4655,7 @@ static inline void setup_usemap(struct pglist_data *pgdat, struct zone *zone, #ifdef CONFIG_HUGETLB_PAGE_SIZE_VARIABLE /* Initialise the number of pages represented by NR_PAGEBLOCK_BITS */ -void __init set_pageblock_order(void) +void __paginginit set_pageblock_order(void) { unsigned int order; @@ -4614,7 +4683,7 @@ void __init set_pageblock_order(void) * include/linux/pageblock-flags.h for the values of pageblock_order based on * the kernel config */ -void __init set_pageblock_order(void) +void __paginginit set_pageblock_order(void) { } @@ -4728,8 +4797,11 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat, spin_lock_init(&zone->lru_lock); zone_seqlock_init(zone); zone->zone_pgdat = pgdat; - zone_pcp_init(zone); + + /* For bootup, initialized properly in watermark setup */ + mod_zone_page_state(zone, NR_ALLOC_BATCH, zone->managed_pages); + lruvec_init(&zone->lruvec); if (!size) continue; @@ -4930,7 +5002,7 @@ static unsigned long __init early_calculate_totalpages(void) if (pages) node_set_state(nid, N_MEMORY); } - return totalpages; + return totalpages; } /* @@ -5047,7 +5119,7 @@ restart: /* * Some kernelcore has been met, update counts and * break if the kernelcore for this node has been - * satisified + * satisfied */ required_kernelcore -= min(required_kernelcore, size_pages); @@ -5061,7 +5133,7 @@ restart: * If there is still required_kernelcore, we do another pass with one * less node in the count. This will push zone_movable_pfn[nid] further * along on the nodes that still have memory until kernelcore is - * satisified + * satisfied */ usable_nodes--; if (usable_nodes && required_kernelcore > usable_nodes) @@ -5286,8 +5358,10 @@ void __init mem_init_print_info(const char *str) * 3) .rodata.* may be embedded into .text or .data sections. */ #define adj_init_size(start, end, size, pos, adj) \ - if (start <= pos && pos < end && size > adj) \ - size -= adj; + do { \ + if (start <= pos && pos < end && size > adj) \ + size -= adj; \ + } while (0) adj_init_size(__init_begin, __init_end, init_data_size, _sinittext, init_code_size); @@ -5361,7 +5435,7 @@ static int page_alloc_cpu_notify(struct notifier_block *self, * This is only okay since the processor is dead and cannot * race with what we are doing. */ - refresh_cpu_vm_stats(cpu); + cpu_vm_stats_fold(cpu); } return NOTIFY_OK; } @@ -5498,6 +5572,11 @@ static void __setup_per_zone_wmarks(void) zone->watermark[WMARK_LOW] = min_wmark_pages(zone) + (tmp >> 2); zone->watermark[WMARK_HIGH] = min_wmark_pages(zone) + (tmp >> 1); + __mod_zone_page_state(zone, NR_ALLOC_BATCH, + high_wmark_pages(zone) - + low_wmark_pages(zone) - + zone_page_state(zone, NR_ALLOC_BATCH)); + setup_zone_migrate_reserve(zone); spin_unlock_irqrestore(&zone->lock, flags); } @@ -5570,7 +5649,7 @@ static void __meminit setup_per_zone_inactive_ratio(void) * we want it large (64MB max). But it is not linear, because network * bandwidth does not increase linearly with machine size. We use * - * min_free_kbytes = 4 * sqrt(lowmem_kbytes), for better accuracy: + * min_free_kbytes = 4 * sqrt(lowmem_kbytes), for better accuracy: * min_free_kbytes = sqrt(lowmem_kbytes * 16) * * which yields @@ -5614,11 +5693,11 @@ int __meminit init_per_zone_wmark_min(void) module_init(init_per_zone_wmark_min) /* - * min_free_kbytes_sysctl_handler - just a wrapper around proc_dointvec() so + * min_free_kbytes_sysctl_handler - just a wrapper around proc_dointvec() so * that we can call two helper functions whenever min_free_kbytes * changes. */ -int min_free_kbytes_sysctl_handler(ctl_table *table, int write, +int min_free_kbytes_sysctl_handler(ctl_table *table, int write, void __user *buffer, size_t *length, loff_t *ppos) { proc_dointvec(table, write, buffer, length, ppos); @@ -5682,8 +5761,8 @@ int lowmem_reserve_ratio_sysctl_handler(ctl_table *table, int write, /* * percpu_pagelist_fraction - changes the pcp->high for each zone on each - * cpu. It is the fraction of total pages in each zone that a hot per cpu pagelist - * can have before it gets flushed back to buddy allocator. + * cpu. It is the fraction of total pages in each zone that a hot per cpu + * pagelist can have before it gets flushed back to buddy allocator. */ int percpu_pagelist_fraction_sysctl_handler(ctl_table *table, int write, void __user *buffer, size_t *length, loff_t *ppos) @@ -5745,9 +5824,10 @@ void *__init alloc_large_system_hash(const char *tablename, if (!numentries) { /* round applicable memory size up to nearest megabyte */ numentries = nr_kernel_pages; - numentries += (1UL << (20 - PAGE_SHIFT)) - 1; - numentries >>= 20 - PAGE_SHIFT; - numentries <<= 20 - PAGE_SHIFT; + + /* It isn't necessary when PAGE_SIZE >= 1MB */ + if (PAGE_SHIFT < 20) + numentries = round_up(numentries, (1<<20)/PAGE_SIZE); /* limit to 1 bucket per 2^scale bytes of low memory */ if (scale > PAGE_SHIFT) @@ -5900,7 +5980,7 @@ void set_pageblock_flags_group(struct page *page, unsigned long flags, * This function checks whether pageblock includes unmovable pages or not. * If @count is not zero, it is okay to include less @count unmovable pages * - * PageLRU check wihtout isolation or lru_lock could race so that + * PageLRU check without isolation or lru_lock could race so that * MIGRATE_MOVABLE block might include unmovable pages. It means you can't * expect this function should be exact. */