return -1;
}
+/*
+ * Reserve a pageblock for exclusive use of high-order atomic allocations if
+ * there are no empty page blocks that contain a page with a suitable order
+ */
+static void reserve_highatomic_pageblock(struct page *page, struct zone *zone,
+ unsigned int alloc_order)
+{
+ int mt;
+ unsigned long max_managed, flags;
+
+ /*
+ * Limit the number reserved to 1 pageblock or roughly 1% of a zone.
+ * Check is race-prone but harmless.
+ */
+ max_managed = (zone->managed_pages / 100) + pageblock_nr_pages;
+ if (zone->nr_reserved_highatomic >= max_managed)
+ return;
+
+ spin_lock_irqsave(&zone->lock, flags);
+
+ /* Recheck the nr_reserved_highatomic limit under the lock */
+ if (zone->nr_reserved_highatomic >= max_managed)
+ goto out_unlock;
+
+ /* Yoink! */
+ mt = get_pageblock_migratetype(page);
+ if (mt != MIGRATE_HIGHATOMIC &&
+ !is_migrate_isolate(mt) && !is_migrate_cma(mt)) {
+ zone->nr_reserved_highatomic += pageblock_nr_pages;
+ set_pageblock_migratetype(page, MIGRATE_HIGHATOMIC);
+ move_freepages_block(zone, page, MIGRATE_HIGHATOMIC);
+ }
+
+out_unlock:
+ spin_unlock_irqrestore(&zone->lock, flags);
+}
+
+/*
+ * Used when an allocation is about to fail under memory pressure. This
+ * potentially hurts the reliability of high-order allocations when under
+ * intense memory pressure but failed atomic allocations should be easier
+ * to recover from than an OOM.
+ */
+static void unreserve_highatomic_pageblock(const struct alloc_context *ac)
+{
+ struct zonelist *zonelist = ac->zonelist;
+ unsigned long flags;
+ struct zoneref *z;
+ struct zone *zone;
+ struct page *page;
+ int order;
+
+ for_each_zone_zonelist_nodemask(zone, z, zonelist, ac->high_zoneidx,
+ ac->nodemask) {
+ /* Preserve at least one pageblock */
+ if (zone->nr_reserved_highatomic <= pageblock_nr_pages)
+ continue;
+
+ spin_lock_irqsave(&zone->lock, flags);
+ for (order = 0; order < MAX_ORDER; order++) {
+ struct free_area *area = &(zone->free_area[order]);
+
+ if (list_empty(&area->free_list[MIGRATE_HIGHATOMIC]))
+ continue;
+
+ page = list_entry(area->free_list[MIGRATE_HIGHATOMIC].next,
+ struct page, lru);
+
+ /*
+ * It should never happen but changes to locking could
+ * inadvertently allow a per-cpu drain to add pages
+ * to MIGRATE_HIGHATOMIC while unreserving so be safe
+ * and watch for underflows.
+ */
+ zone->nr_reserved_highatomic -= min(pageblock_nr_pages,
+ zone->nr_reserved_highatomic);
+
+ /*
+ * Convert to ac->migratetype and avoid the normal
+ * pageblock stealing heuristics. Minimally, the caller
+ * is doing the work and needs the pages. More
+ * importantly, if the block was always converted to
+ * MIGRATE_UNMOVABLE or another type then the number
+ * of pageblocks that cannot be completely freed
+ * may increase.
+ */
+ set_pageblock_migratetype(page, ac->migratetype);
+ move_freepages_block(zone, page, ac->migratetype);
+ spin_unlock_irqrestore(&zone->lock, flags);
+ return;
+ }
+ spin_unlock_irqrestore(&zone->lock, flags);
+ }
+}
+
/* Remove an element from the buddy allocator from the fallback list */
static inline struct page *
__rmqueue_fallback(struct zone *zone, unsigned int order, int start_migratetype)
* Call me with the zone->lock already held.
*/
static struct page *__rmqueue(struct zone *zone, unsigned int order,
- int migratetype)
+ int migratetype, gfp_t gfp_flags)
{
struct page *page;
spin_lock(&zone->lock);
for (i = 0; i < count; ++i) {
- struct page *page = __rmqueue(zone, order, migratetype);
+ struct page *page = __rmqueue(zone, order, migratetype, 0);
if (unlikely(page == NULL))
break;
static inline
struct page *buffered_rmqueue(struct zone *preferred_zone,
struct zone *zone, unsigned int order,
- gfp_t gfp_flags, int migratetype)
+ gfp_t gfp_flags, int alloc_flags, int migratetype)
{
unsigned long flags;
struct page *page;
WARN_ON_ONCE(order > 1);
}
spin_lock_irqsave(&zone->lock, flags);
- page = __rmqueue(zone, order, migratetype);
+
+ page = NULL;
+ if (alloc_flags & ALLOC_HARDER) {
+ page = __rmqueue_smallest(zone, order, MIGRATE_HIGHATOMIC);
+ if (page)
+ trace_mm_page_alloc_zone_locked(page, order, migratetype);
+ }
+ if (!page)
+ page = __rmqueue(zone, order, migratetype, gfp_flags);
spin_unlock(&zone->lock);
if (!page)
goto failed;
unsigned long mark, int classzone_idx, int alloc_flags,
long free_pages)
{
- /* free_pages may go negative - that's OK */
long min = mark;
int o;
long free_cma = 0;
+ /* free_pages may go negative - that's OK */
free_pages -= (1 << order) - 1;
+
if (alloc_flags & ALLOC_HIGH)
min -= min / 2;
- if (alloc_flags & ALLOC_HARDER)
+
+ /*
+ * If the caller does not have rights to ALLOC_HARDER then subtract
+ * the high-atomic reserves. This will over-estimate the size of the
+ * atomic reserve but it avoids a search.
+ */
+ if (likely(!(alloc_flags & ALLOC_HARDER)))
+ free_pages -= z->nr_reserved_highatomic;
+ else
min -= min / 4;
#ifdef CONFIG_CMA
try_this_zone:
page = buffered_rmqueue(ac->preferred_zone, zone, order,
- gfp_mask, ac->migratetype);
+ gfp_mask, alloc_flags, ac->migratetype);
if (page) {
if (prep_new_page(page, order, gfp_mask, alloc_flags))
goto try_this_zone;
+
+ /*
+ * If this is a high-order atomic allocation then check
+ * if the pageblock should be reserved for the future
+ */
+ if (unlikely(order && (alloc_flags & ALLOC_HARDER)))
+ reserve_highatomic_pageblock(page, zone, order);
+
return page;
}
}
/*
* If an allocation failed after direct reclaim, it could be because
- * pages are pinned on the per-cpu lists. Drain them and try again
+ * pages are pinned on the per-cpu lists or in high alloc reserves.
+ * Shrink them them and try again
*/
if (!page && !drained) {
+ unreserve_highatomic_pageblock(ac);
drain_all_pages(NULL);
drained = true;
goto retry;