}
#endif /* CONFIG_COMPACTION */
-static int should_release_lock(spinlock_t *lock)
+/*
+ * Compaction requires the taking of some coarse locks that are potentially
+ * very heavily contended. For async compaction, back out if the lock cannot
+ * be taken immediately. For sync compaction, spin on the lock if needed.
+ *
+ * Returns true if the lock is held
+ * Returns false if the lock is not held and compaction should abort
+ */
+static bool compact_trylock_irqsave(spinlock_t *lock, unsigned long *flags,
+ struct compact_control *cc)
{
- /*
- * Sched contention has higher priority here as we may potentially
- * have to abort whole compaction ASAP. Returning with lock contention
- * means we will try another zone, and further decisions are
- * influenced only when all zones are lock contended. That means
- * potentially missing a lock contention is less critical.
- */
- if (need_resched())
- return COMPACT_CONTENDED_SCHED;
- else if (spin_is_contended(lock))
- return COMPACT_CONTENDED_LOCK;
+ if (cc->mode == MIGRATE_ASYNC) {
+ if (!spin_trylock_irqsave(lock, *flags)) {
+ cc->contended = COMPACT_CONTENDED_LOCK;
+ return false;
+ }
+ } else {
+ spin_lock_irqsave(lock, *flags);
+ }
- return COMPACT_CONTENDED_NONE;
+ return true;
}
/*
* Compaction requires the taking of some coarse locks that are potentially
- * very heavily contended. Check if the process needs to be scheduled or
- * if the lock is contended. For async compaction, back out in the event
- * if contention is severe. For sync compaction, schedule.
+ * very heavily contended. The lock should be periodically unlocked to avoid
+ * having disabled IRQs for a long time, even when there is nobody waiting on
+ * the lock. It might also be that allowing the IRQs will result in
+ * need_resched() becoming true. If scheduling is needed, async compaction
+ * aborts. Sync compaction schedules.
+ * Either compaction type will also abort if a fatal signal is pending.
+ * In either case if the lock was locked, it is dropped and not regained.
*
- * Returns true if the lock is held.
- * Returns false if the lock is released and compaction should abort
+ * Returns true if compaction should abort due to fatal signal pending, or
+ * async compaction due to need_resched()
+ * Returns false when compaction can continue (sync compaction might have
+ * scheduled)
*/
-static bool compact_checklock_irqsave(spinlock_t *lock, unsigned long *flags,
- bool locked, struct compact_control *cc)
+static bool compact_unlock_should_abort(spinlock_t *lock,
+ unsigned long flags, bool *locked, struct compact_control *cc)
{
- int contended = should_release_lock(lock);
+ if (*locked) {
+ spin_unlock_irqrestore(lock, flags);
+ *locked = false;
+ }
- if (contended) {
- if (locked) {
- spin_unlock_irqrestore(lock, *flags);
- locked = false;
- }
+ if (fatal_signal_pending(current)) {
+ cc->contended = COMPACT_CONTENDED_SCHED;
+ return true;
+ }
- /* async aborts if taking too long or contended */
+ if (need_resched()) {
if (cc->mode == MIGRATE_ASYNC) {
- cc->contended = contended;
- return false;
+ cc->contended = COMPACT_CONTENDED_SCHED;
+ return true;
}
-
cond_resched();
}
- if (!locked)
- spin_lock_irqsave(lock, *flags);
- return true;
+ return false;
}
/*
* Aside from avoiding lock contention, compaction also periodically checks
* need_resched() and either schedules in sync compaction or aborts async
- * compaction. This is similar to what compact_checklock_irqsave() does, but
+ * compaction. This is similar to what compact_unlock_should_abort() does, but
* is used where no lock is concerned.
*
* Returns false when no scheduling was needed, or sync compaction scheduled.
static bool suitable_migration_target(struct page *page)
{
/* If the page is a large free page, then disallow migration */
- if (PageBuddy(page) && page_order(page) >= pageblock_order)
- return false;
+ if (PageBuddy(page)) {
+ /*
+ * We are checking page_order without zone->lock taken. But
+ * the only small danger is that we skip a potentially suitable
+ * pageblock, so it's not worth to check order for valid range.
+ */
+ if (page_order_unsafe(page) >= pageblock_order)
+ return false;
+ }
/* If the block is MIGRATE_MOVABLE or MIGRATE_CMA, allow migration */
if (migrate_async_suitable(get_pageblock_migratetype(page)))
* (even though it may still end up isolating some pages).
*/
static unsigned long isolate_freepages_block(struct compact_control *cc,
- unsigned long blockpfn,
+ unsigned long *start_pfn,
unsigned long end_pfn,
struct list_head *freelist,
bool strict)
{
int nr_scanned = 0, total_isolated = 0;
struct page *cursor, *valid_page = NULL;
- unsigned long flags;
+ unsigned long flags = 0;
bool locked = false;
+ unsigned long blockpfn = *start_pfn;
cursor = pfn_to_page(blockpfn);
int isolated, i;
struct page *page = cursor;
+ /*
+ * Periodically drop the lock (if held) regardless of its
+ * contention, to give chance to IRQs. Abort if fatal signal
+ * pending or async compaction detects need_resched()
+ */
+ if (!(blockpfn % SWAP_CLUSTER_MAX)
+ && compact_unlock_should_abort(&cc->zone->lock, flags,
+ &locked, cc))
+ break;
+
nr_scanned++;
if (!pfn_valid_within(blockpfn))
goto isolate_fail;
goto isolate_fail;
/*
- * The zone lock must be held to isolate freepages.
- * Unfortunately this is a very coarse lock and can be
- * heavily contended if there are parallel allocations
- * or parallel compactions. For async compaction do not
- * spin on the lock and we acquire the lock as late as
- * possible.
+ * If we already hold the lock, we can skip some rechecking.
+ * Note that if we hold the lock now, checked_pageblock was
+ * already set in some previous iteration (or strict is true),
+ * so it is correct to skip the suitable migration target
+ * recheck as well.
*/
- locked = compact_checklock_irqsave(&cc->zone->lock, &flags,
- locked, cc);
- if (!locked)
- break;
+ if (!locked) {
+ /*
+ * The zone lock must be held to isolate freepages.
+ * Unfortunately this is a very coarse lock and can be
+ * heavily contended if there are parallel allocations
+ * or parallel compactions. For async compaction do not
+ * spin on the lock and we acquire the lock as late as
+ * possible.
+ */
+ locked = compact_trylock_irqsave(&cc->zone->lock,
+ &flags, cc);
+ if (!locked)
+ break;
- /* Recheck this is a buddy page under lock */
- if (!PageBuddy(page))
- goto isolate_fail;
+ /* Recheck this is a buddy page under lock */
+ if (!PageBuddy(page))
+ goto isolate_fail;
+ }
/* Found a free page, break it into order-0 pages */
isolated = split_free_page(page);
}
+ /* Record how far we have got within the block */
+ *start_pfn = blockpfn;
+
trace_mm_compaction_isolate_freepages(nr_scanned, total_isolated);
/*
for (; pfn < end_pfn; pfn += isolated,
block_end_pfn += pageblock_nr_pages) {
+ /* Protect pfn from changing by isolate_freepages_block */
+ unsigned long isolate_start_pfn = pfn;
block_end_pfn = min(block_end_pfn, end_pfn);
if (!pageblock_pfn_to_page(pfn, block_end_pfn, cc->zone))
break;
- isolated = isolate_freepages_block(cc, pfn, block_end_pfn,
- &freelist, true);
+ isolated = isolate_freepages_block(cc, &isolate_start_pfn,
+ block_end_pfn, &freelist, true);
/*
* In strict mode, isolate_freepages_block() returns 0 if
unsigned long nr_scanned = 0, nr_isolated = 0;
struct list_head *migratelist = &cc->migratepages;
struct lruvec *lruvec;
- unsigned long flags;
+ unsigned long flags = 0;
bool locked = false;
struct page *page = NULL, *valid_page = NULL;
/* Time to isolate some pages for migration */
for (; low_pfn < end_pfn; low_pfn++) {
- /* give a chance to irqs before checking need_resched() */
- if (locked && !(low_pfn % SWAP_CLUSTER_MAX)) {
- if (should_release_lock(&zone->lru_lock)) {
- spin_unlock_irqrestore(&zone->lru_lock, flags);
- locked = false;
- }
- }
+ /*
+ * Periodically drop the lock (if held) regardless of its
+ * contention, to give chance to IRQs. Abort async compaction
+ * if contended.
+ */
+ if (!(low_pfn % SWAP_CLUSTER_MAX)
+ && compact_unlock_should_abort(&zone->lru_lock, flags,
+ &locked, cc))
+ break;
if (!pfn_valid_within(low_pfn))
continue;
valid_page = page;
/*
- * Skip if free. page_order cannot be used without zone->lock
- * as nothing prevents parallel allocations or buddy merging.
+ * Skip if free. We read page order here without zone lock
+ * which is generally unsafe, but the race window is small and
+ * the worst thing that can happen is that we skip some
+ * potential isolation targets.
*/
- if (PageBuddy(page))
+ if (PageBuddy(page)) {
+ unsigned long freepage_order = page_order_unsafe(page);
+
+ /*
+ * Without lock, we cannot be sure that what we got is
+ * a valid page order. Consider only values in the
+ * valid order range to prevent low_pfn overflow.
+ */
+ if (freepage_order > 0 && freepage_order < MAX_ORDER)
+ low_pfn += (1UL << freepage_order) - 1;
continue;
+ }
/*
* Check may be lockless but that's ok as we recheck later.
*/
if (!PageLRU(page)) {
if (unlikely(balloon_page_movable(page))) {
- if (locked && balloon_page_isolate(page)) {
+ if (balloon_page_isolate(page)) {
/* Successfully isolated */
goto isolate_success;
}
page_count(page) > page_mapcount(page))
continue;
- /* Check if it is ok to still hold the lock */
- locked = compact_checklock_irqsave(&zone->lru_lock, &flags,
- locked, cc);
- if (!locked || fatal_signal_pending(current))
- break;
+ /* If we already hold the lock, we can skip some rechecking */
+ if (!locked) {
+ locked = compact_trylock_irqsave(&zone->lru_lock,
+ &flags, cc);
+ if (!locked)
+ break;
- /* Recheck PageLRU and PageTransHuge under lock */
- if (!PageLRU(page))
- continue;
- if (PageTransHuge(page)) {
- low_pfn += (1 << compound_order(page)) - 1;
- continue;
+ /* Recheck PageLRU and PageTransHuge under lock */
+ if (!PageLRU(page))
+ continue;
+ if (PageTransHuge(page)) {
+ low_pfn += (1 << compound_order(page)) - 1;
+ continue;
+ }
}
lruvec = mem_cgroup_page_lruvec(page, zone);
}
}
+ /*
+ * The PageBuddy() check could have potentially brought us outside
+ * the range to be scanned.
+ */
+ if (unlikely(low_pfn > end_pfn))
+ low_pfn = end_pfn;
+
if (locked)
spin_unlock_irqrestore(&zone->lru_lock, flags);
struct zone *zone = cc->zone;
struct page *page;
unsigned long block_start_pfn; /* start of current pageblock */
+ unsigned long isolate_start_pfn; /* exact pfn we start at */
unsigned long block_end_pfn; /* end of current pageblock */
unsigned long low_pfn; /* lowest pfn scanner is able to scan */
int nr_freepages = cc->nr_freepages;
/*
* Initialise the free scanner. The starting point is where we last
* successfully isolated from, zone-cached value, or the end of the
- * zone when isolating for the first time. We need this aligned to
- * the pageblock boundary, because we do
+ * zone when isolating for the first time. For looping we also need
+ * this pfn aligned down to the pageblock boundary, because we do
* block_start_pfn -= pageblock_nr_pages in the for loop.
* For ending point, take care when isolating in last pageblock of a
* a zone which ends in the middle of a pageblock.
* The low boundary is the end of the pageblock the migration scanner
* is using.
*/
+ isolate_start_pfn = cc->free_pfn;
block_start_pfn = cc->free_pfn & ~(pageblock_nr_pages-1);
block_end_pfn = min(block_start_pfn + pageblock_nr_pages,
zone_end_pfn(zone));
*/
for (; block_start_pfn >= low_pfn && cc->nr_migratepages > nr_freepages;
block_end_pfn = block_start_pfn,
- block_start_pfn -= pageblock_nr_pages) {
+ block_start_pfn -= pageblock_nr_pages,
+ isolate_start_pfn = block_start_pfn) {
unsigned long isolated;
/*
if (!isolation_suitable(cc, page))
continue;
- /* Found a block suitable for isolating free pages from */
- cc->free_pfn = block_start_pfn;
- isolated = isolate_freepages_block(cc, block_start_pfn,
+ /* Found a block suitable for isolating free pages from. */
+ isolated = isolate_freepages_block(cc, &isolate_start_pfn,
block_end_pfn, freelist, false);
nr_freepages += isolated;
/*
+ * Remember where the free scanner should restart next time,
+ * which is where isolate_freepages_block() left off.
+ * But if it scanned the whole pageblock, isolate_start_pfn
+ * now points at block_end_pfn, which is the start of the next
+ * pageblock.
+ * In that case we will however want to restart at the start
+ * of the previous pageblock.
+ */
+ cc->free_pfn = (isolate_start_pfn < block_end_pfn) ?
+ isolate_start_pfn :
+ block_start_pfn - pageblock_nr_pages;
+
+ /*
* Set a flag that we successfully isolated in this pageblock.
* In the next loop iteration, zone->compact_cached_free_pfn
* will not be updated and thus it will effectively contain the
return cc->nr_migratepages ? ISOLATE_SUCCESS : ISOLATE_NONE;
}
-static int compact_finished(struct zone *zone,
- struct compact_control *cc)
+static int compact_finished(struct zone *zone, struct compact_control *cc,
+ const int migratetype)
{
unsigned int order;
unsigned long watermark;
struct free_area *area = &zone->free_area[order];
/* Job done if page is free of the right migratetype */
- if (!list_empty(&area->free_list[cc->migratetype]))
+ if (!list_empty(&area->free_list[migratetype]))
return COMPACT_PARTIAL;
/* Job done if allocation would set block type */
int ret;
unsigned long start_pfn = zone->zone_start_pfn;
unsigned long end_pfn = zone_end_pfn(zone);
+ const int migratetype = gfpflags_to_migratetype(cc->gfp_mask);
const bool sync = cc->mode != MIGRATE_ASYNC;
ret = compaction_suitable(zone, cc->order);
migrate_prep_local();
- while ((ret = compact_finished(zone, cc)) == COMPACT_CONTINUE) {
+ while ((ret = compact_finished(zone, cc, migratetype)) ==
+ COMPACT_CONTINUE) {
int err;
switch (isolate_migratepages(zone, cc)) {
.nr_freepages = 0,
.nr_migratepages = 0,
.order = order,
- .migratetype = allocflags_to_migratetype(gfp_mask),
+ .gfp_mask = gfp_mask,
.zone = zone,
.mode = mode,
};
return COMPACT_SKIPPED;
#ifdef CONFIG_CMA
- if (allocflags_to_migratetype(gfp_mask) == MIGRATE_MOVABLE)
+ if (gfpflags_to_migratetype(gfp_mask) == MIGRATE_MOVABLE)
alloc_flags |= ALLOC_CMA;
#endif
/* Compact each zone in the list */