#include <linux/mm_inline.h>
#include <linux/firmware-map.h>
#include <linux/stop_machine.h>
+#include <linux/hugetlb.h>
#include <asm/tlbflush.h>
void lock_memory_hotplug(void)
{
mutex_lock(&mem_hotplug_mutex);
-
- /* for exclusive hibernation if CONFIG_HIBERNATION=y */
- lock_system_sleep();
}
void unlock_memory_hotplug(void)
{
- unlock_system_sleep();
mutex_unlock(&mem_hotplug_mutex);
}
zone = &pgdat->node_zones[0];
for (; zone < pgdat->node_zones + MAX_NR_ZONES - 1; zone++) {
- if (zone->wait_table) {
+ if (zone_is_initialized(zone)) {
nr_pages = zone->wait_table_hash_nr_entries
* sizeof(wait_queue_head_t);
nr_pages = PAGE_ALIGN(nr_pages) >> PAGE_SHIFT;
zone_span_writelock(zone);
- old_zone_end_pfn = zone->zone_start_pfn + zone->spanned_pages;
- if (!zone->spanned_pages || start_pfn < zone->zone_start_pfn)
+ old_zone_end_pfn = zone_end_pfn(zone);
+ if (zone_is_empty(zone) || start_pfn < zone->zone_start_pfn)
zone->zone_start_pfn = start_pfn;
zone->spanned_pages = max(old_zone_end_pfn, end_pfn) -
goto out_fail;
/* use start_pfn for z1's start_pfn if z1 is empty */
- if (z1->spanned_pages)
+ if (!zone_is_empty(z1))
z1_start_pfn = z1->zone_start_pfn;
else
z1_start_pfn = start_pfn;
goto out_fail;
/* use end_pfn for z2's end_pfn if z2 is empty */
- if (z2->spanned_pages)
+ if (!zone_is_empty(z2))
z2_end_pfn = zone_end_pfn(z2);
else
z2_end_pfn = end_pfn;
static void shrink_zone_span(struct zone *zone, unsigned long start_pfn,
unsigned long end_pfn)
{
- unsigned long zone_start_pfn = zone->zone_start_pfn;
- unsigned long zone_end_pfn = zone->zone_start_pfn + zone->spanned_pages;
+ unsigned long zone_start_pfn = zone->zone_start_pfn;
+ unsigned long z = zone_end_pfn(zone); /* zone_end_pfn namespace clash */
+ unsigned long zone_end_pfn = z;
unsigned long pfn;
struct mem_section *ms;
int nid = zone_to_nid(zone);
return ret;
}
+static int check_hotplug_memory_range(u64 start, u64 size)
+{
+ u64 start_pfn = start >> PAGE_SHIFT;
+ u64 nr_pages = size >> PAGE_SHIFT;
+
+ /* Memory range must be aligned with section */
+ if ((start_pfn & ~PAGE_SECTION_MASK) ||
+ (nr_pages % PAGES_PER_SECTION) || (!nr_pages)) {
+ pr_err("Section-unaligned hotplug range: start 0x%llx, size 0x%llx\n",
+ (unsigned long long)start,
+ (unsigned long long)size);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
/* we are OK calling __meminit stuff here - we have CONFIG_MEMORY_HOTPLUG */
int __ref add_memory(int nid, u64 start, u64 size)
{
struct resource *res;
int ret;
+ ret = check_hotplug_memory_range(start, size);
+ if (ret)
+ return ret;
+
lock_memory_hotplug();
res = register_memory_resource(start, size);
}
/*
- * Scanning pfn is much easier than scanning lru list.
- * Scan pfn from start to end and Find LRU page.
+ * Scan pfn range [start,end) to find movable/migratable pages (LRU pages
+ * and hugepages). We scan pfn because it's much easier than scanning over
+ * linked list. This function returns the pfn of the first found movable
+ * page if it's found, otherwise 0.
*/
-static unsigned long scan_lru_pages(unsigned long start, unsigned long end)
+static unsigned long scan_movable_pages(unsigned long start, unsigned long end)
{
unsigned long pfn;
struct page *page;
page = pfn_to_page(pfn);
if (PageLRU(page))
return pfn;
+ if (PageHuge(page)) {
+ if (is_hugepage_active(page))
+ return pfn;
+ else
+ pfn = round_up(pfn + 1,
+ 1 << compound_order(page)) - 1;
+ }
}
}
return 0;
if (!pfn_valid(pfn))
continue;
page = pfn_to_page(pfn);
+
+ if (PageHuge(page)) {
+ struct page *head = compound_head(page);
+ pfn = page_to_pfn(head) + (1<<compound_order(head)) - 1;
+ if (compound_order(head) > PFN_SECTION_SHIFT) {
+ ret = -EBUSY;
+ break;
+ }
+ if (isolate_huge_page(page, &source))
+ move_pages -= 1 << compound_order(head);
+ continue;
+ }
+
if (!get_page_unless_zero(page))
continue;
/*
}
if (!list_empty(&source)) {
if (not_managed) {
- putback_lru_pages(&source);
+ putback_movable_pages(&source);
goto out;
}
ret = migrate_pages(&source, alloc_migrate_target, 0,
MIGRATE_SYNC, MR_MEMORY_HOTPLUG);
if (ret)
- putback_lru_pages(&source);
+ putback_movable_pages(&source);
}
out:
return ret;
struct zone *zone;
struct memory_notify arg;
- BUG_ON(start_pfn >= end_pfn);
/* at least, alignment against pageblock is necessary */
if (!IS_ALIGNED(start_pfn, pageblock_nr_pages))
return -EINVAL;
drain_all_pages();
}
- pfn = scan_lru_pages(start_pfn, end_pfn);
- if (pfn) { /* We have page on LRU */
+ pfn = scan_movable_pages(start_pfn, end_pfn);
+ if (pfn) { /* We have movable pages */
ret = do_migrate_range(pfn, end_pfn);
if (!ret) {
drain = 1;
yield();
/* drain pcp pages, this is synchronous. */
drain_all_pages();
+ /*
+ * dissolve free hugepages in the memory block before doing offlining
+ * actually in order to make hugetlbfs's object counting consistent.
+ */
+ dissolve_free_huge_pages(start_pfn, end_pfn);
/* check again */
offlined_pages = check_pages_isolated(start_pfn, end_pfn);
if (offlined_pages < 0) {
return ret;
}
-static int check_cpu_on_node(void *data)
+static int check_cpu_on_node(pg_data_t *pgdat)
{
- struct pglist_data *pgdat = data;
int cpu;
for_each_present_cpu(cpu) {
return 0;
}
-static void unmap_cpu_on_node(void *data)
+static void unmap_cpu_on_node(pg_data_t *pgdat)
{
#ifdef CONFIG_ACPI_NUMA
- struct pglist_data *pgdat = data;
int cpu;
for_each_possible_cpu(cpu)
#endif
}
-static int check_and_unmap_cpu_on_node(void *data)
+static int check_and_unmap_cpu_on_node(pg_data_t *pgdat)
{
- int ret = check_cpu_on_node(data);
+ int ret;
+ ret = check_cpu_on_node(pgdat);
if (ret)
return ret;
* the cpu_to_node() now.
*/
- unmap_cpu_on_node(data);
+ unmap_cpu_on_node(pgdat);
return 0;
}
-/* offline the node if all memory sections of this node are removed */
+/**
+ * try_offline_node
+ *
+ * Offline a node if all memory sections and cpus of the node are removed.
+ *
+ * NOTE: The caller must call lock_device_hotplug() to serialize hotplug
+ * and online/offline operations before this call.
+ */
void try_offline_node(int nid)
{
pg_data_t *pgdat = NODE_DATA(nid);
return;
}
- if (stop_machine(check_and_unmap_cpu_on_node, pgdat, NULL))
+ if (check_and_unmap_cpu_on_node(pgdat))
return;
/*
}
EXPORT_SYMBOL(try_offline_node);
+/**
+ * remove_memory
+ *
+ * NOTE: The caller must call lock_device_hotplug() to serialize hotplug
+ * and online/offline operations before this call, as required by
+ * try_offline_node().
+ */
void __ref remove_memory(int nid, u64 start, u64 size)
{
int ret;
+ BUG_ON(check_hotplug_memory_range(start, size));
+
lock_memory_hotplug();
/*