sizeof(struct page) * cur_nr_pages);
}
-#ifdef CONFIG_ZONE_DEVICE
/*
* Zone shrinking code cannot properly deal with ZONE_DEVICE. So
* we will not try to shrink the zones - which is okay as
* set_zone_contiguous() cannot deal with ZONE_DEVICE either way.
*/
- if (zone_idx(zone) == ZONE_DEVICE)
+ if (zone_is_zone_device(zone))
return;
-#endif
clear_zone_contiguous(zone);
#endif /* CONFIG_CMA */
}
}
+struct auto_movable_group_stats {
+ unsigned long movable_pages;
+ unsigned long req_kernel_early_pages;
+};
-static bool auto_movable_can_online_movable(int nid, unsigned long nr_pages)
+static int auto_movable_stats_account_group(struct memory_group *group,
+ void *arg)
+{
+ const int ratio = READ_ONCE(auto_movable_ratio);
+ struct auto_movable_group_stats *stats = arg;
+ long pages;
+
+ /*
+ * We don't support modifying the config while the auto-movable online
+ * policy is already enabled. Just avoid the division by zero below.
+ */
+ if (!ratio)
+ return 0;
+
+ /*
+ * Calculate how many early kernel pages this group requires to
+ * satisfy the configured zone ratio.
+ */
+ pages = group->present_movable_pages * 100 / ratio;
+ pages -= group->present_kernel_pages;
+
+ if (pages > 0)
+ stats->req_kernel_early_pages += pages;
+ stats->movable_pages += group->present_movable_pages;
+ return 0;
+}
+
+static bool auto_movable_can_online_movable(int nid, struct memory_group *group,
+ unsigned long nr_pages)
{
- struct auto_movable_stats stats = {};
unsigned long kernel_early_pages, movable_pages;
+ struct auto_movable_group_stats group_stats = {};
+ struct auto_movable_stats stats = {};
pg_data_t *pgdat = NODE_DATA(nid);
struct zone *zone;
int i;
movable_pages = stats.movable_pages;
/*
+ * Kernel memory inside dynamic memory group allows for more MOVABLE
+ * memory within the same group. Remove the effect of all but the
+ * current group from the stats.
+ */
+ walk_dynamic_memory_groups(nid, auto_movable_stats_account_group,
+ group, &group_stats);
+ if (kernel_early_pages <= group_stats.req_kernel_early_pages)
+ return false;
+ kernel_early_pages -= group_stats.req_kernel_early_pages;
+ movable_pages -= group_stats.movable_pages;
+
+ if (group && group->is_dynamic)
+ kernel_early_pages += group->present_kernel_pages;
+
+ /*
* Test if we could online the given number of pages to ZONE_MOVABLE
* and still stay in the configured ratio.
*/
* with unmovable allocations). While there are corner cases where it might
* still work, it is barely relevant in practice.
*
+ * Exceptions are dynamic memory groups, which allow for more MOVABLE
+ * memory within the same memory group -- because in that case, there is
+ * coordination within the single memory device managed by a single driver.
+ *
* We rely on "present pages" instead of "managed pages", as the latter is
* highly unreliable and dynamic in virtualized environments, and does not
* consider boot time allocations. For example, memory ballooning adjusts the
* "present pages" is an upper limit that can get reached at runtime. As
* we base our calculations on KERNEL_EARLY, this is not an issue.
*/
-static struct zone *auto_movable_zone_for_pfn(int nid, unsigned long pfn,
+static struct zone *auto_movable_zone_for_pfn(int nid,
+ struct memory_group *group,
+ unsigned long pfn,
unsigned long nr_pages)
{
+ unsigned long online_pages = 0, max_pages, end_pfn;
+ struct page *page;
+
if (!auto_movable_ratio)
goto kernel_zone;
- if (!auto_movable_can_online_movable(NUMA_NO_NODE, nr_pages))
+ if (group && !group->is_dynamic) {
+ max_pages = group->s.max_pages;
+ online_pages = group->present_movable_pages;
+
+ /* If anything is !MOVABLE online the rest !MOVABLE. */
+ if (group->present_kernel_pages)
+ goto kernel_zone;
+ } else if (!group || group->d.unit_pages == nr_pages) {
+ max_pages = nr_pages;
+ } else {
+ max_pages = group->d.unit_pages;
+ /*
+ * Take a look at all online sections in the current unit.
+ * We can safely assume that all pages within a section belong
+ * to the same zone, because dynamic memory groups only deal
+ * with hotplugged memory.
+ */
+ pfn = ALIGN_DOWN(pfn, group->d.unit_pages);
+ end_pfn = pfn + group->d.unit_pages;
+ for (; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
+ page = pfn_to_online_page(pfn);
+ if (!page)
+ continue;
+ /* If anything is !MOVABLE online the rest !MOVABLE. */
+ if (page_zonenum(page) != ZONE_MOVABLE)
+ goto kernel_zone;
+ online_pages += PAGES_PER_SECTION;
+ }
+ }
+
+ /*
+ * Online MOVABLE if we could *currently* online all remaining parts
+ * MOVABLE. We expect to (add+) online them immediately next, so if
+ * nobody interferes, all will be MOVABLE if possible.
+ */
+ nr_pages = max_pages - online_pages;
+ if (!auto_movable_can_online_movable(NUMA_NO_NODE, group, nr_pages))
goto kernel_zone;
#ifdef CONFIG_NUMA
if (auto_movable_numa_aware &&
- !auto_movable_can_online_movable(nid, nr_pages))
+ !auto_movable_can_online_movable(nid, group, nr_pages))
goto kernel_zone;
#endif /* CONFIG_NUMA */
}
struct zone *zone_for_pfn_range(int online_type, int nid,
- unsigned long start_pfn, unsigned long nr_pages)
+ struct memory_group *group, unsigned long start_pfn,
+ unsigned long nr_pages)
{
if (online_type == MMOP_ONLINE_KERNEL)
return default_kernel_zone_for_pfn(nid, start_pfn, nr_pages);
return &NODE_DATA(nid)->node_zones[ZONE_MOVABLE];
if (online_policy == ONLINE_POLICY_AUTO_MOVABLE)
- return auto_movable_zone_for_pfn(nid, start_pfn, nr_pages);
+ return auto_movable_zone_for_pfn(nid, group, start_pfn, nr_pages);
return default_zone_for_pfn(nid, start_pfn, nr_pages);
}
* This function should only be called by memory_block_{online,offline},
* and {online,offline}_pages.
*/
-void adjust_present_page_count(struct page *page, long nr_pages)
+void adjust_present_page_count(struct page *page, struct memory_group *group,
+ long nr_pages)
{
struct zone *zone = page_zone(page);
+ const bool movable = zone_idx(zone) == ZONE_MOVABLE;
/*
* We only support onlining/offlining/adding/removing of complete
zone->present_early_pages += nr_pages;
zone->present_pages += nr_pages;
zone->zone_pgdat->node_present_pages += nr_pages;
+
+ if (group && movable)
+ group->present_movable_pages += nr_pages;
+ else if (group && !movable)
+ group->present_kernel_pages += nr_pages;
}
int mhp_init_memmap_on_memory(unsigned long pfn, unsigned long nr_pages,
kasan_remove_zero_shadow(__va(PFN_PHYS(pfn)), PFN_PHYS(nr_pages));
}
-int __ref online_pages(unsigned long pfn, unsigned long nr_pages, struct zone *zone)
+int __ref online_pages(unsigned long pfn, unsigned long nr_pages,
+ struct zone *zone, struct memory_group *group)
{
unsigned long flags;
int need_zonelists_rebuild = 0;
}
online_pages_range(pfn, nr_pages);
- adjust_present_page_count(pfn_to_page(pfn), nr_pages);
+ adjust_present_page_count(pfn_to_page(pfn), group, nr_pages);
node_states_set_node(nid, &arg);
if (need_zonelists_rebuild)
{
struct mhp_params params = { .pgprot = pgprot_mhp(PAGE_KERNEL) };
struct vmem_altmap mhp_altmap = {};
+ struct memory_group *group = NULL;
u64 start, size;
bool new_node = false;
int ret;
if (ret)
return ret;
+ if (mhp_flags & MHP_NID_IS_MGID) {
+ group = memory_group_find_by_id(nid);
+ if (!group)
+ return -EINVAL;
+ nid = group->nid;
+ }
+
if (!node_possible(nid)) {
WARN(1, "node %d was absent from the node_possible_map\n", nid);
return -EINVAL;
goto error;
/* create memory block devices after memory was added */
- ret = create_memory_block_devices(start, size, mhp_altmap.alloc);
+ ret = create_memory_block_devices(start, size, mhp_altmap.alloc,
+ group);
if (ret) {
arch_remove_memory(start, size, NULL);
goto error;
if (nodes_empty(nmask))
node_set(mtc.nid, nmask);
ret = migrate_pages(&source, alloc_migration_target, NULL,
- (unsigned long)&mtc, MIGRATE_SYNC, MR_MEMORY_HOTPLUG);
+ (unsigned long)&mtc, MIGRATE_SYNC, MR_MEMORY_HOTPLUG, NULL);
if (ret) {
list_for_each_entry(page, &source, lru) {
if (__ratelimit(&migrate_rs)) {
return 0;
}
-int __ref offline_pages(unsigned long start_pfn, unsigned long nr_pages)
+int __ref offline_pages(unsigned long start_pfn, unsigned long nr_pages,
+ struct memory_group *group)
{
const unsigned long end_pfn = start_pfn + nr_pages;
unsigned long pfn, system_ram_pages = 0;
/* removal success */
adjust_managed_page_count(pfn_to_page(start_pfn), -nr_pages);
- adjust_present_page_count(pfn_to_page(start_pfn), -nr_pages);
+ adjust_present_page_count(pfn_to_page(start_pfn), group, -nr_pages);
/* reinitialise watermarks and update pcp limits */
init_per_zone_wmark_min();