Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf

[platform/kernel/linux-rpi.git] / mm / memory_hotplug.c
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c

index f9d3208..9fd0be3 100644 (file)
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -477,15 +477,13 @@ void __ref remove_pfn_range_from_zone(struct zone *zone,
                                  sizeof(struct page) * cur_nr_pages);
         }
  
-#ifdef CONFIG_ZONE_DEVICE
         /*
          * Zone shrinking code cannot properly deal with ZONE_DEVICE. So
          * we will not try to shrink the zones - which is okay as
          * set_zone_contiguous() cannot deal with ZONE_DEVICE either way.
          */
-       if (zone_idx(zone) == ZONE_DEVICE)
+       if (zone_is_zone_device(zone))
                 return;
-#endif
  
         clear_zone_contiguous(zone);
  
@@ -752,11 +750,44 @@ static void auto_movable_stats_account_zone(struct auto_movable_stats *stats,
  #endif /* CONFIG_CMA */
         }
  }
+/*
+ * Totals that auto_movable_stats_account_group() adds to, through its @arg
+ * pointer, for each memory group it is called on.
+ */
+struct auto_movable_group_stats {
+       /* Sum of present_movable_pages over the accounted groups. */
+       unsigned long movable_pages;
+       /*
+        * Sum, over the accounted groups, of the kernel pages each group would
+        * still need for its present movable pages at the configured ratio.
+        * Groups without such a shortfall contribute nothing.
+        */
+       unsigned long req_kernel_early_pages;
+};
  
-static bool auto_movable_can_online_movable(int nid, unsigned long nr_pages)
+/*
+ * Add one memory group's contribution to a struct auto_movable_group_stats.
+ *
+ * @group: group whose present_movable_pages and present_kernel_pages are read
+ * @arg:   the struct auto_movable_group_stats to update, passed as void *
+ *
+ * Used as the per-group callback that auto_movable_can_online_movable()
+ * hands to walk_dynamic_memory_groups().
+ *
+ * Always returns 0 (presumably "keep walking" -- confirm against
+ * walk_dynamic_memory_groups()).
+ */
+static int auto_movable_stats_account_group(struct memory_group *group,
+                                          void *arg)
+{
+       /* Snapshot the ratio so the zero check and the division agree. */
+       const int ratio = READ_ONCE(auto_movable_ratio);
+       struct auto_movable_group_stats *stats = arg;
+       /* Signed: the subtraction below can go negative. */
+       long pages;
+
+       /*
+        * We don't support modifying the config while the auto-movable online
+        * policy is already enabled. Just avoid the division by zero below.
+        * Neither counter in @arg is touched for this group in that case.
+        */
+       if (!ratio)
+               return 0;
+
+       /*
+        * Calculate how many early kernel pages this group requires to
+        * satisfy the configured zone ratio.
+        */
+       pages = group->present_movable_pages * 100 / ratio;
+       pages -= group->present_kernel_pages;
+
+       /*
+        * Only a shortfall is accounted; a group with more kernel pages than
+        * it requires does not reduce the total.
+        */
+       if (pages > 0)
+               stats->req_kernel_early_pages += pages;
+       stats->movable_pages += group->present_movable_pages;
+       return 0;
+}
+
+static bool auto_movable_can_online_movable(int nid, struct memory_group *group,
+                                           unsigned long nr_pages)
  {
-       struct auto_movable_stats stats = {};
         unsigned long kernel_early_pages, movable_pages;
+       struct auto_movable_group_stats group_stats = {};
+       struct auto_movable_stats stats = {};
         pg_data_t *pgdat = NODE_DATA(nid);
         struct zone *zone;
         int i;
@@ -778,6 +809,21 @@ static bool auto_movable_can_online_movable(int nid, unsigned long nr_pages)
         movable_pages = stats.movable_pages;
  
         /*
+        * Kernel memory inside a dynamic memory group allows for more MOVABLE
+        * memory within the same group. Remove the effect of all but the
+        * current group from the stats.
+        */
+       walk_dynamic_memory_groups(nid, auto_movable_stats_account_group,
+                                  group, &group_stats);
+       if (kernel_early_pages <= group_stats.req_kernel_early_pages)
+               return false;
+       kernel_early_pages -= group_stats.req_kernel_early_pages;
+       movable_pages -= group_stats.movable_pages;
+
+       if (group && group->is_dynamic)
+               kernel_early_pages += group->present_kernel_pages;
+
+       /*
          * Test if we could online the given number of pages to ZONE_MOVABLE
          * and still stay in the configured ratio.
          */
@@ -834,6 +880,10 @@ static struct zone *default_kernel_zone_for_pfn(int nid, unsigned long start_pfn
   *    with unmovable allocations). While there are corner cases where it might
   *    still work, it is barely relevant in practice.
   *
+ * Exceptions are dynamic memory groups, which allow for more MOVABLE
+ * memory within the same memory group -- because in that case, there is
+ * coordination within the single memory device managed by a single driver.
+ *
   * We rely on "present pages" instead of "managed pages", as the latter is
   * highly unreliable and dynamic in virtualized environments, and does not
   * consider boot time allocations. For example, memory ballooning adjusts the
@@ -852,18 +902,59 @@ static struct zone *default_kernel_zone_for_pfn(int nid, unsigned long start_pfn
   *    "present pages" is an upper limit that can get reached at runtime. As
   *    we base our calculations on KERNEL_EARLY, this is not an issue.
   */
-static struct zone *auto_movable_zone_for_pfn(int nid, unsigned long pfn,
+static struct zone *auto_movable_zone_for_pfn(int nid,
+                                             struct memory_group *group,
+                                             unsigned long pfn,
                                               unsigned long nr_pages)
  {
+       unsigned long online_pages = 0, max_pages, end_pfn;
+       struct page *page;
+
         if (!auto_movable_ratio)
                 goto kernel_zone;
  
-       if (!auto_movable_can_online_movable(NUMA_NO_NODE, nr_pages))
+       if (group && !group->is_dynamic) {
+               max_pages = group->s.max_pages;
+               online_pages = group->present_movable_pages;
+
+               /* If anything is !MOVABLE, online the rest !MOVABLE. */
+               if (group->present_kernel_pages)
+                       goto kernel_zone;
+       } else if (!group || group->d.unit_pages == nr_pages) {
+               max_pages = nr_pages;
+       } else {
+               max_pages = group->d.unit_pages;
+               /*
+                * Take a look at all online sections in the current unit.
+                * We can safely assume that all pages within a section belong
+                * to the same zone, because dynamic memory groups only deal
+                * with hotplugged memory.
+                */
+               pfn = ALIGN_DOWN(pfn, group->d.unit_pages);
+               end_pfn = pfn + group->d.unit_pages;
+               for (; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
+                       page = pfn_to_online_page(pfn);
+                       if (!page)
+                               continue;
+                       /* If anything is !MOVABLE, online the rest !MOVABLE. */
+                       if (page_zonenum(page) != ZONE_MOVABLE)
+                               goto kernel_zone;
+                       online_pages += PAGES_PER_SECTION;
+               }
+       }
+
+       /*
+        * Online MOVABLE if we could *currently* online all remaining parts
+        * MOVABLE. We expect to (add+) online them immediately next, so if
+        * nobody interferes, all will be MOVABLE if possible.
+        */
+       nr_pages = max_pages - online_pages;
+       if (!auto_movable_can_online_movable(NUMA_NO_NODE, group, nr_pages))
                 goto kernel_zone;
  
  #ifdef CONFIG_NUMA
         if (auto_movable_numa_aware &&
-           !auto_movable_can_online_movable(nid, nr_pages))
+           !auto_movable_can_online_movable(nid, group, nr_pages))
                 goto kernel_zone;
  #endif /* CONFIG_NUMA */
  
@@ -897,7 +988,8 @@ static inline struct zone *default_zone_for_pfn(int nid, unsigned long start_pfn
  }
  
  struct zone *zone_for_pfn_range(int online_type, int nid,
-               unsigned long start_pfn, unsigned long nr_pages)
+               struct memory_group *group, unsigned long start_pfn,
+               unsigned long nr_pages)
  {
         if (online_type == MMOP_ONLINE_KERNEL)
                 return default_kernel_zone_for_pfn(nid, start_pfn, nr_pages);
@@ -906,7 +998,7 @@ struct zone *zone_for_pfn_range(int online_type, int nid,
                 return &NODE_DATA(nid)->node_zones[ZONE_MOVABLE];
  
         if (online_policy == ONLINE_POLICY_AUTO_MOVABLE)
-               return auto_movable_zone_for_pfn(nid, start_pfn, nr_pages);
+               return auto_movable_zone_for_pfn(nid, group, start_pfn, nr_pages);
  
         return default_zone_for_pfn(nid, start_pfn, nr_pages);
  }
@@ -915,9 +1007,11 @@ struct zone *zone_for_pfn_range(int online_type, int nid,
   * This function should only be called by memory_block_{online,offline},
   * and {online,offline}_pages.
   */
-void adjust_present_page_count(struct page *page, long nr_pages)
+void adjust_present_page_count(struct page *page, struct memory_group *group,
+                              long nr_pages)
  {
         struct zone *zone = page_zone(page);
+       const bool movable = zone_idx(zone) == ZONE_MOVABLE;
  
         /*
          * We only support onlining/offlining/adding/removing of complete
@@ -927,6 +1021,11 @@ void adjust_present_page_count(struct page *page, long nr_pages)
                 zone->present_early_pages += nr_pages;
         zone->present_pages += nr_pages;
         zone->zone_pgdat->node_present_pages += nr_pages;
+
+       if (group && movable)
+               group->present_movable_pages += nr_pages;
+       else if (group && !movable)
+               group->present_kernel_pages += nr_pages;
  }
  
  int mhp_init_memmap_on_memory(unsigned long pfn, unsigned long nr_pages,
@@ -972,7 +1071,8 @@ void mhp_deinit_memmap_on_memory(unsigned long pfn, unsigned long nr_pages)
         kasan_remove_zero_shadow(__va(PFN_PHYS(pfn)), PFN_PHYS(nr_pages));
  }
  
-int __ref online_pages(unsigned long pfn, unsigned long nr_pages, struct zone *zone)
+int __ref online_pages(unsigned long pfn, unsigned long nr_pages,
+                      struct zone *zone, struct memory_group *group)
  {
         unsigned long flags;
         int need_zonelists_rebuild = 0;
@@ -1025,7 +1125,7 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages, struct zone *z
         }
  
         online_pages_range(pfn, nr_pages);
-       adjust_present_page_count(pfn_to_page(pfn), nr_pages);
+       adjust_present_page_count(pfn_to_page(pfn), group, nr_pages);
  
         node_states_set_node(nid, &arg);
         if (need_zonelists_rebuild)
@@ -1258,6 +1358,7 @@ int __ref add_memory_resource(int nid, struct resource *res, mhp_t mhp_flags)
  {
         struct mhp_params params = { .pgprot = pgprot_mhp(PAGE_KERNEL) };
         struct vmem_altmap mhp_altmap = {};
+       struct memory_group *group = NULL;
         u64 start, size;
         bool new_node = false;
         int ret;
@@ -1269,6 +1370,13 @@ int __ref add_memory_resource(int nid, struct resource *res, mhp_t mhp_flags)
         if (ret)
                 return ret;
  
+       if (mhp_flags & MHP_NID_IS_MGID) {
+               group = memory_group_find_by_id(nid);
+               if (!group)
+                       return -EINVAL;
+               nid = group->nid;
+       }
+
         if (!node_possible(nid)) {
                 WARN(1, "node %d was absent from the node_possible_map\n", nid);
                 return -EINVAL;
@@ -1303,7 +1411,8 @@ int __ref add_memory_resource(int nid, struct resource *res, mhp_t mhp_flags)
                 goto error;
  
         /* create memory block devices after memory was added */
-       ret = create_memory_block_devices(start, size, mhp_altmap.alloc);
+       ret = create_memory_block_devices(start, size, mhp_altmap.alloc,
+                                         group);
         if (ret) {
                 arch_remove_memory(start, size, NULL);
                 goto error;
@@ -1661,7 +1770,7 @@ do_migrate_range(unsigned long start_pfn, unsigned long end_pfn)
                 if (nodes_empty(nmask))
                         node_set(mtc.nid, nmask);
                 ret = migrate_pages(&source, alloc_migration_target, NULL,
-                       (unsigned long)&mtc, MIGRATE_SYNC, MR_MEMORY_HOTPLUG);
+                       (unsigned long)&mtc, MIGRATE_SYNC, MR_MEMORY_HOTPLUG, NULL);
                 if (ret) {
                         list_for_each_entry(page, &source, lru) {
                                 if (__ratelimit(&migrate_rs)) {
@@ -1760,7 +1869,8 @@ static int count_system_ram_pages_cb(unsigned long start_pfn,
         return 0;
  }
  
-int __ref offline_pages(unsigned long start_pfn, unsigned long nr_pages)
+int __ref offline_pages(unsigned long start_pfn, unsigned long nr_pages,
+                       struct memory_group *group)
  {
         const unsigned long end_pfn = start_pfn + nr_pages;
         unsigned long pfn, system_ram_pages = 0;
@@ -1896,7 +2006,7 @@ int __ref offline_pages(unsigned long start_pfn, unsigned long nr_pages)
  
         /* removal success */
         adjust_managed_page_count(pfn_to_page(start_pfn), -nr_pages);
-       adjust_present_page_count(pfn_to_page(start_pfn), -nr_pages);
+       adjust_present_page_count(pfn_to_page(start_pfn), group, -nr_pages);
  
         /* reinitialise watermarks and update pcp limits */
         init_per_zone_wmark_min();