mm/hugetlb: add mempolicy check in the reservation routine

author Muchun Song <songmuchun@bytedance.com>

Wed, 12 Aug 2020 01:30:32 +0000 (18:30 -0700)

committer Linus Torvalds <torvalds@linux-foundation.org>

Wed, 12 Aug 2020 17:57:55 +0000 (10:57 -0700)
author Muchun Song <songmuchun@bytedance.com>
Wed, 12 Aug 2020 01:30:32 +0000 (18:30 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Wed, 12 Aug 2020 17:57:55 +0000 (10:57 -0700)
diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h

index ea9c15b..5f1648c 100644 (file)
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -6,7 +6,7 @@
  #ifndef _LINUX_MEMPOLICY_H
  #define _LINUX_MEMPOLICY_H 1
  
-
+#include <linux/sched.h>
  #include <linux/mmzone.h>
  #include <linux/dax.h>
  #include <linux/slab.h>
@@ -152,6 +152,15 @@ extern int huge_node(struct vm_area_struct *vma,
  extern bool init_nodemask_of_mempolicy(nodemask_t *mask);
  extern bool mempolicy_nodemask_intersects(struct task_struct *tsk,
                                 const nodemask_t *mask);
+extern nodemask_t *policy_nodemask(gfp_t gfp, struct mempolicy *policy);
+
+/* Nodemask that the current task's memory policy yields for @gfp. */
+static inline nodemask_t *policy_nodemask_current(gfp_t gfp)
+{
+       /* Use the calling task's own policy; no vma is consulted here. */
+       return policy_nodemask(gfp, get_task_policy(current));
+}
+
  extern unsigned int mempolicy_slab_node(void);
  
  extern enum zone_type policy_zone;
@@ -281,5 +290,10 @@ static inline int mpol_misplaced(struct page *page, struct vm_area_struct *vma,
  static inline void mpol_put_task_policy(struct task_struct *task)
  {
  }
+
+static inline nodemask_t *policy_nodemask_current(gfp_t gfp)
+{
+       return NULL;    /* stub: NULL tells callers no nodemask applies */
+}
  #endif /* CONFIG_NUMA */
  #endif
diff --git a/mm/hugetlb.c b/mm/hugetlb.c

index e52c878..dffafb5 100644 (file)
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -3458,13 +3458,21 @@ static int __init default_hugepagesz_setup(char *s)
  }
  __setup("default_hugepagesz=", default_hugepagesz_setup);
  
-static unsigned int cpuset_mems_nr(unsigned int *array)
+/* Count free huge pages on nodes allowed by both cpuset and mempolicy. */
+static unsigned int allowed_mems_nr(struct hstate *h)
  {
         int node;
         unsigned int nr = 0;
+       nodemask_t *mpol_allowed;
+       unsigned int *array = h->free_huge_pages_node;
+
+       mpol_allowed = policy_nodemask_current(htlb_alloc_mask(h));
  
-       for_each_node_mask(node, cpuset_current_mems_allowed)
-               nr += array[node];
+       for_each_node_mask(node, cpuset_current_mems_allowed) {
+               /* A NULL mask means the mempolicy imposes no node limit. */
+               if (!mpol_allowed || node_isset(node, *mpol_allowed))
+                       nr += array[node];
+       }
  
         return nr;
  }
@@ -3643,12 +3651,18 @@ static int hugetlb_acct_memory(struct hstate *h, long delta)
          * we fall back to check against current free page availability as
          * a best attempt and hopefully to minimize the impact of changing
          * semantics that cpuset has.
+        *
+        * Apart from cpuset, the memory policy mechanism also determines
+        * from which nodes the kernel will allocate memory in a NUMA
+        * system. So, as with cpuset, we should also take the memory
+        * policy of the current task into account, subject to the same
+        * caveats as described above.
          */
         if (delta > 0) {
                 if (gather_surplus_pages(h, delta) < 0)
                         goto out;
  
-               if (delta > cpuset_mems_nr(h->free_huge_pages_node)) {
+               if (delta > allowed_mems_nr(h)) {
                         return_unused_surplus_pages(h, delta);
                         goto out;
                 }
diff --git a/mm/mempolicy.c b/mm/mempolicy.c

index b9e85d4..7af44d7 100644 (file)
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -1890,7 +1890,7 @@ static int apply_policy_zone(struct mempolicy *policy, enum zone_type zone)
   * Return a nodemask representing a mempolicy for filtering nodes for
   * page allocation
   */
-static nodemask_t *policy_nodemask(gfp_t gfp, struct mempolicy *policy)
+nodemask_t *policy_nodemask(gfp_t gfp, struct mempolicy *policy)
  {
         /* Lower zones don't get a nodemask applied for MPOL_BIND */
         if (unlikely(policy->mode == MPOL_BIND) &&
author	Muchun Song <songmuchun@bytedance.com>
	Wed, 12 Aug 2020 01:30:32 +0000 (18:30 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Wed, 12 Aug 2020 17:57:55 +0000 (10:57 -0700)
include/linux/mempolicy.h		patch \| blob \| history
mm/hugetlb.c		patch \| blob \| history
mm/mempolicy.c		patch \| blob \| history