Merge remote-tracking branch 'stable/linux-5.15.y' into rpi-5.15.y

[platform/kernel/linux-rpi.git] / mm / memory-failure.c
diff --git a/mm/memory-failure.c b/mm/memory-failure.c

index c71135e..2ad0f45 100644 (file)
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -57,6 +57,7 @@
  #include <linux/ratelimit.h>
  #include <linux/page-isolation.h>
  #include <linux/pagewalk.h>
+#include <linux/shmem_fs.h>
  #include "internal.h"
  #include "ras/ras_event.h"
  
@@ -700,6 +701,9 @@ static int kill_accessing_process(struct task_struct *p, unsigned long pfn,
         };
         priv.tk.tsk = p;
  
+       if (!p->mm)
+               return -EFAULT;
+
         mmap_read_lock(p->mm);
         ret = walk_page_range(p->mm, 0, TASK_SIZE, &hwp_walk_ops,
                               (void *)&priv);
@@ -808,12 +812,44 @@ static int truncate_error_page(struct page *p, unsigned long pfn,
         return ret;
  }
  
+struct page_state {
+       unsigned long mask;     /* PG_* bits; presumably the flags to test */
+       unsigned long res;      /* presumably expected value of masked flags */
+       enum mf_action_page_type type;  /* index into action_page_types[] */
+
+       /* Called as ps->action(ps, p); it has to unlock the relevant page. */
+       int (*action)(struct page_state *ps, struct page *p);
+};
+
+/*
+ * Return true, after logging an error that names @ps->type, if @p is still
+ * referenced by others; otherwise return false.
+ *
+ * Set @extra_pins to true when one extra refcount is expected on @p.
+ */
+static bool has_extra_refcount(struct page_state *ps, struct page *p,
+                              bool extra_pins)
+{
+       int count = page_count(p) - 1;
+
+       if (extra_pins)
+               count -= 1;
+
+       if (count > 0) {
+               pr_err("Memory failure: %#lx: %s still referenced by %d users\n",
+                      page_to_pfn(p), action_page_types[ps->type], count);
+               return true;
+       }
+
+       return false;
+}
+
  /*
   * Error hit kernel page.
   * Do nothing, try to be lucky and not touch this instead. For a few cases we
   * could be more sophisticated.
   */
-static int me_kernel(struct page *p, unsigned long pfn)
+static int me_kernel(struct page_state *ps, struct page *p)
  {
         unlock_page(p);
         return MF_IGNORED;
@@ -822,9 +858,9 @@ static int me_kernel(struct page *p, unsigned long pfn)
  /*
   * Page in unknown state. Do nothing.
   */
-static int me_unknown(struct page *p, unsigned long pfn)
+static int me_unknown(struct page_state *ps, struct page *p)
  {
-       pr_err("Memory failure: %#lx: Unknown page state\n", pfn);
+       pr_err("Memory failure: %#lx: Unknown page state\n", page_to_pfn(p));
         unlock_page(p);
         return MF_FAILED;
  }
@@ -832,10 +868,11 @@ static int me_unknown(struct page *p, unsigned long pfn)
  /*
   * Clean (or cleaned) page cache page.
   */
-static int me_pagecache_clean(struct page *p, unsigned long pfn)
+static int me_pagecache_clean(struct page_state *ps, struct page *p)
  {
         int ret;
         struct address_space *mapping;
+       bool extra_pins;
  
         delete_from_lru_cache(p);
  
@@ -865,13 +902,23 @@ static int me_pagecache_clean(struct page *p, unsigned long pfn)
         }
  
         /*
+        * The shmem page is kept in page cache instead of truncating
+        * so is expected to have an extra refcount after error-handling.
+        */
+       extra_pins = shmem_mapping(mapping);
+
+       /*
          * Truncation is a bit tricky. Enable it per file system for now.
          *
          * Open: to take i_rwsem or not for this? Right now we don't.
          */
-       ret = truncate_error_page(p, pfn, mapping);
+       ret = truncate_error_page(p, page_to_pfn(p), mapping);
+       if (has_extra_refcount(ps, p, extra_pins))
+               ret = MF_FAILED;
+
  out:
         unlock_page(p);
+
         return ret;
  }
  
@@ -880,7 +927,7 @@ out:
   * Issues: when the error hit a hole page the error is not properly
   * propagated.
   */
-static int me_pagecache_dirty(struct page *p, unsigned long pfn)
+static int me_pagecache_dirty(struct page_state *ps, struct page *p)
  {
         struct address_space *mapping = page_mapping(p);
  
@@ -924,7 +971,7 @@ static int me_pagecache_dirty(struct page *p, unsigned long pfn)
                 mapping_set_error(mapping, -EIO);
         }
  
-       return me_pagecache_clean(p, pfn);
+       return me_pagecache_clean(ps, p);
  }
  
  /*
@@ -946,9 +993,10 @@ static int me_pagecache_dirty(struct page *p, unsigned long pfn)
   * Clean swap cache pages can be directly isolated. A later page fault will
   * bring in the known good data from disk.
   */
-static int me_swapcache_dirty(struct page *p, unsigned long pfn)
+static int me_swapcache_dirty(struct page_state *ps, struct page *p)
  {
         int ret;
+       bool extra_pins = false;
  
         ClearPageDirty(p);
         /* Trigger EIO in shmem: */
@@ -956,10 +1004,17 @@ static int me_swapcache_dirty(struct page *p, unsigned long pfn)
  
         ret = delete_from_lru_cache(p) ? MF_FAILED : MF_DELAYED;
         unlock_page(p);
+
+       if (ret == MF_DELAYED)
+               extra_pins = true;
+
+       if (has_extra_refcount(ps, p, extra_pins))
+               ret = MF_FAILED;
+
         return ret;
  }
  
-static int me_swapcache_clean(struct page *p, unsigned long pfn)
+static int me_swapcache_clean(struct page_state *ps, struct page *p)
  {
         int ret;
  
@@ -967,6 +1022,10 @@ static int me_swapcache_clean(struct page *p, unsigned long pfn)
  
         ret = delete_from_lru_cache(p) ? MF_FAILED : MF_RECOVERED;
         unlock_page(p);
+
+       if (has_extra_refcount(ps, p, false))
+               ret = MF_FAILED;
+
         return ret;
  }
  
@@ -976,18 +1035,21 @@ static int me_swapcache_clean(struct page *p, unsigned long pfn)
   * - Error on hugepage is contained in hugepage unit (not in raw page unit.)
   *   To narrow down kill region to one page, we need to break up pmd.
   */
-static int me_huge_page(struct page *p, unsigned long pfn)
+static int me_huge_page(struct page_state *ps, struct page *p)
  {
         int res;
         struct page *hpage = compound_head(p);
         struct address_space *mapping;
+       bool extra_pins = false;
  
         if (!PageHuge(hpage))
                 return MF_DELAYED;
  
         mapping = page_mapping(hpage);
         if (mapping) {
-               res = truncate_error_page(hpage, pfn, mapping);
+               res = truncate_error_page(hpage, page_to_pfn(p), mapping);
+               /* The page is kept in page cache. */
+               extra_pins = true;
                 unlock_page(hpage);
         } else {
                 res = MF_FAILED;
@@ -1005,6 +1067,9 @@ static int me_huge_page(struct page *p, unsigned long pfn)
                 }
         }
  
+       if (has_extra_refcount(ps, p, extra_pins))
+               res = MF_FAILED;
+
         return res;
  }
  
@@ -1030,14 +1095,7 @@ static int me_huge_page(struct page *p, unsigned long pfn)
  #define slab           (1UL << PG_slab)
  #define reserved       (1UL << PG_reserved)
  
-static struct page_state {
-       unsigned long mask;
-       unsigned long res;
-       enum mf_action_page_type type;
-
-       /* Callback ->action() has to unlock the relevant page inside it. */
-       int (*action)(struct page *p, unsigned long pfn);
-} error_states[] = {
+static struct page_state error_states[] = {
         { reserved,     reserved,       MF_MSG_KERNEL,  me_kernel },
         /*
          * free pages are specially detected outside this table:
@@ -1097,19 +1155,10 @@ static int page_action(struct page_state *ps, struct page *p,
                         unsigned long pfn)
  {
         int result;
-       int count;
  
         /* page p should be unlocked after returning from ps->action().  */
-       result = ps->action(p, pfn);
+       result = ps->action(ps, p);
  
-       count = page_count(p) - 1;
-       if (ps->action == me_swapcache_dirty && result == MF_DELAYED)
-               count--;
-       if (count > 0) {
-               pr_err("Memory failure: %#lx: %s still referenced by %d users\n",
-                      pfn, action_page_types[ps->type], count);
-               result = MF_FAILED;
-       }
         action_result(pfn, ps->type, result);
  
         /* Could do more checks here if page looks ok */
@@ -1402,14 +1451,11 @@ static int identify_page_state(unsigned long pfn, struct page *p,
  static int try_to_split_thp_page(struct page *page, const char *msg)
  {
         lock_page(page);
-       if (!PageAnon(page) || unlikely(split_huge_page(page))) {
+       if (unlikely(split_huge_page(page))) {
                 unsigned long pfn = page_to_pfn(page);
  
                 unlock_page(page);
-               if (!PageAnon(page))
-                       pr_info("%s: %#lx: non anonymous thp\n", msg, pfn);
-               else
-                       pr_info("%s: %#lx: thp split failed\n", msg, pfn);
+               pr_info("%s: %#lx: thp split failed\n", msg, pfn);
                 put_page(page);
                 return -EBUSY;
         }