diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index fabe550..a98c7fc 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -208,9 +208,9 @@ static int kill_proc(struct task_struct *t, unsigned long addr, int trapno,
 #endif
        si.si_addr_lsb = compound_order(compound_head(page)) + PAGE_SHIFT;
 
-       if ((flags & MF_ACTION_REQUIRED) && t == current) {
+       if ((flags & MF_ACTION_REQUIRED) && t->mm == current->mm) {
                si.si_code = BUS_MCEERR_AR;
-               ret = force_sig_info(SIGBUS, &si, t);
+               ret = force_sig_info(SIGBUS, &si, current);
        } else {
                /*
                 * Don't use force here, it's convenient if the signal
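An aside on this hunk: for action-required errors the signal is now forced on current whenever it shares the mm with the selected task, because an action-required SIGBUS only makes sense delivered synchronously to the thread that actually consumed the poison. On the receiving end, a process observes the fields filled in above through an SA_SIGINFO handler. A minimal sketch, assuming a Linux/glibc environment that exposes BUS_MCEERR_AR/BUS_MCEERR_AO and si_addr_lsb; the recovery policy here is hypothetical:

	#include <signal.h>
	#include <stdio.h>
	#include <stdlib.h>

	/* Inspect the siginfo filled in by kill_proc(). fprintf() is not
	 * async-signal-safe; it is used only to keep the sketch short. */
	static void sigbus_handler(int sig, siginfo_t *si, void *ucontext)
	{
		if (si->si_code == BUS_MCEERR_AR || si->si_code == BUS_MCEERR_AO) {
			/* si_addr_lsb encodes the size of the poisoned area:
			 * compound_order(compound_head(page)) + PAGE_SHIFT above. */
			fprintf(stderr, "hwpoison at %p, %lu bytes lost\n",
				si->si_addr, 1UL << si->si_addr_lsb);
			_exit(EXIT_FAILURE);
		}
	}

	int main(void)
	{
		struct sigaction sa = {
			.sa_sigaction = sigbus_handler,
			.sa_flags     = SA_SIGINFO,
		};
		sigaction(SIGBUS, &sa, NULL);
		pause();	/* wait for an (injected) memory failure */
		return 0;
	}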
@@ -384,20 +384,51 @@ static void kill_procs(struct list_head *to_kill, int forcekill, int trapno,
        }
 }
 
-static int task_early_kill(struct task_struct *tsk)
+/*
+ * Find a dedicated thread which is supposed to handle SIGBUS(BUS_MCEERR_AO)
+ * on behalf of the thread group. Return the task_struct of the first
+ * such dedicated thread found, or NULL if there is none.
+ *
+ * We already hold read_lock(&tasklist_lock) in the caller, so we don't
+ * have to call rcu_read_lock/unlock() in this function.
+ */
+static struct task_struct *find_early_kill_thread(struct task_struct *tsk)
 {
+       struct task_struct *t;
+
+       for_each_thread(tsk, t)
+               if ((t->flags & PF_MCE_PROCESS) && (t->flags & PF_MCE_EARLY))
+                       return t;
+       return NULL;
+}
+
+/*
+ * Determine whether a given process is an "early kill" process, i.e. one
+ * that expects to be signaled when a page under the process is hwpoisoned.
+ * Return the task_struct of the dedicated thread (the main thread unless
+ * one is explicitly specified) if the process is "early kill", else NULL.
+ */
+static struct task_struct *task_early_kill(struct task_struct *tsk,
+                                          int force_early)
+{
+       struct task_struct *t;
        if (!tsk->mm)
-               return 0;
-       if (tsk->flags & PF_MCE_PROCESS)
-               return !!(tsk->flags & PF_MCE_EARLY);
-       return sysctl_memory_failure_early_kill;
+               return NULL;
+       if (force_early)
+               return tsk;
+       t = find_early_kill_thread(tsk);
+       if (t)
+               return t;
+       if (sysctl_memory_failure_early_kill)
+               return tsk;
+       return NULL;
 }
 
 /*
  * Collect processes when the error hit an anonymous page.
  */
 static void collect_procs_anon(struct page *page, struct list_head *to_kill,
-                             struct to_kill **tkc)
+                             struct to_kill **tkc, int force_early)
 {
        struct vm_area_struct *vma;
        struct task_struct *tsk;
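For reference, the PF_MCE_PROCESS and PF_MCE_EARLY flags that find_early_kill_thread() scans for are per-thread state set from userspace with prctl(PR_MCE_KILL). A thread that volunteers to handle BUS_MCEERR_AO for its whole thread group would opt in roughly as follows (a sketch; the function name is ours):

	#include <stdio.h>
	#include <sys/prctl.h>

	/* Call from the thread that should receive SIGBUS(BUS_MCEERR_AO)
	 * on behalf of the whole thread group. */
	static void become_memory_failure_handler(void)
	{
		if (prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0))
			perror("prctl(PR_MCE_KILL)");
	}

Absent such a thread, task_early_kill() falls back to the main thread when the vm.memory_failure_early_kill sysctl is set, and force_early overrides both checks for action-required errors.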
@@ -412,16 +443,17 @@ static void collect_procs_anon(struct page *page, struct list_head *to_kill,
        read_lock(&tasklist_lock);
        for_each_process (tsk) {
                struct anon_vma_chain *vmac;
+               struct task_struct *t = task_early_kill(tsk, force_early);
 
-               if (!task_early_kill(tsk))
+               if (!t)
                        continue;
                anon_vma_interval_tree_foreach(vmac, &av->rb_root,
                                               pgoff, pgoff) {
                        vma = vmac->vma;
                        if (!page_mapped_in_vma(page, vma))
                                continue;
-                       if (vma->vm_mm == tsk->mm)
-                               add_to_kill(tsk, page, vma, to_kill, tkc);
+                       if (vma->vm_mm == t->mm)
+                               add_to_kill(t, page, vma, to_kill, tkc);
                }
        }
        read_unlock(&tasklist_lock);
@@ -432,7 +464,7 @@ static void collect_procs_anon(struct page *page, struct list_head *to_kill,
  * Collect processes when the error hit a file mapped page.
  */
 static void collect_procs_file(struct page *page, struct list_head *to_kill,
-                             struct to_kill **tkc)
+                             struct to_kill **tkc, int force_early)
 {
        struct vm_area_struct *vma;
        struct task_struct *tsk;
@@ -442,10 +474,10 @@ static void collect_procs_file(struct page *page, struct list_head *to_kill,
        read_lock(&tasklist_lock);
        for_each_process(tsk) {
                pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
+               struct task_struct *t = task_early_kill(tsk, force_early);
 
-               if (!task_early_kill(tsk))
+               if (!t)
                        continue;
-
                vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff,
                                      pgoff) {
                        /*
@@ -455,8 +487,8 @@ static void collect_procs_file(struct page *page, struct list_head *to_kill,
                         * Assume applications that requested early kill want
                         * to be informed of all such data corruptions.
                         */
-                       if (vma->vm_mm == tsk->mm)
-                               add_to_kill(tsk, page, vma, to_kill, tkc);
+                       if (vma->vm_mm == t->mm)
+                               add_to_kill(t, page, vma, to_kill, tkc);
                }
        }
        read_unlock(&tasklist_lock);
@@ -469,7 +501,8 @@ static void collect_procs_file(struct page *page, struct list_head *to_kill,
  * First preallocate one tokill structure outside the spin locks,
  * so that we can kill at least one process reasonably reliably.
  */
-static void collect_procs(struct page *page, struct list_head *tokill)
+static void collect_procs(struct page *page, struct list_head *tokill,
+                               int force_early)
 {
        struct to_kill *tk;
 
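The comment above deserves unpacking: the one to_kill structure is allocated with a sleeping allocator before read_lock(&tasklist_lock) is taken, because any allocation under the lock must be atomic and may fail; the reservation guarantees that at least one victim can always be recorded. The shape of the pattern as a self-contained sketch (names illustrative, not kernel API):

	#include <pthread.h>
	#include <stdlib.h>

	struct victim { struct victim *next; int id; };

	static pthread_mutex_t victims_lock = PTHREAD_MUTEX_INITIALIZER;
	static struct victim *victims;

	static void collect(const int *ids, int n)
	{
		struct victim *reserve = malloc(sizeof(*reserve)); /* "GFP_NOIO" */
		if (!reserve)
			return;			/* cannot record even one victim */

		pthread_mutex_lock(&victims_lock);
		for (int i = 0; i < n; i++) {
			struct victim *v = reserve;
			if (v)
				reserve = NULL;	/* consume the reservation first */
			else
				v = malloc(sizeof(*v)); /* "GFP_ATOMIC": may fail */
			if (!v)
				break;		/* later victims are dropped */
			v->id = ids[i];
			v->next = victims;
			victims = v;
		}
		pthread_mutex_unlock(&victims_lock);
		free(reserve);			/* reservation left unused */
	}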
@@ -480,9 +513,9 @@ static void collect_procs(struct page *page, struct list_head *tokill)
        if (!tk)
                return;
        if (PageAnon(page))
-               collect_procs_anon(page, tokill, &tk);
+               collect_procs_anon(page, tokill, &tk, force_early);
        else
-               collect_procs_file(page, tokill, &tk);
+               collect_procs_file(page, tokill, &tk, force_early);
        kfree(tk);
 }
 
@@ -611,7 +644,7 @@ static int me_pagecache_clean(struct page *p, unsigned long pfn)
 }
 
 /*
- * Dirty cache page page
+ * Dirty pagecache page
  * Issues: when the error hit a hole page the error is not properly
  * propagated.
  */
@@ -856,14 +889,14 @@ static int page_action(struct page_state *ps, struct page *p,
  * the pages and send SIGBUS to the processes if the data was dirty.
  */
 static int hwpoison_user_mappings(struct page *p, unsigned long pfn,
-                                 int trapno, int flags)
+                                 int trapno, int flags, struct page **hpagep)
 {
        enum ttu_flags ttu = TTU_UNMAP | TTU_IGNORE_MLOCK | TTU_IGNORE_ACCESS;
        struct address_space *mapping;
        LIST_HEAD(tokill);
        int ret;
        int kill = 1, forcekill;
-       struct page *hpage = compound_head(p);
+       struct page *hpage = *hpagep;
        struct page *ppage;
 
        if (PageReserved(p) || PageSlab(p))
@@ -942,11 +975,16 @@ static int hwpoison_user_mappings(struct page *p, unsigned long pfn,
                         * We pinned the head page for hwpoison handling,
                         * now we split the thp and we are interested in
                         * the hwpoisoned raw page, so move the refcount
-                        * to it.
+                        * to it. Similarly, the page lock is shifted.
                         */
                        if (hpage != p) {
-                               put_page(hpage);
-                               get_page(p);
+                               if (!(flags & MF_COUNT_INCREASED)) {
+                                       put_page(hpage);
+                                       get_page(p);
+                               }
+                               lock_page(p);
+                               unlock_page(hpage);
+                               *hpagep = p;
                        }
                        /* THP is split, so ppage should be the real poisoned page. */
                        ppage = p;
@@ -962,19 +1000,13 @@ static int hwpoison_user_mappings(struct page *p, unsigned long pfn,
         * there's nothing that can be done.
         */
        if (kill)
-               collect_procs(ppage, &tokill);
-
-       if (hpage != ppage)
-               lock_page(ppage);
+               collect_procs(ppage, &tokill, flags & MF_ACTION_REQUIRED);
 
        ret = try_to_unmap(ppage, ttu);
        if (ret != SWAP_SUCCESS)
                printk(KERN_ERR "MCE %#lx: failed to unmap page (mapcount=%d)\n",
                                pfn, page_mapcount(ppage));
 
-       if (hpage != ppage)
-               unlock_page(ppage);
-
        /*
         * Now that the dirty bit has been propagated to the
         * struct page and all unmaps done we can decide if
@@ -1086,15 +1118,16 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
                        return 0;
                } else if (PageHuge(hpage)) {
                        /*
-                        * Check "just unpoisoned", "filter hit", and
-                        * "race with other subpage."
+                        * Check "filter hit" and "race with other subpage."
                         */
                        lock_page(hpage);
-                       if (!PageHWPoison(hpage)
-                           || (hwpoison_filter(p) && TestClearPageHWPoison(p))
-                           || (p != hpage && TestSetPageHWPoison(hpage))) {
-                               atomic_long_sub(nr_pages, &num_poisoned_pages);
-                               return 0;
+                       if (PageHWPoison(hpage)) {
+                               if ((hwpoison_filter(p) && TestClearPageHWPoison(p))
+                                   || (p != hpage && TestSetPageHWPoison(hpage))) {
+                                       atomic_long_sub(nr_pages, &num_poisoned_pages);
+                                       unlock_page(hpage);
+                                       return 0;
+                               }
                        }
                        set_page_hwpoison_huge_page(hpage);
                        res = dequeue_hwpoisoned_huge_page(hpage);
@@ -1157,6 +1190,8 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
         */
        if (!PageHWPoison(p)) {
                printk(KERN_ERR "MCE %#lx: just unpoisoned\n", pfn);
+               atomic_long_sub(nr_pages, &num_poisoned_pages);
+               put_page(hpage);
                res = 0;
                goto out;
        }
@@ -1193,8 +1228,12 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
        /*
         * Now take care of user space mappings.
         * Abort on fail: __delete_from_page_cache() assumes unmapped page.
+        *
+        * When the raw error page is a thp tail page, hpage points to the raw
+        * page after thp split.
         */
-       if (hwpoison_user_mappings(p, pfn, trapno, flags) != SWAP_SUCCESS) {
+       if (hwpoison_user_mappings(p, pfn, trapno, flags, &hpage)
+           != SWAP_SUCCESS) {
                printk(KERN_ERR "MCE %#lx: cannot unmap page, give up\n", pfn);
                res = -EBUSY;
                goto out;
@@ -1501,7 +1540,7 @@ static int soft_offline_huge_page(struct page *page, int flags)
 
        /* Keep page count to indicate a given hugepage is isolated. */
        list_move(&hpage->lru, &pagelist);
-       ret = migrate_pages(&pagelist, new_page, MPOL_MF_MOVE_ALL,
+       ret = migrate_pages(&pagelist, new_page, NULL, MPOL_MF_MOVE_ALL,
                                MIGRATE_SYNC, MR_MEMORY_FAILURE);
        if (ret) {
                pr_info("soft offline: %#lx: migration failed %d, type %lx\n",
@@ -1582,10 +1621,16 @@ static int __soft_offline_page(struct page *page, int flags)
                inc_zone_page_state(page, NR_ISOLATED_ANON +
                                        page_is_file_cache(page));
                list_add(&page->lru, &pagelist);
-               ret = migrate_pages(&pagelist, new_page, MPOL_MF_MOVE_ALL,
+               ret = migrate_pages(&pagelist, new_page, NULL, MPOL_MF_MOVE_ALL,
                                        MIGRATE_SYNC, MR_MEMORY_FAILURE);
                if (ret) {
-                       putback_lru_pages(&pagelist);
+                       if (!list_empty(&pagelist)) {
+                               list_del(&page->lru);
+                               dec_zone_page_state(page, NR_ISOLATED_ANON +
+                                               page_is_file_cache(page));
+                               putback_lru_page(page);
+                       }
+
                        pr_info("soft offline: %#lx: migration failed %d, type %lx\n",
                                pfn, ret, page->flags);
                        if (ret > 0)
@@ -1642,7 +1687,7 @@ int soft_offline_page(struct page *page, int flags)
 {
        int ret;
        unsigned long pfn = page_to_pfn(page);
-       struct page *hpage = compound_trans_head(page);
+       struct page *hpage = compound_head(page);
 
        if (PageHWPoison(page)) {
                pr_info("soft offline: %#lx page already poisoned\n", pfn);
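The soft-offline path touched above is reachable from userspace for testing: madvise(MADV_SOFT_OFFLINE) feeds one of the caller's pages into soft_offline_page(), migrating its contents away without killing anything (CONFIG_MEMORY_FAILURE and CAP_SYS_ADMIN are required on this kernel; MADV_HWPOISON injects a hard failure instead). A minimal sketch:

	#define _GNU_SOURCE
	#include <stdio.h>
	#include <sys/mman.h>
	#include <unistd.h>

	int main(void)
	{
		long psize = sysconf(_SC_PAGESIZE);
		char *p = mmap(NULL, psize, PROT_READ | PROT_WRITE,
			       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
		if (p == MAP_FAILED)
			return 1;
		p[0] = 1;	/* fault the page in so there is data to migrate */
		if (madvise(p, psize, MADV_SOFT_OFFLINE))
			perror("madvise(MADV_SOFT_OFFLINE)");
		return 0;
	}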