swap: fix races exposed by swap discard
[platform/adaptation/renesas_rcar/renesas_kernel.git] / mm / swapfile.c
index dac47c6..98e52e3 100644 (file)
@@ -370,7 +370,8 @@ static void dec_cluster_info_page(struct swap_info_struct *p,
                 * instead of free it immediately. The cluster will be freed
                 * after discard.
                 */
-               if (p->flags & SWP_PAGE_DISCARD) {
+               if ((p->flags & (SWP_WRITEOK | SWP_PAGE_DISCARD)) ==
+                                (SWP_WRITEOK | SWP_PAGE_DISCARD)) {
                        swap_cluster_schedule_discard(p, idx);
                        return;
                }
@@ -1288,7 +1289,7 @@ static unsigned int find_next_to_unuse(struct swap_info_struct *si,
                        else
                                continue;
                }
-               count = si->swap_map[i];
+               count = ACCESS_ONCE(si->swap_map[i]);
                if (count && swap_count(count) != SWAP_MAP_BAD)
                        break;
        }
@@ -1308,7 +1309,11 @@ int try_to_unuse(unsigned int type, bool frontswap,
 {
        struct swap_info_struct *si = swap_info[type];
        struct mm_struct *start_mm;
-       unsigned char *swap_map;
+       volatile unsigned char *swap_map; /* swap_map is accessed without
+                                          * locking. Mark it as volatile
+                                          * to prevent compiler doing
+                                          * something odd.
+                                          */
        unsigned char swcount;
        struct page *page;
        swp_entry_t entry;
@@ -1359,7 +1364,15 @@ int try_to_unuse(unsigned int type, bool frontswap,
                         * reused since sys_swapoff() already disabled
                         * allocation from here, or alloc_page() failed.
                         */
-                       if (!*swap_map)
+                       swcount = *swap_map;
+                       /*
+                        * We don't hold lock here, so the swap entry could be
+                        * SWAP_MAP_BAD (when the cluster is discarding).
+                        * Instead of fail out, We can just skip the swap
+                        * entry because swapoff will wait for discarding
+                        * finish anyway.
+                        */
+                       if (!swcount || swcount == SWAP_MAP_BAD)
                                continue;
                        retval = -ENOMEM;
                        break;
@@ -2543,6 +2556,16 @@ static int __swap_duplicate(swp_entry_t entry, unsigned char usage)
                goto unlock_out;
 
        count = p->swap_map[offset];
+
+       /*
+        * swapin_readahead() doesn't check if a swap entry is valid, so the
+        * swap entry could be SWAP_MAP_BAD. Check here with lock held.
+        */
+       if (unlikely(swap_count(count) == SWAP_MAP_BAD)) {
+               err = -ENOENT;
+               goto unlock_out;
+       }
+
        has_cache = count & SWAP_HAS_CACHE;
        count &= ~SWAP_HAS_CACHE;
        err = 0;