mm: swap: clean up swap readahead
author Minchan Kim <minchan@kernel.org>
Thu, 5 Apr 2018 23:23:39 +0000 (16:23 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Fri, 6 Apr 2018 04:36:25 +0000 (21:36 -0700)
Looking at the recent swap readahead changes, I am very unhappy with the
current code structure, which branches between the two swap readahead
algorithms inside do_swap_page.  This patch cleans that up.

The main motivation is that the fault handler doesn't need to be aware of
the readahead algorithms; it should just call swapin_readahead.

As a first step, this patch cleans things up a little, but not completely
(I split the work to make review easier), so the next patch will finish
the job.

[minchan@kernel.org: do not check readahead flag with THP anon]
Link: http://lkml.kernel.org/r/874lm83zho.fsf@yhuang-dev.intel.com
Link: http://lkml.kernel.org/r/20180227232611.169883-1-minchan@kernel.org
Link: http://lkml.kernel.org/r/1509520520-32367-2-git-send-email-minchan@kernel.org
Link: http://lkml.kernel.org/r/20180220085249.151400-2-minchan@kernel.org
Signed-off-by: Minchan Kim <minchan@kernel.org>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Cc: Hugh Dickins <hughd@google.com>
Cc: Huang Ying <ying.huang@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
include/linux/swap.h
mm/memory.c
mm/swap_state.c

index a1a3f4e..fa92177 100644 (file)
@@ -424,12 +424,8 @@ extern struct page *__read_swap_cache_async(swp_entry_t, gfp_t,
                        bool *new_page_allocated);
 extern struct page *swapin_readahead(swp_entry_t, gfp_t,
                        struct vm_area_struct *vma, unsigned long addr);
-
-extern struct page *swap_readahead_detect(struct vm_fault *vmf,
-                                         struct vma_swap_readahead *swap_ra);
 extern struct page *do_swap_page_readahead(swp_entry_t fentry, gfp_t gfp_mask,
-                                          struct vm_fault *vmf,
-                                          struct vma_swap_readahead *swap_ra);
+                                          struct vm_fault *vmf);
 
 /* linux/mm/swapfile.c */
 extern atomic_long_t nr_swap_pages;
@@ -548,15 +544,8 @@ static inline bool swap_use_vma_readahead(void)
        return false;
 }
 
-static inline struct page *swap_readahead_detect(
-       struct vm_fault *vmf, struct vma_swap_readahead *swap_ra)
-{
-       return NULL;
-}
-
-static inline struct page *do_swap_page_readahead(
-       swp_entry_t fentry, gfp_t gfp_mask,
-       struct vm_fault *vmf, struct vma_swap_readahead *swap_ra)
+static inline struct page *do_swap_page_readahead(swp_entry_t fentry,
+                               gfp_t gfp_mask, struct vm_fault *vmf)
 {
        return NULL;
 }
index aed3732..bc1ccff 100644 (file)
@@ -2883,26 +2883,16 @@ EXPORT_SYMBOL(unmap_mapping_range);
 int do_swap_page(struct vm_fault *vmf)
 {
        struct vm_area_struct *vma = vmf->vma;
-       struct page *page = NULL, *swapcache = NULL;
+       struct page *page = NULL, *swapcache;
        struct mem_cgroup *memcg;
-       struct vma_swap_readahead swap_ra;
        swp_entry_t entry;
        pte_t pte;
        int locked;
        int exclusive = 0;
        int ret = 0;
-       bool vma_readahead = swap_use_vma_readahead();
 
-       if (vma_readahead) {
-               page = swap_readahead_detect(vmf, &swap_ra);
-               swapcache = page;
-       }
-
-       if (!pte_unmap_same(vma->vm_mm, vmf->pmd, vmf->pte, vmf->orig_pte)) {
-               if (page)
-                       put_page(page);
+       if (!pte_unmap_same(vma->vm_mm, vmf->pmd, vmf->pte, vmf->orig_pte))
                goto out;
-       }
 
        entry = pte_to_swp_entry(vmf->orig_pte);
        if (unlikely(non_swap_entry(entry))) {
@@ -2928,11 +2918,8 @@ int do_swap_page(struct vm_fault *vmf)
 
 
        delayacct_set_flag(DELAYACCT_PF_SWAPIN);
-       if (!page) {
-               page = lookup_swap_cache(entry, vma_readahead ? vma : NULL,
-                                        vmf->address);
-               swapcache = page;
-       }
+       page = lookup_swap_cache(entry, vma, vmf->address);
+       swapcache = page;
 
        if (!page) {
                struct swap_info_struct *si = swp_swap_info(entry);
@@ -2949,9 +2936,9 @@ int do_swap_page(struct vm_fault *vmf)
                                swap_readpage(page, true);
                        }
                } else {
-                       if (vma_readahead)
+                       if (swap_use_vma_readahead())
                                page = do_swap_page_readahead(entry,
-                                       GFP_HIGHUSER_MOVABLE, vmf, &swap_ra);
+                                       GFP_HIGHUSER_MOVABLE, vmf);
                        else
                                page = swapin_readahead(entry,
                                       GFP_HIGHUSER_MOVABLE, vma, vmf->address);
@@ -2982,7 +2969,6 @@ int do_swap_page(struct vm_fault *vmf)
                 */
                ret = VM_FAULT_HWPOISON;
                delayacct_clear_flag(DELAYACCT_PF_SWAPIN);
-               swapcache = page;
                goto out_release;
        }
 
index 39ae7cf..db5da2b 100644 (file)
@@ -332,32 +332,43 @@ struct page *lookup_swap_cache(swp_entry_t entry, struct vm_area_struct *vma,
                               unsigned long addr)
 {
        struct page *page;
-       unsigned long ra_info;
-       int win, hits, readahead;
 
        page = find_get_page(swap_address_space(entry), swp_offset(entry));
 
        INC_CACHE_INFO(find_total);
        if (page) {
+               bool vma_ra = swap_use_vma_readahead();
+               bool readahead;
+
                INC_CACHE_INFO(find_success);
+               /*
+                * At the moment, we don't support PG_readahead for anon THP
+                * so let's bail out rather than confusing the readahead stat.
+                */
                if (unlikely(PageTransCompound(page)))
                        return page;
+
                readahead = TestClearPageReadahead(page);
-               if (vma) {
-                       ra_info = GET_SWAP_RA_VAL(vma);
-                       win = SWAP_RA_WIN(ra_info);
-                       hits = SWAP_RA_HITS(ra_info);
+               if (vma && vma_ra) {
+                       unsigned long ra_val;
+                       int win, hits;
+
+                       ra_val = GET_SWAP_RA_VAL(vma);
+                       win = SWAP_RA_WIN(ra_val);
+                       hits = SWAP_RA_HITS(ra_val);
                        if (readahead)
                                hits = min_t(int, hits + 1, SWAP_RA_HITS_MAX);
                        atomic_long_set(&vma->swap_readahead_info,
                                        SWAP_RA_VAL(addr, win, hits));
                }
+
                if (readahead) {
                        count_vm_event(SWAP_RA_HIT);
-                       if (!vma)
+                       if (!vma || !vma_ra)
                                atomic_inc(&swapin_readahead_hits);
                }
        }
+
        return page;
 }
 
@@ -586,8 +597,7 @@ struct page *swapin_readahead(swp_entry_t entry, gfp_t gfp_mask,
                        continue;
                if (page_allocated) {
                        swap_readpage(page, false);
-                       if (offset != entry_offset &&
-                           likely(!PageTransCompound(page))) {
+                       if (offset != entry_offset) {
                                SetPageReadahead(page);
                                count_vm_event(SWAP_RA);
                        }
@@ -649,16 +659,15 @@ static inline void swap_ra_clamp_pfn(struct vm_area_struct *vma,
                    PFN_DOWN((faddr & PMD_MASK) + PMD_SIZE));
 }
 
-struct page *swap_readahead_detect(struct vm_fault *vmf,
-                                  struct vma_swap_readahead *swap_ra)
+static void swap_ra_info(struct vm_fault *vmf,
+                       struct vma_swap_readahead *ra_info)
 {
        struct vm_area_struct *vma = vmf->vma;
-       unsigned long swap_ra_info;
-       struct page *page;
+       unsigned long ra_val;
        swp_entry_t entry;
        unsigned long faddr, pfn, fpfn;
        unsigned long start, end;
-       pte_t *pte;
+       pte_t *pte, *orig_pte;
        unsigned int max_win, hits, prev_win, win, left;
 #ifndef CONFIG_64BIT
        pte_t *tpte;
@@ -667,30 +676,32 @@ struct page *swap_readahead_detect(struct vm_fault *vmf,
        max_win = 1 << min_t(unsigned int, READ_ONCE(page_cluster),
                             SWAP_RA_ORDER_CEILING);
        if (max_win == 1) {
-               swap_ra->win = 1;
-               return NULL;
+               ra_info->win = 1;
+               return;
        }
 
        faddr = vmf->address;
-       entry = pte_to_swp_entry(vmf->orig_pte);
-       if ((unlikely(non_swap_entry(entry))))
-               return NULL;
-       page = lookup_swap_cache(entry, vma, faddr);
-       if (page)
-               return page;
+       orig_pte = pte = pte_offset_map(vmf->pmd, faddr);
+       entry = pte_to_swp_entry(*pte);
+       if ((unlikely(non_swap_entry(entry)))) {
+               pte_unmap(orig_pte);
+               return;
+       }
 
        fpfn = PFN_DOWN(faddr);
-       swap_ra_info = GET_SWAP_RA_VAL(vma);
-       pfn = PFN_DOWN(SWAP_RA_ADDR(swap_ra_info));
-       prev_win = SWAP_RA_WIN(swap_ra_info);
-       hits = SWAP_RA_HITS(swap_ra_info);
-       swap_ra->win = win = __swapin_nr_pages(pfn, fpfn, hits,
+       ra_val = GET_SWAP_RA_VAL(vma);
+       pfn = PFN_DOWN(SWAP_RA_ADDR(ra_val));
+       prev_win = SWAP_RA_WIN(ra_val);
+       hits = SWAP_RA_HITS(ra_val);
+       ra_info->win = win = __swapin_nr_pages(pfn, fpfn, hits,
                                               max_win, prev_win);
        atomic_long_set(&vma->swap_readahead_info,
                        SWAP_RA_VAL(faddr, win, 0));
 
-       if (win == 1)
-               return NULL;
+       if (win == 1) {
+               pte_unmap(orig_pte);
+               return;
+       }
 
        /* Copy the PTEs because the page table may be unmapped */
        if (fpfn == pfn + 1)
@@ -703,23 +714,21 @@ struct page *swap_readahead_detect(struct vm_fault *vmf,
                swap_ra_clamp_pfn(vma, faddr, fpfn - left, fpfn + win - left,
                                  &start, &end);
        }
-       swap_ra->nr_pte = end - start;
-       swap_ra->offset = fpfn - start;
-       pte = vmf->pte - swap_ra->offset;
+       ra_info->nr_pte = end - start;
+       ra_info->offset = fpfn - start;
+       pte -= ra_info->offset;
 #ifdef CONFIG_64BIT
-       swap_ra->ptes = pte;
+       ra_info->ptes = pte;
 #else
-       tpte = swap_ra->ptes;
+       tpte = ra_info->ptes;
        for (pfn = start; pfn != end; pfn++)
                *tpte++ = *pte++;
 #endif
-
-       return NULL;
+       pte_unmap(orig_pte);
 }
 
 struct page *do_swap_page_readahead(swp_entry_t fentry, gfp_t gfp_mask,
-                                   struct vm_fault *vmf,
-                                   struct vma_swap_readahead *swap_ra)
+                                   struct vm_fault *vmf)
 {
        struct blk_plug plug;
        struct vm_area_struct *vma = vmf->vma;
@@ -728,12 +737,14 @@ struct page *do_swap_page_readahead(swp_entry_t fentry, gfp_t gfp_mask,
        swp_entry_t entry;
        unsigned int i;
        bool page_allocated;
+       struct vma_swap_readahead ra_info = {0,};
 
-       if (swap_ra->win == 1)
+       swap_ra_info(vmf, &ra_info);
+       if (ra_info.win == 1)
                goto skip;
 
        blk_start_plug(&plug);
-       for (i = 0, pte = swap_ra->ptes; i < swap_ra->nr_pte;
+       for (i = 0, pte = ra_info.ptes; i < ra_info.nr_pte;
             i++, pte++) {
                pentry = *pte;
                if (pte_none(pentry))
@@ -749,8 +760,7 @@ struct page *do_swap_page_readahead(swp_entry_t fentry, gfp_t gfp_mask,
                        continue;
                if (page_allocated) {
                        swap_readpage(page, false);
-                       if (i != swap_ra->offset &&
-                           likely(!PageTransCompound(page))) {
+                       if (i != ra_info.offset) {
                                SetPageReadahead(page);
                                count_vm_event(SWAP_RA);
                        }
@@ -761,7 +771,7 @@ struct page *do_swap_page_readahead(swp_entry_t fentry, gfp_t gfp_mask,
        lru_add_drain();
 skip:
        return read_swap_cache_async(fentry, gfp_mask, vma, vmf->address,
-                                    swap_ra->win == 1);
+                                    ra_info.win == 1);
 }
 
 #ifdef CONFIG_SYSFS