WIP: update tizen_qemu_defconfig

[platform/kernel/linux-starfive.git] / mm / rmap.c
diff --git a/mm/rmap.c b/mm/rmap.c

index 93d5a6f..7da2d8d 100644 (file)
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -23,10 +23,9 @@
   * inode->i_rwsem      (while writing or truncating, not reading or faulting)
   *   mm->mmap_lock
   *     mapping->invalidate_lock (in filemap_fault)
- *       page->flags PG_locked (lock_page)   * (see hugetlbfs below)
- *         hugetlbfs_i_mmap_rwsem_key (in huge_pmd_share)
+ *       page->flags PG_locked (lock_page)
+ *         hugetlbfs_i_mmap_rwsem_key (in huge_pmd_share, see hugetlbfs below)
   *           mapping->i_mmap_rwsem
- *             hugetlb_fault_mutex (hugetlbfs specific page fault mutex)
   *             anon_vma->rwsem
   *               mm->page_table_lock or pte_lock
   *                 swap_lock (in swap_duplicate, swap_info_get)
@@ -46,10 +45,11 @@
   *   ->tasklist_lock
   *     pte map lock
   *
- * * hugetlbfs PageHuge() pages take locks in this order:
- *         mapping->i_mmap_rwsem
- *           hugetlb_fault_mutex (hugetlbfs specific page fault mutex)
- *             page->flags PG_locked (lock_page)
+ * hugetlbfs PageHuge() pages take locks in this order:
+ *   hugetlb_fault_mutex (hugetlbfs specific page fault mutex)
+ *     vma_lock (hugetlb specific lock for pmd_sharing)
+ *       mapping->i_mmap_rwsem (also used for hugetlb pmd sharing)
+ *         page->flags PG_locked (lock_page)
   */
  
  #include <linux/mm.h>
@@ -489,16 +489,16 @@ void __init anon_vma_init(void)
   * if there is a mapcount, we can dereference the anon_vma after observing
   * those.
   */
-struct anon_vma *page_get_anon_vma(struct page *page)
+struct anon_vma *folio_get_anon_vma(struct folio *folio)
  {
         struct anon_vma *anon_vma = NULL;
         unsigned long anon_mapping;
  
         rcu_read_lock();
-       anon_mapping = (unsigned long)READ_ONCE(page->mapping);
+       anon_mapping = (unsigned long)READ_ONCE(folio->mapping);
         if ((anon_mapping & PAGE_MAPPING_FLAGS) != PAGE_MAPPING_ANON)
                 goto out;
-       if (!page_mapped(page))
+       if (!folio_mapped(folio))
                 goto out;
  
         anon_vma = (struct anon_vma *) (anon_mapping - PAGE_MAPPING_ANON);
@@ -508,13 +508,13 @@ struct anon_vma *page_get_anon_vma(struct page *page)
         }
  
         /*
-        * If this page is still mapped, then its anon_vma cannot have been
+        * If this folio is still mapped, then its anon_vma cannot have been
          * freed.  But if it has been unmapped, we have no security against the
          * anon_vma structure being freed and reused (for another anon_vma:
          * SLAB_TYPESAFE_BY_RCU guarantees that - so the atomic_inc_not_zero()
          * above cannot corrupt).
          */
-       if (!page_mapped(page)) {
+       if (!folio_mapped(folio)) {
                 rcu_read_unlock();
                 put_anon_vma(anon_vma);
                 return NULL;
@@ -526,11 +526,11 @@ out:
  }
  
  /*
- * Similar to page_get_anon_vma() except it locks the anon_vma.
+ * Similar to folio_get_anon_vma() except it locks the anon_vma.
   *
   * Its a little more complex as it tries to keep the fast path to a single
   * atomic op -- the trylock. If we fail the trylock, we fall back to getting a
- * reference like with page_get_anon_vma() and then block on the mutex
+ * reference like with folio_get_anon_vma() and then block on the mutex
   * on !rwc->try_lock case.
   */
  struct anon_vma *folio_lock_anon_vma_read(struct folio *folio,
@@ -602,11 +602,6 @@ out:
         return anon_vma;
  }
  
-void page_unlock_anon_vma_read(struct anon_vma *anon_vma)
-{
-       anon_vma_unlock_read(anon_vma);
-}
-
  #ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
  /*
   * Flush TLB entries for recently unmapped pages from remote CPUs. It is
@@ -770,13 +765,17 @@ unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma)
         return vma_address(page, vma);
  }
  
+/*
+ * Returns the actual pmd_t* where we expect 'address' to be mapped from, or
+ * NULL if it doesn't exist.  No guarantees / checks on what the pmd_t*
+ * represents.
+ */
  pmd_t *mm_find_pmd(struct mm_struct *mm, unsigned long address)
  {
         pgd_t *pgd;
         p4d_t *p4d;
         pud_t *pud;
         pmd_t *pmd = NULL;
-       pmd_t pmde;
  
         pgd = pgd_offset(mm, address);
         if (!pgd_present(*pgd))
@@ -791,15 +790,6 @@ pmd_t *mm_find_pmd(struct mm_struct *mm, unsigned long address)
                 goto out;
  
         pmd = pmd_offset(pud, address);
-       /*
-        * Some THP functions use the sequence pmdp_huge_clear_flush(), set_pmd_at()
-        * without holding anon_vma lock for write.  So when looking for a
-        * genuine pmde (in which to find pte), test present and !THP together.
-        */
-       pmde = *pmd;
-       barrier();
-       if (!pmd_present(pmde) || pmd_trans_huge(pmde))
-               pmd = NULL;
  out:
         return pmd;
  }
@@ -833,6 +823,12 @@ static bool folio_referenced_one(struct folio *folio,
                 }
  
                 if (pvmw.pte) {
+                       if (lru_gen_enabled() && pte_young(*pvmw.pte) &&
+                           !(vma->vm_flags & (VM_SEQ_READ | VM_RAND_READ))) {
+                               lru_gen_look_around(&pvmw);
+                               referenced++;
+                       }
+
                         if (ptep_clear_flush_young_notify(vma, address,
                                                 pvmw.pte)) {
                                 /*
@@ -1101,22 +1097,20 @@ int pfn_mkclean_range(unsigned long pfn, unsigned long nr_pages, pgoff_t pgoff,
   */
  void page_move_anon_rmap(struct page *page, struct vm_area_struct *vma)
  {
-       struct anon_vma *anon_vma = vma->anon_vma;
-       struct page *subpage = page;
-
-       page = compound_head(page);
+       void *anon_vma = vma->anon_vma;
+       struct folio *folio = page_folio(page);
  
-       VM_BUG_ON_PAGE(!PageLocked(page), page);
+       VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);
         VM_BUG_ON_VMA(!anon_vma, vma);
  
-       anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON;
+       anon_vma += PAGE_MAPPING_ANON;
         /*
          * Ensure that anon_vma and the PAGE_MAPPING_ANON bit are written
          * simultaneously, so a concurrent reader (eg folio_referenced()'s
          * folio_test_anon()) will not see one without the other.
          */
-       WRITE_ONCE(page->mapping, (struct address_space *) anon_vma);
-       SetPageAnonExclusive(subpage);
+       WRITE_ONCE(folio->mapping, anon_vma);
+       SetPageAnonExclusive(page);
  }
  
  /**
@@ -1560,33 +1554,45 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
                          * To call huge_pmd_unshare, i_mmap_rwsem must be
                          * held in write mode.  Caller needs to explicitly
                          * do this outside rmap routines.
+                        *
+                        * We also must hold hugetlb vma_lock in write mode.
+                        * Lock order dictates acquiring vma_lock BEFORE
+                        * i_mmap_rwsem.  We can only trylock here and fail
+                        * if unsuccessful.
                          */
-                       VM_BUG_ON(!anon && !(flags & TTU_RMAP_LOCKED));
-                       if (!anon && huge_pmd_unshare(mm, vma, address, pvmw.pte)) {
-                               flush_tlb_range(vma, range.start, range.end);
-                               mmu_notifier_invalidate_range(mm, range.start,
-                                                             range.end);
-
-                               /*
-                                * The ref count of the PMD page was dropped
-                                * which is part of the way map counting
-                                * is done for shared PMDs.  Return 'true'
-                                * here.  When there is no other sharing,
-                                * huge_pmd_unshare returns false and we will
-                                * unmap the actual page and drop map count
-                                * to zero.
-                                */
-                               page_vma_mapped_walk_done(&pvmw);
-                               break;
+                       if (!anon) {
+                               VM_BUG_ON(!(flags & TTU_RMAP_LOCKED));
+                               if (!hugetlb_vma_trylock_write(vma)) {
+                                       page_vma_mapped_walk_done(&pvmw);
+                                       ret = false;
+                                       break;
+                               }
+                               if (huge_pmd_unshare(mm, vma, address, pvmw.pte)) {
+                                       hugetlb_vma_unlock_write(vma);
+                                       flush_tlb_range(vma,
+                                               range.start, range.end);
+                                       mmu_notifier_invalidate_range(mm,
+                                               range.start, range.end);
+                                       /*
+                                        * The ref count of the PMD page was
+                                        * dropped which is part of the way map
+                                        * counting is done for shared PMDs.
+                                        * Return 'true' here.  When there is
+                                        * no other sharing, huge_pmd_unshare
+                                        * returns false and we will unmap the
+                                        * actual page and drop map count
+                                        * to zero.
+                                        */
+                                       page_vma_mapped_walk_done(&pvmw);
+                                       break;
+                               }
+                               hugetlb_vma_unlock_write(vma);
                         }
                         pteval = huge_ptep_clear_flush(vma, address, pvmw.pte);
                 } else {
                         flush_cache_page(vma, address, pte_pfn(*pvmw.pte));
-                       /*
-                        * Nuke the page table entry. When having to clear
-                        * PageAnonExclusive(), we always have to flush.
-                        */
-                       if (should_defer_flush(mm, flags) && !anon_exclusive) {
+                       /* Nuke the page table entry. */
+                       if (should_defer_flush(mm, flags)) {
                                 /*
                                  * We clear the PTE but do not flush so potentially
                                  * a remote CPU could still be writing to the folio.
@@ -1617,7 +1623,7 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
                 /* Update high watermark before we lower rss */
                 update_hiwater_rss(mm);
  
-               if (PageHWPoison(subpage) && !(flags & TTU_IGNORE_HWPOISON)) {
+               if (PageHWPoison(subpage) && (flags & TTU_HWPOISON)) {
                         pteval = swp_entry_to_pte(make_hwpoison_entry(subpage));
                         if (folio_test_hugetlb(folio)) {
                                 hugetlb_count_sub(folio_nr_pages(folio), mm);
@@ -1717,6 +1723,8 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
                                 page_vma_mapped_walk_done(&pvmw);
                                 break;
                         }
+
+                       /* See page_try_share_anon_rmap(): clear PTE first. */
                         if (anon_exclusive &&
                             page_try_share_anon_rmap(subpage)) {
                                 swap_free(entry);
@@ -1936,26 +1944,41 @@ static bool try_to_migrate_one(struct folio *folio, struct vm_area_struct *vma,
                          * To call huge_pmd_unshare, i_mmap_rwsem must be
                          * held in write mode.  Caller needs to explicitly
                          * do this outside rmap routines.
+                        *
+                        * We also must hold hugetlb vma_lock in write mode.
+                        * Lock order dictates acquiring vma_lock BEFORE
+                        * i_mmap_rwsem.  We can only trylock here and
+                        * fail if unsuccessful.
                          */
-                       VM_BUG_ON(!anon && !(flags & TTU_RMAP_LOCKED));
-                       if (!anon && huge_pmd_unshare(mm, vma, address, pvmw.pte)) {
-                               flush_tlb_range(vma, range.start, range.end);
-                               mmu_notifier_invalidate_range(mm, range.start,
-                                                             range.end);
-
-                               /*
-                                * The ref count of the PMD page was dropped
-                                * which is part of the way map counting
-                                * is done for shared PMDs.  Return 'true'
-                                * here.  When there is no other sharing,
-                                * huge_pmd_unshare returns false and we will
-                                * unmap the actual page and drop map count
-                                * to zero.
-                                */
-                               page_vma_mapped_walk_done(&pvmw);
-                               break;
+                       if (!anon) {
+                               VM_BUG_ON(!(flags & TTU_RMAP_LOCKED));
+                               if (!hugetlb_vma_trylock_write(vma)) {
+                                       page_vma_mapped_walk_done(&pvmw);
+                                       ret = false;
+                                       break;
+                               }
+                               if (huge_pmd_unshare(mm, vma, address, pvmw.pte)) {
+                                       hugetlb_vma_unlock_write(vma);
+                                       flush_tlb_range(vma,
+                                               range.start, range.end);
+                                       mmu_notifier_invalidate_range(mm,
+                                               range.start, range.end);
+
+                                       /*
+                                        * The ref count of the PMD page was
+                                        * dropped which is part of the way map
+                                        * counting is done for shared PMDs.
+                                        * Return 'true' here.  When there is
+                                        * no other sharing, huge_pmd_unshare
+                                        * returns false and we will unmap the
+                                        * actual page and drop map count
+                                        * to zero.
+                                        */
+                                       page_vma_mapped_walk_done(&pvmw);
+                                       break;
+                               }
+                               hugetlb_vma_unlock_write(vma);
                         }
-
                         /* Nuke the hugetlb page table entry */
                         pteval = huge_ptep_clear_flush(vma, address, pvmw.pte);
                 } else {
@@ -2048,6 +2071,8 @@ static bool try_to_migrate_one(struct folio *folio, struct vm_area_struct *vma,
                         }
                         VM_BUG_ON_PAGE(pte_write(pteval) && folio_test_anon(folio) &&
                                        !anon_exclusive, subpage);
+
+                       /* See page_try_share_anon_rmap(): clear PTE first. */
                         if (anon_exclusive &&
                             page_try_share_anon_rmap(subpage)) {
                                 if (folio_test_hugetlb(folio))
@@ -2073,7 +2098,10 @@ static bool try_to_migrate_one(struct folio *folio, struct vm_area_struct *vma,
                         else
                                 entry = make_readable_migration_entry(
                                                         page_to_pfn(subpage));
-
+                       if (pte_young(pteval))
+                               entry = make_migration_entry_young(entry);
+                       if (pte_dirty(pteval))
+                               entry = make_migration_entry_dirty(entry);
                         swp_pte = swp_entry_to_pte(entry);
                         if (pte_soft_dirty(pteval))
                                 swp_pte = pte_swp_mksoft_dirty(swp_pte);