WIP: update tizen_qemu_defconfig
[platform/kernel/linux-starfive.git] mm/vmscan.c
index 04d8b88..d182961 100644
@@ -740,6 +740,8 @@ EXPORT_SYMBOL(register_shrinker);
  */
 void unregister_shrinker(struct shrinker *shrinker)
 {
+       struct dentry *debugfs_entry;
+
        if (!(shrinker->flags & SHRINKER_REGISTERED))
                return;
 
@@ -748,9 +750,11 @@ void unregister_shrinker(struct shrinker *shrinker)
        shrinker->flags &= ~SHRINKER_REGISTERED;
        if (shrinker->flags & SHRINKER_MEMCG_AWARE)
                unregister_memcg_shrinker(shrinker);
-       shrinker_debugfs_remove(shrinker);
+       debugfs_entry = shrinker_debugfs_remove(shrinker);
        up_write(&shrinker_rwsem);
 
+       debugfs_remove_recursive(debugfs_entry);
+
        kfree(shrinker->nr_deferred);
        shrinker->nr_deferred = NULL;
 }
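
This only fixes the lock ordering if shrinker_debugfs_remove() now merely detaches the debugfs entry while shrinker_rwsem is held and hands it back to the caller, leaving the actual debugfs_remove_recursive() for after up_write(). A minimal sketch of that counterpart change (in mm/shrinker_debug.c; the field name used here is an assumption, not shown in this diff):

struct dentry *shrinker_debugfs_remove(struct shrinker *shrinker)
{
        /* Assumed field: the dentry created by shrinker_debugfs_add(). */
        struct dentry *entry = shrinker->debugfs_entry;

        lockdep_assert_held(&shrinker_rwsem);

        shrinker->debugfs_entry = NULL;

        /*
         * The caller removes it with debugfs_remove_recursive() after
         * dropping shrinker_rwsem, so debugfs locks never nest inside it.
         */
        return entry;
}
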
@@ -1883,6 +1887,16 @@ retry:
                        }
                }
 
+               /*
+                * The folio is unmapped now, so it cannot be newly pinned anymore.
+                * There is no point in trying to reclaim the folio if it is pinned.
+                * Furthermore, we don't want to reclaim underlying fs metadata
+                * if the folio is pinned and thus potentially modified by the
+                * pinning process, as that may upset the filesystem.
+                */
+               if (folio_maybe_dma_pinned(folio))
+                       goto activate_locked;
+
                mapping = folio_mapping(folio);
                if (folio_test_dirty(folio)) {
                        /*
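
Note that folio_maybe_dma_pinned() is a heuristic, not an exact test: for small folios it only compares the refcount against GUP_PIN_COUNTING_BIAS, so a folio with very many ordinary references can be misreported as pinned and merely stays on the active list, an acceptable false positive for reclaim. A simplified sketch of the idea (not the verbatim helper from include/linux/mm.h; large folios use a dedicated pin counter that is omitted here):

/* Illustrative approximation of the heuristic used above. */
static inline bool folio_maybe_dma_pinned_sketch(struct folio *folio)
{
        /*
         * Each FOLL_PIN adds GUP_PIN_COUNTING_BIAS (1024) to the refcount,
         * so a refcount at or above the bias is treated as "maybe pinned".
         */
        return folio_ref_count(folio) >= GUP_PIN_COUNTING_BIAS;
}
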
@@ -2514,8 +2528,20 @@ static unsigned long shrink_inactive_list(unsigned long nr_to_scan,
         * the flushers simply cannot keep up with the allocation
         * rate. Nudge the flusher threads in case they are asleep.
         */
-       if (stat.nr_unqueued_dirty == nr_taken)
+       if (stat.nr_unqueued_dirty == nr_taken) {
                wakeup_flusher_threads(WB_REASON_VMSCAN);
+               /*
+                * For cgroup v1, dirty throttling is achieved by waking up
+                * the kernel flusher here and later waiting on folios
+                * which are in writeback to finish (see shrink_folio_list()).
+                *
+                * The flusher may not be able to issue writeback quickly
+                * enough for cgroup v1 writeback throttling to work
+                * on a large system.
+                */
+               if (!writeback_throttling_sane(sc))
+                       reclaim_throttle(pgdat, VMSCAN_THROTTLE_WRITEBACK);
+       }
 
        sc->nr.dirty += stat.nr_dirty;
        sc->nr.congested += stat.nr_congested;
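
The new throttling only kicks in when writeback_throttling_sane() is false, i.e. for cgroup v1 (legacy hierarchy) reclaim, where balance_dirty_pages() has no per-memcg dirty throttling to fall back on. For reference, the existing helper in mm/vmscan.c looks roughly like this (a sketch from memory, not copied from this diff):

static bool writeback_throttling_sane(struct scan_control *sc)
{
        /* Global reclaim: dirtiers are throttled in balance_dirty_pages(). */
        if (!cgroup_reclaim(sc))
                return true;
#ifdef CONFIG_CGROUP_WRITEBACK
        /* cgroup v2: per-memcg writeback throttles dirtiers directly. */
        if (cgroup_subsys_on_dfl(memory_cgrp_subsys))
                return true;
#endif
        /* cgroup v1: no sane dirty throttling, reclaim has to compensate. */
        return false;
}
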
@@ -3278,13 +3304,16 @@ void lru_gen_migrate_mm(struct mm_struct *mm)
        if (mem_cgroup_disabled())
                return;
 
+       /* migration can happen before addition */
+       if (!mm->lru_gen.memcg)
+               return;
+
        rcu_read_lock();
        memcg = mem_cgroup_from_task(task);
        rcu_read_unlock();
        if (memcg == mm->lru_gen.memcg)
                return;
 
-       VM_WARN_ON_ONCE(!mm->lru_gen.memcg);
        VM_WARN_ON_ONCE(list_empty(&mm->lru_gen.list));
 
        lru_gen_del_mm(mm);
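
The NULL check is safe because mm->lru_gen.memcg is only assigned when the mm is linked into its owner's per-memcg mm_list; a task that migrates cgroups before that point reaches lru_gen_migrate_mm() with the field still NULL, and nothing needs to be moved. An abridged sketch of the addition side (modelled on lru_gen_add_mm(); details are assumptions, not part of this diff):

/* Abridged: only the part relevant to the NULL-memcg window. */
void lru_gen_add_mm_sketch(struct mm_struct *mm)
{
        struct mem_cgroup *memcg = get_mem_cgroup_from_mm(mm);

        VM_WARN_ON_ONCE(!list_empty(&mm->lru_gen.list));
#ifdef CONFIG_MEMCG
        /*
         * lru_gen.memcg becomes non-NULL only here; a cgroup migration
         * that happens earlier must simply return in lru_gen_migrate_mm().
         */
        VM_WARN_ON_ONCE(mm->lru_gen.memcg);
        mm->lru_gen.memcg = memcg;
#endif
        /* followed by linking the mm into the per-memcg mm_list (omitted) */
}
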
@@ -3975,7 +4004,7 @@ static void walk_pmd_range_locked(pud_t *pud, unsigned long next, struct vm_area
                        goto next;
 
                if (!pmd_trans_huge(pmd[i])) {
-                       if (IS_ENABLED(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG) &&
+                       if (arch_has_hw_nonleaf_pmd_young() &&
                            get_cap(LRU_GEN_NONLEAF_YOUNG))
                                pmdp_test_and_clear_young(vma, addr, pmd + i);
                        goto next;
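
arch_has_hw_nonleaf_pmd_young() turns the former compile-time IS_ENABLED() test into a runtime query, so an architecture that selects CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG can still decline at run time (for example when the accessed bit in non-leaf PMDs is not usable under a paravirtualized MMU). The generic fallback presumably collapses back to the old behaviour; a sketch of such a default in include/linux/pgtable.h:

#ifndef arch_has_hw_nonleaf_pmd_young
/*
 * Sketch of the generic fallback: without an architecture override,
 * the runtime check is equivalent to the former compile-time test.
 */
static inline bool arch_has_hw_nonleaf_pmd_young(void)
{
        return IS_ENABLED(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG);
}
#endif
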
@@ -4073,14 +4102,14 @@ restart:
 #endif
                walk->mm_stats[MM_NONLEAF_TOTAL]++;
 
-#ifdef CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG
-               if (get_cap(LRU_GEN_NONLEAF_YOUNG)) {
+               if (arch_has_hw_nonleaf_pmd_young() &&
+                   get_cap(LRU_GEN_NONLEAF_YOUNG)) {
                        if (!pmd_young(val))
                                continue;
 
                        walk_pmd_range_locked(pud, addr, vma, args, bitmap, &pos);
                }
-#endif
+
                if (!walk->force_scan && !test_bloom_filter(walk->lruvec, walk->max_seq, pmd + i))
                        continue;
 
@@ -4971,10 +5000,13 @@ static int evict_folios(struct lruvec *lruvec, struct scan_control *sc, int swap
        int scanned;
        int reclaimed;
        LIST_HEAD(list);
+       LIST_HEAD(clean);
        struct folio *folio;
+       struct folio *next;
        enum vm_event_item item;
        struct reclaim_stat stat;
        struct lru_gen_mm_walk *walk;
+       bool skip_retry = false;
        struct mem_cgroup *memcg = lruvec_memcg(lruvec);
        struct pglist_data *pgdat = lruvec_pgdat(lruvec);
 
@@ -4991,20 +5023,37 @@ static int evict_folios(struct lruvec *lruvec, struct scan_control *sc, int swap
 
        if (list_empty(&list))
                return scanned;
-
+retry:
        reclaimed = shrink_folio_list(&list, pgdat, sc, &stat, false);
+       sc->nr_reclaimed += reclaimed;
 
-       list_for_each_entry(folio, &list, lru) {
-               /* restore LRU_REFS_FLAGS cleared by isolate_folio() */
-               if (folio_test_workingset(folio))
-                       folio_set_referenced(folio);
+       list_for_each_entry_safe_reverse(folio, next, &list, lru) {
+               if (!folio_evictable(folio)) {
+                       list_del(&folio->lru);
+                       folio_putback_lru(folio);
+                       continue;
+               }
 
-               /* don't add rejected pages to the oldest generation */
                if (folio_test_reclaim(folio) &&
-                   (folio_test_dirty(folio) || folio_test_writeback(folio)))
-                       folio_clear_active(folio);
-               else
-                       folio_set_active(folio);
+                   (folio_test_dirty(folio) || folio_test_writeback(folio))) {
+                       /* restore LRU_REFS_FLAGS cleared by isolate_folio() */
+                       if (folio_test_workingset(folio))
+                               folio_set_referenced(folio);
+                       continue;
+               }
+
+               if (skip_retry || folio_test_active(folio) || folio_test_referenced(folio) ||
+                   folio_mapped(folio) || folio_test_locked(folio) ||
+                   folio_test_dirty(folio) || folio_test_writeback(folio)) {
+                       /* don't add rejected folios to the oldest generation */
+                       set_mask_bits(&folio->flags, LRU_REFS_MASK | LRU_REFS_FLAGS,
+                                     BIT(PG_active));
+                       continue;
+               }
+
+               /* retry folios that may have missed folio_rotate_reclaimable() */
+               list_move(&folio->lru, &clean);
+               sc->nr_scanned -= folio_nr_pages(folio);
        }
 
        spin_lock_irq(&lruvec->lru_lock);
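
In the rejection branch above, set_mask_bits() clears the folio's LRU reference history (LRU_REFS_MASK | LRU_REFS_FLAGS) and sets PG_active in a single atomic update of folio->flags, so a rejected folio is promoted instead of being fed back to the oldest generation. Its net effect is equivalent to the following non-atomic sketch (the real macro uses a cmpxchg loop on the flags word):

/* Illustrative only: shows the net effect of set_mask_bits(). */
static inline void set_mask_bits_sketch(unsigned long *flags,
                                        unsigned long mask,
                                        unsigned long bits)
{
        *flags = (*flags & ~mask) | bits;
}
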
@@ -5026,7 +5075,13 @@ static int evict_folios(struct lruvec *lruvec, struct scan_control *sc, int swap
        mem_cgroup_uncharge_list(&list);
        free_unref_page_list(&list);
 
-       sc->nr_reclaimed += reclaimed;
+       INIT_LIST_HEAD(&list);
+       list_splice_init(&clean, &list);
+
+       if (!list_empty(&list)) {
+               skip_retry = true;
+               goto retry;
+       }
 
        if (need_swapping && type == LRU_GEN_ANON)
                *need_swapping = true;
@@ -5354,7 +5409,7 @@ static ssize_t show_enabled(struct kobject *kobj, struct kobj_attribute *attr, c
        if (arch_has_hw_pte_young() && get_cap(LRU_GEN_MM_WALK))
                caps |= BIT(LRU_GEN_MM_WALK);
 
-       if (IS_ENABLED(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG) && get_cap(LRU_GEN_NONLEAF_YOUNG))
+       if (arch_has_hw_nonleaf_pmd_young() && get_cap(LRU_GEN_NONLEAF_YOUNG))
                caps |= BIT(LRU_GEN_NONLEAF_YOUNG);
 
        return snprintf(buf, PAGE_SIZE, "0x%04x\n", caps);
@@ -5844,8 +5899,8 @@ static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
        enum lru_list lru;
        unsigned long nr_reclaimed = 0;
        unsigned long nr_to_reclaim = sc->nr_to_reclaim;
+       bool proportional_reclaim;
        struct blk_plug plug;
-       bool scan_adjusted;
 
        if (lru_gen_enabled()) {
                lru_gen_shrink_lruvec(lruvec, sc);
@@ -5868,8 +5923,8 @@ static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
         * abort proportional reclaim if either the file or anon lru has already
         * dropped to zero at the first pass.
         */
-       scan_adjusted = (!cgroup_reclaim(sc) && !current_is_kswapd() &&
-                        sc->priority == DEF_PRIORITY);
+       proportional_reclaim = (!cgroup_reclaim(sc) && !current_is_kswapd() &&
+                               sc->priority == DEF_PRIORITY);
 
        blk_start_plug(&plug);
        while (nr[LRU_INACTIVE_ANON] || nr[LRU_ACTIVE_FILE] ||
@@ -5889,7 +5944,7 @@ static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
 
                cond_resched();
 
-               if (nr_reclaimed < nr_to_reclaim || scan_adjusted)
+               if (nr_reclaimed < nr_to_reclaim || proportional_reclaim)
                        continue;
 
                /*
@@ -5940,8 +5995,6 @@ static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
                nr_scanned = targets[lru] - nr[lru];
                nr[lru] = targets[lru] * (100 - percentage) / 100;
                nr[lru] -= min(nr[lru], nr_scanned);
-
-               scan_adjusted = true;
        }
        blk_finish_plug(&plug);
        sc->nr_reclaimed += nr_reclaimed;