ANDROID: MGLRU: Avoid reactivation of anon pages on swap full
[platform/kernel/linux-rpi.git]

diff --git a/mm/vmscan.c b/mm/vmscan.c
index 8a4fdda..a7ccb96 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2606,7 +2606,7 @@ static int get_swappiness(struct lruvec *lruvec, struct scan_control *sc)
 
        /* FIXME: see a2a36488a61c + 26aa2d199d6f */
        if (/* !can_demote(pgdat->node_id, sc) && */
-           mem_cgroup_get_nr_swap_pages(memcg) < MIN_LRU_BATCH)
+           mem_cgroup_get_nr_swap_pages(memcg) <= 0)
                return 0;
 
        return mem_cgroup_swappiness(memcg);
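
The one-liner above is the swap-full fix the subject line refers to:
get_swappiness() used to return 0, disabling anon scanning, as soon as the
memcg's free swap fell below MIN_LRU_BATCH pages, so anon pages could end up
reactivated near swap-full instead of being left alone. With the <= 0 check,
anon scanning stays enabled while any swap remains. A minimal userspace
sketch of the two predicates; the constants are stand-ins (MIN_LRU_BATCH is
BITS_PER_LONG in the kernel, 60 is the default swappiness):

#include <stdio.h>

#define MIN_LRU_BATCH   64      /* BITS_PER_LONG on 64-bit kernels */
#define DEF_SWAPPINESS  60      /* default memcg swappiness */

static int swappiness_old(long nr_swap_pages)
{
        /* gave up on anon pages while some swap was still free */
        return nr_swap_pages < MIN_LRU_BATCH ? 0 : DEF_SWAPPINESS;
}

static int swappiness_new(long nr_swap_pages)
{
        /* keeps scanning anon pages until swap is truly full */
        return nr_swap_pages <= 0 ? 0 : DEF_SWAPPINESS;
}

int main(void)
{
        /* 32 free slots: the old check had already disabled anon scanning */
        printf("old=%d new=%d\n", swappiness_old(32), swappiness_new(32));
        return 0;
}
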
@@ -2696,18 +2696,13 @@ void lru_gen_del_mm(struct mm_struct *mm)
                if (!lruvec)
                        continue;
 
-               /* where the last iteration ended (exclusive) */
+               /* where the current iteration continues after */
+               if (lruvec->mm_state.head == &mm->lru_gen.list)
+                       lruvec->mm_state.head = lruvec->mm_state.head->prev;
+
+               /* where the last iteration ended before */
                if (lruvec->mm_state.tail == &mm->lru_gen.list)
                        lruvec->mm_state.tail = lruvec->mm_state.tail->next;
-
-               /* where the current iteration continues (inclusive) */
-               if (lruvec->mm_state.head != &mm->lru_gen.list)
-                       continue;
-
-               lruvec->mm_state.head = lruvec->mm_state.head->next;
-               /* the deletion ends the current iteration */
-               if (lruvec->mm_state.head == &mm_list->fifo)
-                       WRITE_ONCE(lruvec->mm_state.seq, lruvec->mm_state.seq + 1);
        }
 
        list_del_init(&mm->lru_gen.list);
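
With the iterator rework below, lru_gen_del_mm() repairs the cursors under
the new semantics: head now means "the iteration continues after this node",
so when the exiting mm is the cursor it steps back to its predecessor and the
unlinked node's successor is still visited; tail ("the last iteration ended
before this node") steps forward instead. Deleting an mm also no longer bumps
mm_state.seq, i.e. it no longer cuts the current iteration short. A userspace
model of the fix-up on a kernel-style circular list (a sketch, not the kernel
implementation):

struct list_head { struct list_head *prev, *next; };

struct mm_cursor {
        struct list_head *head; /* iteration continues after this node */
        struct list_head *tail; /* last iteration ended before this node */
};

/* mirror of the fix-up above, run before the node is unlinked */
static void cursor_fixup(struct mm_cursor *c, struct list_head *dying)
{
        if (c->head == dying)
                c->head = c->head->prev;
        if (c->tail == dying)
                c->tail = c->tail->next;
}

static void list_del_node(struct list_head *n)
{
        n->prev->next = n->next;
        n->next->prev = n->prev;
        n->next = n->prev = n;
}

int main(void)
{
        struct list_head fifo = { &fifo, &fifo };
        struct list_head a, b;
        struct mm_cursor cur;

        /* build fifo <-> a <-> b <-> fifo */
        a.prev = &fifo; a.next = &b;
        b.prev = &a; b.next = &fifo;
        fifo.next = &a; fifo.prev = &b;

        cur.head = &a;  /* the walker would visit b next */
        cur.tail = &a;

        cursor_fixup(&cur, &a); /* a's owner is exiting */
        list_del_node(&a);

        /* cur.head == &fifo, so the walker still visits b next;
         * cur.tail == &b, so the end marker stays valid too */
        return 0;
}
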
@@ -2888,68 +2883,54 @@ static bool iterate_mm_list(struct lruvec *lruvec, struct lru_gen_mm_walk *walk,
                            struct mm_struct **iter)
 {
        bool first = false;
-       bool last = true;
+       bool last = false;
        struct mm_struct *mm = NULL;
        struct mem_cgroup *memcg = lruvec_memcg(lruvec);
        struct lru_gen_mm_list *mm_list = get_mm_list(memcg);
        struct lru_gen_mm_state *mm_state = &lruvec->mm_state;
 
        /*
-        * There are four interesting cases for this page table walker:
-        * 1. It tries to start a new iteration of mm_list with a stale max_seq;
-        *    there is nothing left to do.
-        * 2. It's the first of the current generation, and it needs to reset
-        *    the Bloom filter for the next generation.
-        * 3. It reaches the end of mm_list, and it needs to increment
-        *    mm_state->seq; the iteration is done.
-        * 4. It's the last of the current generation, and it needs to reset the
-        *    mm stats counters for the next generation.
+        * mm_state->seq is incremented after each iteration of mm_list. There
+        * are three interesting cases for this page table walker:
+        * 1. It tries to start a new iteration with a stale max_seq: there is
+        *    nothing left to do.
+        * 2. It started the next iteration: it needs to reset the Bloom filter
+        *    so that a fresh set of PTE tables can be recorded.
+        * 3. It ended the current iteration: it needs to reset the mm stats
+        *    counters and tell its caller to increment max_seq.
         */
        spin_lock(&mm_list->lock);
 
        VM_WARN_ON_ONCE(mm_state->seq + 1 < walk->max_seq);
-       VM_WARN_ON_ONCE(*iter && mm_state->seq > walk->max_seq);
-       VM_WARN_ON_ONCE(*iter && !mm_state->nr_walkers);
 
-       if (walk->max_seq <= mm_state->seq) {
-               if (!*iter)
-                       last = false;
+       if (walk->max_seq <= mm_state->seq)
                goto done;
-       }
 
-       if (!mm_state->nr_walkers) {
-               VM_WARN_ON_ONCE(mm_state->head && mm_state->head != &mm_list->fifo);
+       if (!mm_state->head)
+               mm_state->head = &mm_list->fifo;
 
-               mm_state->head = mm_list->fifo.next;
+       if (mm_state->head == &mm_list->fifo)
                first = true;
-       }
-
-       while (!mm && mm_state->head != &mm_list->fifo) {
-               mm = list_entry(mm_state->head, struct mm_struct, lru_gen.list);
 
+       do {
                mm_state->head = mm_state->head->next;
+               if (mm_state->head == &mm_list->fifo) {
+                       WRITE_ONCE(mm_state->seq, mm_state->seq + 1);
+                       last = true;
+                       break;
+               }
 
                /* force scan for those added after the last iteration */
-               if (!mm_state->tail || mm_state->tail == &mm->lru_gen.list) {
-                       mm_state->tail = mm_state->head;
+               if (!mm_state->tail || mm_state->tail == mm_state->head) {
+                       mm_state->tail = mm_state->head->next;
                        walk->force_scan = true;
                }
 
+               mm = list_entry(mm_state->head, struct mm_struct, lru_gen.list);
                if (should_skip_mm(mm, walk))
                        mm = NULL;
-       }
-
-       if (mm_state->head == &mm_list->fifo)
-               WRITE_ONCE(mm_state->seq, mm_state->seq + 1);
+       } while (!mm);
 done:
-       if (*iter && !mm)
-               mm_state->nr_walkers--;
-       if (!*iter && mm)
-               mm_state->nr_walkers++;
-
-       if (mm_state->nr_walkers)
-               last = false;
-
        if (*iter || last)
                reset_mm_stats(lruvec, walk, last);
 
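
The four-case bookkeeping collapses into one do/while: mm_state->head is now
the last node handed out (NULL means "start a fresh iteration at the
sentinel"), the walker that steps head onto the sentinel is by definition the
last one and bumps mm_state->seq on the spot, and the nr_walkers count
disappears altogether. Continuing the userspace model above (locking, mm
stats, and the real should_skip_mm() filter omitted):

#include <stddef.h>

static unsigned long mm_seq;    /* stands in for mm_state->seq */

/*
 * Returns the next node to walk, or NULL once this walker has wrapped
 * around and finished the iteration (*last is then set).
 */
static struct list_head *next_mm(struct mm_cursor *c,
                                 struct list_head *fifo, int *last)
{
        struct list_head *mm = NULL;

        if (!c->head)
                c->head = fifo; /* first walker of this generation */

        do {
                c->head = c->head->next;
                if (c->head == fifo) {
                        /* wrapped around: this walker is the last one */
                        mm_seq++;
                        *last = 1;
                        break;
                }
                /* nodes appended after tail get a forced full scan */
                if (!c->tail || c->tail == c->head)
                        c->tail = c->head->next;

                mm = c->head;
                /* should_skip_mm() may set mm back to NULL here */
        } while (!mm);

        return mm;
}
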
@@ -2977,9 +2958,9 @@ static bool iterate_mm_list_nowalk(struct lruvec *lruvec, unsigned long max_seq)
 
        VM_WARN_ON_ONCE(mm_state->seq + 1 < max_seq);
 
-       if (max_seq > mm_state->seq && !mm_state->nr_walkers) {
-               VM_WARN_ON_ONCE(mm_state->head && mm_state->head != &mm_list->fifo);
-
+       if (max_seq > mm_state->seq) {
+               mm_state->head = NULL;
+               mm_state->tail = NULL;
                WRITE_ONCE(mm_state->seq, mm_state->seq + 1);
                reset_mm_stats(lruvec, NULL, true);
                success = true;
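
Since there is no nr_walkers count to drain first, the no-walk path can
advance the generation unconditionally: reset both cursors and bump the
sequence counter. In the same model:

/* model of iterate_mm_list_nowalk(): restart the iterator wholesale */
static int inc_mm_seq_nowalk(struct mm_cursor *c, unsigned long max_seq)
{
        if (max_seq <= mm_seq)
                return 0;       /* this generation was already finished */

        c->head = NULL;
        c->tail = NULL;
        mm_seq++;
        return 1;
}
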
@@ -3585,10 +3566,6 @@ restart:
 
                walk_pmd_range(&val, addr, next, args);
 
-               /* a racy check to curtail the waiting time */
-               if (wq_has_sleeper(&walk->lruvec->mm_state.wait))
-                       return 1;
-
                if (need_resched() || walk->batched >= MAX_LRU_BATCH) {
                        end = (addr | ~PUD_MASK) + 1;
                        goto done;
@@ -3621,8 +3598,14 @@ static void walk_mm(struct lruvec *lruvec, struct mm_struct *mm, struct lru_gen_
        walk->next_addr = FIRST_USER_ADDRESS;
 
        do {
+               DEFINE_MAX_SEQ(lruvec);
+
                err = -EBUSY;
 
+               /* another thread might have called inc_max_seq() */
+               if (walk->max_seq != max_seq)
+                       break;
+
                /* page_update_gen() requires stable page_memcg() */
                if (!mem_cgroup_trylock_pages(memcg))
                        break;
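
walk_mm() now re-snapshots the lruvec's max_seq at the top of every pass
(DEFINE_MAX_SEQ) and stops once it no longer matches the walker's cached
value: after a racing inc_max_seq(), finishing this walk would only produce
stale work. A standalone sketch of that staleness check, using a C11 atomic
where the kernel uses READ_ONCE():

#include <stdatomic.h>
#include <stdio.h>

static atomic_ulong shared_max_seq;     /* stands in for lrugen->max_seq */

/* returns 0 if a pass may run, -1 if the cached generation is stale */
static int walk_one_pass(unsigned long cached_max_seq)
{
        /* another thread might have called inc_max_seq() */
        if (cached_max_seq != atomic_load(&shared_max_seq))
                return -1;

        /* ... walk one bounded chunk of page tables here ... */
        return 0;
}

int main(void)
{
        unsigned long mine = atomic_load(&shared_max_seq);

        atomic_fetch_add(&shared_max_seq, 1);   /* a racing inc_max_seq() */
        printf("%d\n", walk_one_pass(mine));    /* prints -1: give up */
        return 0;
}
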
@@ -3855,26 +3838,12 @@ static bool try_to_inc_max_seq(struct lruvec *lruvec, unsigned long max_seq,
                success = iterate_mm_list(lruvec, walk, &mm);
                if (mm)
                        walk_mm(lruvec, mm, walk);
-
-               cond_resched();
        } while (mm);
 done:
-       if (!success) {
-               if (sc->priority <= DEF_PRIORITY - 2)
-                       wait_event_killable(lruvec->mm_state.wait,
-                                           max_seq < READ_ONCE(lrugen->max_seq));
-
-               return max_seq < READ_ONCE(lrugen->max_seq);
-       }
+       if (success)
+               inc_max_seq(lruvec, can_swap, force_scan);
 
-       VM_WARN_ON_ONCE(max_seq != READ_ONCE(lrugen->max_seq));
-
-       inc_max_seq(lruvec, can_swap, force_scan);
-       /* either this sees any waiters or they will see updated max_seq */
-       if (wq_has_sleeper(&lruvec->mm_state.wait))
-               wake_up_all(&lruvec->mm_state.wait);
-
-       return true;
+       return success;
 }
 
 static bool should_run_aging(struct lruvec *lruvec, unsigned long max_seq, unsigned long *min_seq,
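
This hunk carries the main behavioral change: a walker that does not finish
the mm_list iteration no longer sleeps on lruvec->mm_state.wait until the
winner bumps max_seq; it reports failure and lets the caller fall back. That
is what turns the wait queue, the wake_up_all(), the racy wq_has_sleeper()
bailout deleted from walk_pmd_range() above, and the init_waitqueue_head()
removed in the final hunk into dead code. Modeled on top of next_mm() from
the iterator sketch:

static unsigned long lru_max_seq;       /* stands in for lrugen->max_seq */

/* model of try_to_inc_max_seq(): losers return instead of sleeping */
static int try_to_advance(struct mm_cursor *c, struct list_head *fifo)
{
        struct list_head *mm;
        int last = 0;

        do {
                mm = next_mm(c, fifo, &last);
                /* the real code calls walk_mm() on each mm returned */
        } while (mm);

        if (last)
                lru_max_seq++;  /* stand-in for inc_max_seq() */

        /* callers that get 0 simply fall back; nobody sleeps anymore */
        return last;
}
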
@@ -4789,6 +4758,7 @@ static ssize_t show_min_ttl(struct kobject *kobj, struct kobj_attribute *attr, c
        return sprintf(buf, "%u\n", jiffies_to_msecs(READ_ONCE(lru_gen_min_ttl)));
 }
 
+/* see Documentation/admin-guide/mm/multigen_lru.rst for details */
 static ssize_t store_min_ttl(struct kobject *kobj, struct kobj_attribute *attr,
                             const char *buf, size_t len)
 {
@@ -4822,6 +4792,7 @@ static ssize_t show_enabled(struct kobject *kobj, struct kobj_attribute *attr, c
        return snprintf(buf, PAGE_SIZE, "0x%04x\n", caps);
 }
 
+/* see Documentation/admin-guide/mm/multigen_lru.rst for details */
 static ssize_t store_enabled(struct kobject *kobj, struct kobj_attribute *attr,
                             const char *buf, size_t len)
 {
@@ -4969,6 +4940,7 @@ static void lru_gen_seq_show_full(struct seq_file *m, struct lruvec *lruvec,
        seq_putc(m, '\n');
 }
 
+/* see Documentation/admin-guide/mm/multigen_lru.rst for details */
 static int lru_gen_seq_show(struct seq_file *m, void *v)
 {
        unsigned long seq;
@@ -5127,6 +5099,7 @@ done:
        return err;
 }
 
+/* see Documentation/admin-guide/mm/multigen_lru.rst for details */
 static ssize_t lru_gen_seq_write(struct file *file, const char __user *src,
                                 size_t len, loff_t *pos)
 {
@@ -5239,7 +5212,6 @@ void lru_gen_init_lruvec(struct lruvec *lruvec)
                INIT_LIST_HEAD(&lrugen->lists[gen][type][zone]);
 
        lruvec->mm_state.seq = MIN_NR_GENS;
-       init_waitqueue_head(&lruvec->mm_state.wait);
 }
 
 #ifdef CONFIG_MEMCG