BACKPORT: mm: multi-gen LRU: kill switch
[platform/kernel/linux-rpi.git] / mm / vmscan.c
index 66e7770..020f367 100644 (file)
@@ -53,6 +53,7 @@
 #include <linux/psi.h>
 #include <linux/pagewalk.h>
 #include <linux/shmem_fs.h>
+#include <linux/ctype.h>
 
 #include <asm/tlbflush.h>
 #include <asm/div64.h>
@@ -2549,6 +2550,14 @@ out:
 
 #ifdef CONFIG_LRU_GEN
 
+/*
+ * Capability switches for the multi-gen LRU, one static key per capability
+ * (NR_LRU_GEN_CAPS of them). With CONFIG_LRU_GEN_ENABLED the keys start out
+ * true and get_cap() tests them with the "likely" branch form; otherwise they
+ * start out false and get_cap() uses the "unlikely" form.
+ */
+#ifdef CONFIG_LRU_GEN_ENABLED
+DEFINE_STATIC_KEY_ARRAY_TRUE(lru_gen_caps, NR_LRU_GEN_CAPS);
+#define get_cap(cap)   static_branch_likely(&lru_gen_caps[cap])
+#else
+DEFINE_STATIC_KEY_ARRAY_FALSE(lru_gen_caps, NR_LRU_GEN_CAPS);
+#define get_cap(cap)   static_branch_unlikely(&lru_gen_caps[cap])
+#endif
+
 /******************************************************************************
  *                          shorthand helpers
  ******************************************************************************/
@@ -3426,7 +3435,8 @@ static void walk_pmd_range_locked(pud_t *pud, unsigned long next, struct vm_area
                        goto next;
 
                if (!pmd_trans_huge(pmd[i])) {
-                       if (IS_ENABLED(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG))
+                       if (IS_ENABLED(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG) &&
+                           get_cap(LRU_GEN_NONLEAF_YOUNG))
                                pmdp_test_and_clear_young(vma, addr, pmd + i);
                        goto next;
                }
@@ -3524,10 +3534,12 @@ restart:
                walk->mm_stats[MM_NONLEAF_TOTAL]++;
 
 #ifdef CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG
-               if (!pmd_young(val))
-                       continue;
+               if (get_cap(LRU_GEN_NONLEAF_YOUNG)) {
+                       if (!pmd_young(val))
+                               continue;
 
-               walk_pmd_range_locked(pud, addr, vma, args, bitmap, &pos);
+                       walk_pmd_range_locked(pud, addr, vma, args, bitmap, &pos);
+               }
 #endif
                if (!walk->force_scan && !test_bloom_filter(walk->lruvec, walk->max_seq, pmd + i))
                        continue;
@@ -3789,7 +3801,7 @@ static bool try_to_inc_max_seq(struct lruvec *lruvec, unsigned long max_seq,
         * handful of PTEs. Spreading the work out over a period of time usually
         * is less efficient, but it avoids bursty page faults.
         */
-       if (!arch_has_hw_pte_young()) {
+       if (!(arch_has_hw_pte_young() && get_cap(LRU_GEN_MM_WALK))) {
                success = iterate_mm_list_nowalk(lruvec, max_seq);
                goto done;
        }
@@ -4552,6 +4564,210 @@ done:
 }
 
 /******************************************************************************
+ *                          state change
+ ******************************************************************************/
+
+/*
+ * Consistency check for a lruvec: when the multi-gen LRU is enabled for it,
+ * all of its evictable active/inactive lists must be empty; when it is
+ * disabled, all of its multi-gen lists must be empty.
+ *
+ * Returns true if no page was found on the lists that should be unused.
+ */
+static bool __maybe_unused state_is_valid(struct lruvec *lruvec)
+{
+       struct lru_gen_struct *lrugen = &lruvec->lrugen;
+       enum lru_list lru;
+       int gen, type, zone;
+
+       if (!lrugen->enabled) {
+               /* disabled: nothing may be left on the multi-gen lists */
+               for_each_gen_type_zone(gen, type, zone) {
+                       if (!list_empty(&lrugen->lists[gen][type][zone]))
+                               return false;
+               }
+
+               return true;
+       }
+
+       /* enabled: nothing may be left on the active/inactive lists */
+       for_each_evictable_lru(lru) {
+               if (!list_empty(&lruvec->lists[lru]))
+                       return false;
+       }
+
+       return true;
+}
+
+/*
+ * Move pages from the evictable active/inactive lists of @lruvec onto the
+ * multi-gen lists, handling at most MAX_LRU_BATCH pages per call.
+ *
+ * Returns true once every evictable list is empty, or false when the batch
+ * budget ran out and the caller has to call again to finish the job.
+ */
+static bool fill_evictable(struct lruvec *lruvec)
+{
+       enum lru_list lru;
+       int remaining = MAX_LRU_BATCH;
+
+       for_each_evictable_lru(lru) {
+               int type = is_file_lru(lru);
+               bool active = is_active_lru(lru);
+               struct list_head *head = &lruvec->lists[lru];
+
+               while (!list_empty(head)) {
+                       bool success;
+                       struct page *page = lru_to_page(head);
+
+                       VM_WARN_ON_ONCE_PAGE(PageUnevictable(page), page);
+                       VM_WARN_ON_ONCE_PAGE(PageActive(page) != active, page);
+                       VM_WARN_ON_ONCE_PAGE(page_is_file_cache(page) != type, page);
+                       VM_WARN_ON_ONCE_PAGE(page_lru_gen(page) != -1, page);
+
+                       /*
+                        * The page was taken from lists[lru], so delete it from
+                        * exactly that list. Do not shadow lru with
+                        * page_lru_base_type() here: the base type names the
+                        * inactive list even for a page on an active list.
+                        */
+                       del_page_from_lru_list(page, lruvec, lru);
+                       success = lru_gen_add_page(lruvec, page, false);
+                       VM_WARN_ON_ONCE(!success);
+
+                       if (!--remaining)
+                               return false;
+               }
+       }
+
+       return true;
+}
+
+/*
+ * Move pages from the multi-gen lists of @lruvec back onto the
+ * active/inactive lists, handling at most MAX_LRU_BATCH pages per call.
+ *
+ * Returns true once every multi-gen list is empty, or false when the batch
+ * budget ran out and the caller has to call again to finish the job.
+ */
+static bool drain_evictable(struct lruvec *lruvec)
+{
+       int gen, type, zone;
+       int remaining = MAX_LRU_BATCH;
+
+       for_each_gen_type_zone(gen, type, zone) {
+               struct list_head *head = &lruvec->lrugen.lists[gen][type][zone];
+
+               while (!list_empty(head)) {
+                       bool success;
+                       struct page *page = lru_to_page(head);
+
+                       VM_WARN_ON_ONCE_PAGE(PageUnevictable(page), page);
+                       VM_WARN_ON_ONCE_PAGE(PageActive(page), page);
+                       VM_WARN_ON_ONCE_PAGE(page_is_file_cache(page) != type, page);
+                       VM_WARN_ON_ONCE_PAGE(page_zonenum(page) != zone, page);
+
+                       success = lru_gen_del_page(lruvec, page, false);
+                       VM_WARN_ON_ONCE(!success);
+
+                       /*
+                        * Choose the destination list only after the page has
+                        * left the multi-gen lists, and from the page's current
+                        * flags. NOTE(review): upstream's lru_gen_del_folio()
+                        * sets PG_active on pages from the youngest generations;
+                        * assuming lru_gen_del_page() does the same here,
+                        * page_lru() keeps the flag and the list in agreement,
+                        * whereas the base type would always pick the inactive
+                        * list.
+                        */
+                       add_page_to_lru_list(page, lruvec, page_lru(page));
+
+                       if (!--remaining)
+                               return false;
+               }
+       }
+
+       return true;
+}
+
+/*
+ * Switch the multi-gen LRU core on or off and migrate the pages of every
+ * lruvec to the lists that match the new state.
+ *
+ * @enabled: the requested state; nothing is done if it already matches
+ *           lru_gen_enabled().
+ *
+ * Runs with the cgroup lock, the CPU hotplug read lock, the memory hotplug
+ * lock and a function-local mutex held, taken in that order and released in
+ * reverse.
+ */
+static void lru_gen_change_state(bool enabled)
+{
+       static DEFINE_MUTEX(state_mutex);
+
+       struct mem_cgroup *memcg;
+
+       cgroup_lock();
+       cpus_read_lock();
+       get_online_mems();
+       mutex_lock(&state_mutex);
+
+       if (enabled == lru_gen_enabled())
+               goto unlock;
+
+       /* cpus_read_lock() is already held above, hence the _cpuslocked variants */
+       if (enabled)
+               static_branch_enable_cpuslocked(&lru_gen_caps[LRU_GEN_CORE]);
+       else
+               static_branch_disable_cpuslocked(&lru_gen_caps[LRU_GEN_CORE]);
+
+       /* visit every memcg, and within each one the lruvec of every node */
+       memcg = mem_cgroup_iter(NULL, NULL, NULL);
+       do {
+               int nid;
+
+               for_each_node(nid) {
+                       struct lruvec *lruvec = get_lruvec(memcg, nid);
+
+                       if (!lruvec)
+                               continue;
+
+                       spin_lock_irq(&lruvec_pgdat(lruvec)->lru_lock);
+
+                       VM_WARN_ON_ONCE(!seq_is_valid(lruvec));
+                       VM_WARN_ON_ONCE(!state_is_valid(lruvec));
+
+                       lruvec->lrugen.enabled = enabled;
+
+                       /*
+                        * Pages are moved in batches; whenever a batch ends
+                        * before the lists are empty, drop the lock and give
+                        * the scheduler a chance before continuing.
+                        */
+                       while (!(enabled ? fill_evictable(lruvec) : drain_evictable(lruvec))) {
+                               spin_unlock_irq(&lruvec_pgdat(lruvec)->lru_lock);
+                               cond_resched();
+                               spin_lock_irq(&lruvec_pgdat(lruvec)->lru_lock);
+                       }
+
+                       spin_unlock_irq(&lruvec_pgdat(lruvec)->lru_lock);
+               }
+
+               cond_resched();
+       } while ((memcg = mem_cgroup_iter(NULL, memcg, NULL)));
+unlock:
+       mutex_unlock(&state_mutex);
+       put_online_mems();
+       cpus_read_unlock();
+       cgroup_unlock();
+}
+
+/******************************************************************************
+ *                          sysfs interface
+ ******************************************************************************/
+
+/*
+ * sysfs read handler for the "enabled" attribute.
+ *
+ * Prints the currently effective capabilities as a hexadecimal bitmask, one
+ * bit per LRU_GEN_* capability. The MM walk and non-leaf young bits are only
+ * reported when the architecture/config supports them and the corresponding
+ * static key is on.
+ *
+ * Returns the value of snprintf() on @buf, bounded by PAGE_SIZE.
+ */
+static ssize_t show_enabled(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
+{
+       unsigned int mask = 0;
+
+       if (IS_ENABLED(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG)) {
+               if (get_cap(LRU_GEN_NONLEAF_YOUNG))
+                       mask |= BIT(LRU_GEN_NONLEAF_YOUNG);
+       }
+
+       if (arch_has_hw_pte_young()) {
+               if (get_cap(LRU_GEN_MM_WALK))
+                       mask |= BIT(LRU_GEN_MM_WALK);
+       }
+
+       if (get_cap(LRU_GEN_CORE))
+               mask |= BIT(LRU_GEN_CORE);
+
+       return snprintf(buf, PAGE_SIZE, "0x%04x\n", mask);
+}
+
+/*
+ * sysfs write handler for the "enabled" attribute.
+ *
+ * Input starting with 'n' or 'N' clears every capability, input starting
+ * with 'y' or 'Y' sets every capability, and anything else is parsed by
+ * kstrtouint() (base auto-detected) as a capability bitmask.
+ *
+ * The LRU_GEN_CORE bit goes through lru_gen_change_state(); every other
+ * capability just has its static key switched.
+ *
+ * Returns @len on success or -EINVAL if the input cannot be parsed.
+ */
+static ssize_t store_enabled(struct kobject *kobj, struct kobj_attribute *attr,
+                            const char *buf, size_t len)
+{
+       unsigned int caps;
+       int cap;
+       char first = tolower(*buf);
+
+       if (first == 'n') {
+               caps = 0;
+       } else if (first == 'y') {
+               caps = ~0U;
+       } else {
+               if (kstrtouint(buf, 0, &caps))
+                       return -EINVAL;
+       }
+
+       for (cap = 0; cap < NR_LRU_GEN_CAPS; cap++) {
+               bool enabled = caps & BIT(cap);
+
+               if (cap == LRU_GEN_CORE) {
+                       lru_gen_change_state(enabled);
+                       continue;
+               }
+
+               if (enabled)
+                       static_branch_enable(&lru_gen_caps[cap]);
+               else
+                       static_branch_disable(&lru_gen_caps[cap]);
+       }
+
+       return len;
+}
+
+/* "enabled" attribute, mode 0644, backed by show_enabled()/store_enabled() */
+static struct kobj_attribute lru_gen_enabled_attr = __ATTR(
+       enabled, 0644, show_enabled, store_enabled
+);
+
+/* NULL-terminated list of the attributes in the group */
+static struct attribute *lru_gen_attrs[] = {
+       &lru_gen_enabled_attr.attr,
+       NULL
+};
+
+/* attribute group named "lru_gen"; init_lru_gen() registers it on mm_kobj */
+static struct attribute_group lru_gen_attr_group = {
+       .name = "lru_gen",
+       .attrs = lru_gen_attrs,
+};
+
+/******************************************************************************
  *                          initialization
  ******************************************************************************/
 
@@ -4561,6 +4777,7 @@ void lru_gen_init_lruvec(struct lruvec *lruvec)
        struct lru_gen_struct *lrugen = &lruvec->lrugen;
 
        lrugen->max_seq = MIN_NR_GENS + 1;
+       lrugen->enabled = lru_gen_enabled();
 
        for_each_gen_type_zone(gen, type, zone)
                INIT_LIST_HEAD(&lrugen->lists[gen][type][zone]);
@@ -4600,6 +4817,9 @@ static int __init init_lru_gen(void)
        BUILD_BUG_ON(MIN_NR_GENS + 1 >= MAX_NR_GENS);
        BUILD_BUG_ON(BIT(LRU_GEN_WIDTH) <= MAX_NR_GENS);
 
+       if (sysfs_create_group(mm_kobj, &lru_gen_attr_group))
+               pr_err("lru_gen: failed to create sysfs group\n");
+
        return 0;
 };
 late_initcall(init_lru_gen);