Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
[platform/kernel/linux-rpi.git] / mm / list_lru.c
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (c) 2013 Red Hat, Inc. and Parallels Inc. All rights reserved.
4  * Authors: David Chinner and Glauber Costa
5  *
6  * Generic LRU infrastructure
7  */
8 #include <linux/kernel.h>
9 #include <linux/module.h>
10 #include <linux/mm.h>
11 #include <linux/list_lru.h>
12 #include <linux/slab.h>
13 #include <linux/mutex.h>
14 #include <linux/memcontrol.h>
15 #include "slab.h"
16
17 #ifdef CONFIG_MEMCG_KMEM
18 static LIST_HEAD(list_lrus);
19 static DEFINE_MUTEX(list_lrus_mutex);
20
21 static void list_lru_register(struct list_lru *lru)
22 {
23         mutex_lock(&list_lrus_mutex);
24         list_add(&lru->list, &list_lrus);
25         mutex_unlock(&list_lrus_mutex);
26 }
27
28 static void list_lru_unregister(struct list_lru *lru)
29 {
30         mutex_lock(&list_lrus_mutex);
31         list_del(&lru->list);
32         mutex_unlock(&list_lrus_mutex);
33 }
34
35 static int lru_shrinker_id(struct list_lru *lru)
36 {
37         return lru->shrinker_id;
38 }
39
40 static inline bool list_lru_memcg_aware(struct list_lru *lru)
41 {
42         return lru->memcg_aware;
43 }
44
45 static inline struct list_lru_one *
46 list_lru_from_memcg_idx(struct list_lru_node *nlru, int idx)
47 {
48         struct list_lru_memcg *memcg_lrus;
49         /*
50          * Either lock or RCU protects the array of per cgroup lists
51          * from relocation (see memcg_update_list_lru_node).
52          */
53         memcg_lrus = rcu_dereference_check(nlru->memcg_lrus,
54                                            lockdep_is_held(&nlru->lock));
55         if (memcg_lrus && idx >= 0)
56                 return memcg_lrus->lru[idx];
57         return &nlru->lru;
58 }
59
60 static inline struct list_lru_one *
61 list_lru_from_kmem(struct list_lru_node *nlru, void *ptr,
62                    struct mem_cgroup **memcg_ptr)
63 {
64         struct list_lru_one *l = &nlru->lru;
65         struct mem_cgroup *memcg = NULL;
66
67         if (!nlru->memcg_lrus)
68                 goto out;
69
70         memcg = mem_cgroup_from_obj(ptr);
71         if (!memcg)
72                 goto out;
73
74         l = list_lru_from_memcg_idx(nlru, memcg_cache_id(memcg));
75 out:
76         if (memcg_ptr)
77                 *memcg_ptr = memcg;
78         return l;
79 }
80 #else
/* !CONFIG_MEMCG_KMEM: no global list of lrus is kept, nothing to register. */
static void list_lru_register(struct list_lru *lru)
{
}
84
/* !CONFIG_MEMCG_KMEM: counterpart of the empty list_lru_register() stub. */
static void list_lru_unregister(struct list_lru *lru)
{
}
88
/* !CONFIG_MEMCG_KMEM: always reports -1 as the shrinker id. */
static int lru_shrinker_id(struct list_lru *lru)
{
	return -1;
}
93
/* !CONFIG_MEMCG_KMEM: an lru is never memcg aware. */
static inline bool list_lru_memcg_aware(struct list_lru *lru)
{
	return false;
}
98
99 static inline struct list_lru_one *
100 list_lru_from_memcg_idx(struct list_lru_node *nlru, int idx)
101 {
102         return &nlru->lru;
103 }
104
105 static inline struct list_lru_one *
106 list_lru_from_kmem(struct list_lru_node *nlru, void *ptr,
107                    struct mem_cgroup **memcg_ptr)
108 {
109         if (memcg_ptr)
110                 *memcg_ptr = NULL;
111         return &nlru->lru;
112 }
113 #endif /* CONFIG_MEMCG_KMEM */
114
115 bool list_lru_add(struct list_lru *lru, struct list_head *item)
116 {
117         int nid = page_to_nid(virt_to_page(item));
118         struct list_lru_node *nlru = &lru->node[nid];
119         struct mem_cgroup *memcg;
120         struct list_lru_one *l;
121
122         spin_lock(&nlru->lock);
123         if (list_empty(item)) {
124                 l = list_lru_from_kmem(nlru, item, &memcg);
125                 list_add_tail(item, &l->list);
126                 /* Set shrinker bit if the first element was added */
127                 if (!l->nr_items++)
128                         memcg_set_shrinker_bit(memcg, nid,
129                                                lru_shrinker_id(lru));
130                 nlru->nr_items++;
131                 spin_unlock(&nlru->lock);
132                 return true;
133         }
134         spin_unlock(&nlru->lock);
135         return false;
136 }
137 EXPORT_SYMBOL_GPL(list_lru_add);
138
139 bool list_lru_del(struct list_lru *lru, struct list_head *item)
140 {
141         int nid = page_to_nid(virt_to_page(item));
142         struct list_lru_node *nlru = &lru->node[nid];
143         struct list_lru_one *l;
144
145         spin_lock(&nlru->lock);
146         if (!list_empty(item)) {
147                 l = list_lru_from_kmem(nlru, item, NULL);
148                 list_del_init(item);
149                 l->nr_items--;
150                 nlru->nr_items--;
151                 spin_unlock(&nlru->lock);
152                 return true;
153         }
154         spin_unlock(&nlru->lock);
155         return false;
156 }
157 EXPORT_SYMBOL_GPL(list_lru_del);
158
159 void list_lru_isolate(struct list_lru_one *list, struct list_head *item)
160 {
161         list_del_init(item);
162         list->nr_items--;
163 }
164 EXPORT_SYMBOL_GPL(list_lru_isolate);
165
166 void list_lru_isolate_move(struct list_lru_one *list, struct list_head *item,
167                            struct list_head *head)
168 {
169         list_move(item, head);
170         list->nr_items--;
171 }
172 EXPORT_SYMBOL_GPL(list_lru_isolate_move);
173
174 unsigned long list_lru_count_one(struct list_lru *lru,
175                                  int nid, struct mem_cgroup *memcg)
176 {
177         struct list_lru_node *nlru = &lru->node[nid];
178         struct list_lru_one *l;
179         unsigned long count;
180
181         rcu_read_lock();
182         l = list_lru_from_memcg_idx(nlru, memcg_cache_id(memcg));
183         count = READ_ONCE(l->nr_items);
184         rcu_read_unlock();
185
186         return count;
187 }
188 EXPORT_SYMBOL_GPL(list_lru_count_one);
189
190 unsigned long list_lru_count_node(struct list_lru *lru, int nid)
191 {
192         struct list_lru_node *nlru;
193
194         nlru = &lru->node[nid];
195         return nlru->nr_items;
196 }
197 EXPORT_SYMBOL_GPL(list_lru_count_node);
198
199 static unsigned long
200 __list_lru_walk_one(struct list_lru_node *nlru, int memcg_idx,
201                     list_lru_walk_cb isolate, void *cb_arg,
202                     unsigned long *nr_to_walk)
203 {
204
205         struct list_lru_one *l;
206         struct list_head *item, *n;
207         unsigned long isolated = 0;
208
209         l = list_lru_from_memcg_idx(nlru, memcg_idx);
210 restart:
211         list_for_each_safe(item, n, &l->list) {
212                 enum lru_status ret;
213
214                 /*
215                  * decrement nr_to_walk first so that we don't livelock if we
216                  * get stuck on large numbers of LRU_RETRY items
217                  */
218                 if (!*nr_to_walk)
219                         break;
220                 --*nr_to_walk;
221
222                 ret = isolate(item, l, &nlru->lock, cb_arg);
223                 switch (ret) {
224                 case LRU_REMOVED_RETRY:
225                         assert_spin_locked(&nlru->lock);
226                         fallthrough;
227                 case LRU_REMOVED:
228                         isolated++;
229                         nlru->nr_items--;
230                         /*
231                          * If the lru lock has been dropped, our list
232                          * traversal is now invalid and so we have to
233                          * restart from scratch.
234                          */
235                         if (ret == LRU_REMOVED_RETRY)
236                                 goto restart;
237                         break;
238                 case LRU_ROTATE:
239                         list_move_tail(item, &l->list);
240                         break;
241                 case LRU_SKIP:
242                         break;
243                 case LRU_RETRY:
244                         /*
245                          * The lru lock has been dropped, our list traversal is
246                          * now invalid and so we have to restart from scratch.
247                          */
248                         assert_spin_locked(&nlru->lock);
249                         goto restart;
250                 default:
251                         BUG();
252                 }
253         }
254         return isolated;
255 }
256
257 unsigned long
258 list_lru_walk_one(struct list_lru *lru, int nid, struct mem_cgroup *memcg,
259                   list_lru_walk_cb isolate, void *cb_arg,
260                   unsigned long *nr_to_walk)
261 {
262         struct list_lru_node *nlru = &lru->node[nid];
263         unsigned long ret;
264
265         spin_lock(&nlru->lock);
266         ret = __list_lru_walk_one(nlru, memcg_cache_id(memcg), isolate, cb_arg,
267                                   nr_to_walk);
268         spin_unlock(&nlru->lock);
269         return ret;
270 }
271 EXPORT_SYMBOL_GPL(list_lru_walk_one);
272
273 unsigned long
274 list_lru_walk_one_irq(struct list_lru *lru, int nid, struct mem_cgroup *memcg,
275                       list_lru_walk_cb isolate, void *cb_arg,
276                       unsigned long *nr_to_walk)
277 {
278         struct list_lru_node *nlru = &lru->node[nid];
279         unsigned long ret;
280
281         spin_lock_irq(&nlru->lock);
282         ret = __list_lru_walk_one(nlru, memcg_cache_id(memcg), isolate, cb_arg,
283                                   nr_to_walk);
284         spin_unlock_irq(&nlru->lock);
285         return ret;
286 }
287
288 unsigned long list_lru_walk_node(struct list_lru *lru, int nid,
289                                  list_lru_walk_cb isolate, void *cb_arg,
290                                  unsigned long *nr_to_walk)
291 {
292         long isolated = 0;
293         int memcg_idx;
294
295         isolated += list_lru_walk_one(lru, nid, NULL, isolate, cb_arg,
296                                       nr_to_walk);
297         if (*nr_to_walk > 0 && list_lru_memcg_aware(lru)) {
298                 for_each_memcg_cache_index(memcg_idx) {
299                         struct list_lru_node *nlru = &lru->node[nid];
300
301                         spin_lock(&nlru->lock);
302                         isolated += __list_lru_walk_one(nlru, memcg_idx,
303                                                         isolate, cb_arg,
304                                                         nr_to_walk);
305                         spin_unlock(&nlru->lock);
306
307                         if (*nr_to_walk <= 0)
308                                 break;
309                 }
310         }
311         return isolated;
312 }
313 EXPORT_SYMBOL_GPL(list_lru_walk_node);
314
315 static void init_one_lru(struct list_lru_one *l)
316 {
317         INIT_LIST_HEAD(&l->list);
318         l->nr_items = 0;
319 }
320
321 #ifdef CONFIG_MEMCG_KMEM
322 static void __memcg_destroy_list_lru_node(struct list_lru_memcg *memcg_lrus,
323                                           int begin, int end)
324 {
325         int i;
326
327         for (i = begin; i < end; i++)
328                 kfree(memcg_lrus->lru[i]);
329 }
330
331 static int __memcg_init_list_lru_node(struct list_lru_memcg *memcg_lrus,
332                                       int begin, int end)
333 {
334         int i;
335
336         for (i = begin; i < end; i++) {
337                 struct list_lru_one *l;
338
339                 l = kmalloc(sizeof(struct list_lru_one), GFP_KERNEL);
340                 if (!l)
341                         goto fail;
342
343                 init_one_lru(l);
344                 memcg_lrus->lru[i] = l;
345         }
346         return 0;
347 fail:
348         __memcg_destroy_list_lru_node(memcg_lrus, begin, i);
349         return -ENOMEM;
350 }
351
352 static int memcg_init_list_lru_node(struct list_lru_node *nlru)
353 {
354         struct list_lru_memcg *memcg_lrus;
355         int size = memcg_nr_cache_ids;
356
357         memcg_lrus = kvmalloc(sizeof(*memcg_lrus) +
358                               size * sizeof(void *), GFP_KERNEL);
359         if (!memcg_lrus)
360                 return -ENOMEM;
361
362         if (__memcg_init_list_lru_node(memcg_lrus, 0, size)) {
363                 kvfree(memcg_lrus);
364                 return -ENOMEM;
365         }
366         RCU_INIT_POINTER(nlru->memcg_lrus, memcg_lrus);
367
368         return 0;
369 }
370
371 static void memcg_destroy_list_lru_node(struct list_lru_node *nlru)
372 {
373         struct list_lru_memcg *memcg_lrus;
374         /*
375          * This is called when shrinker has already been unregistered,
376          * and nobody can use it. So, there is no need to use kvfree_rcu_local().
377          */
378         memcg_lrus = rcu_dereference_protected(nlru->memcg_lrus, true);
379         __memcg_destroy_list_lru_node(memcg_lrus, 0, memcg_nr_cache_ids);
380         kvfree(memcg_lrus);
381 }
382
383 static void kvfree_rcu_local(struct rcu_head *head)
384 {
385         struct list_lru_memcg *mlru;
386
387         mlru = container_of(head, struct list_lru_memcg, rcu);
388         kvfree(mlru);
389 }
390
391 static int memcg_update_list_lru_node(struct list_lru_node *nlru,
392                                       int old_size, int new_size)
393 {
394         struct list_lru_memcg *old, *new;
395
396         BUG_ON(old_size > new_size);
397
398         old = rcu_dereference_protected(nlru->memcg_lrus,
399                                         lockdep_is_held(&list_lrus_mutex));
400         new = kvmalloc(sizeof(*new) + new_size * sizeof(void *), GFP_KERNEL);
401         if (!new)
402                 return -ENOMEM;
403
404         if (__memcg_init_list_lru_node(new, old_size, new_size)) {
405                 kvfree(new);
406                 return -ENOMEM;
407         }
408
409         memcpy(&new->lru, &old->lru, old_size * sizeof(void *));
410
411         /*
412          * The locking below allows readers that hold nlru->lock avoid taking
413          * rcu_read_lock (see list_lru_from_memcg_idx).
414          *
415          * Since list_lru_{add,del} may be called under an IRQ-safe lock,
416          * we have to use IRQ-safe primitives here to avoid deadlock.
417          */
418         spin_lock_irq(&nlru->lock);
419         rcu_assign_pointer(nlru->memcg_lrus, new);
420         spin_unlock_irq(&nlru->lock);
421
422         call_rcu(&old->rcu, kvfree_rcu_local);
423         return 0;
424 }
425
426 static void memcg_cancel_update_list_lru_node(struct list_lru_node *nlru,
427                                               int old_size, int new_size)
428 {
429         struct list_lru_memcg *memcg_lrus;
430
431         memcg_lrus = rcu_dereference_protected(nlru->memcg_lrus,
432                                                lockdep_is_held(&list_lrus_mutex));
433         /* do not bother shrinking the array back to the old size, because we
434          * cannot handle allocation failures here */
435         __memcg_destroy_list_lru_node(memcg_lrus, old_size, new_size);
436 }
437
438 static int memcg_init_list_lru(struct list_lru *lru, bool memcg_aware)
439 {
440         int i;
441
442         lru->memcg_aware = memcg_aware;
443
444         if (!memcg_aware)
445                 return 0;
446
447         for_each_node(i) {
448                 if (memcg_init_list_lru_node(&lru->node[i]))
449                         goto fail;
450         }
451         return 0;
452 fail:
453         for (i = i - 1; i >= 0; i--) {
454                 if (!lru->node[i].memcg_lrus)
455                         continue;
456                 memcg_destroy_list_lru_node(&lru->node[i]);
457         }
458         return -ENOMEM;
459 }
460
461 static void memcg_destroy_list_lru(struct list_lru *lru)
462 {
463         int i;
464
465         if (!list_lru_memcg_aware(lru))
466                 return;
467
468         for_each_node(i)
469                 memcg_destroy_list_lru_node(&lru->node[i]);
470 }
471
472 static int memcg_update_list_lru(struct list_lru *lru,
473                                  int old_size, int new_size)
474 {
475         int i;
476
477         if (!list_lru_memcg_aware(lru))
478                 return 0;
479
480         for_each_node(i) {
481                 if (memcg_update_list_lru_node(&lru->node[i],
482                                                old_size, new_size))
483                         goto fail;
484         }
485         return 0;
486 fail:
487         for (i = i - 1; i >= 0; i--) {
488                 if (!lru->node[i].memcg_lrus)
489                         continue;
490
491                 memcg_cancel_update_list_lru_node(&lru->node[i],
492                                                   old_size, new_size);
493         }
494         return -ENOMEM;
495 }
496
497 static void memcg_cancel_update_list_lru(struct list_lru *lru,
498                                          int old_size, int new_size)
499 {
500         int i;
501
502         if (!list_lru_memcg_aware(lru))
503                 return;
504
505         for_each_node(i)
506                 memcg_cancel_update_list_lru_node(&lru->node[i],
507                                                   old_size, new_size);
508 }
509
510 int memcg_update_all_list_lrus(int new_size)
511 {
512         int ret = 0;
513         struct list_lru *lru;
514         int old_size = memcg_nr_cache_ids;
515
516         mutex_lock(&list_lrus_mutex);
517         list_for_each_entry(lru, &list_lrus, list) {
518                 ret = memcg_update_list_lru(lru, old_size, new_size);
519                 if (ret)
520                         goto fail;
521         }
522 out:
523         mutex_unlock(&list_lrus_mutex);
524         return ret;
525 fail:
526         list_for_each_entry_continue_reverse(lru, &list_lrus, list)
527                 memcg_cancel_update_list_lru(lru, old_size, new_size);
528         goto out;
529 }
530
531 static void memcg_drain_list_lru_node(struct list_lru *lru, int nid,
532                                       int src_idx, struct mem_cgroup *dst_memcg)
533 {
534         struct list_lru_node *nlru = &lru->node[nid];
535         int dst_idx = dst_memcg->kmemcg_id;
536         struct list_lru_one *src, *dst;
537
538         /*
539          * Since list_lru_{add,del} may be called under an IRQ-safe lock,
540          * we have to use IRQ-safe primitives here to avoid deadlock.
541          */
542         spin_lock_irq(&nlru->lock);
543
544         src = list_lru_from_memcg_idx(nlru, src_idx);
545         dst = list_lru_from_memcg_idx(nlru, dst_idx);
546
547         list_splice_init(&src->list, &dst->list);
548
549         if (src->nr_items) {
550                 dst->nr_items += src->nr_items;
551                 memcg_set_shrinker_bit(dst_memcg, nid, lru_shrinker_id(lru));
552                 src->nr_items = 0;
553         }
554
555         spin_unlock_irq(&nlru->lock);
556 }
557
558 static void memcg_drain_list_lru(struct list_lru *lru,
559                                  int src_idx, struct mem_cgroup *dst_memcg)
560 {
561         int i;
562
563         if (!list_lru_memcg_aware(lru))
564                 return;
565
566         for_each_node(i)
567                 memcg_drain_list_lru_node(lru, i, src_idx, dst_memcg);
568 }
569
570 void memcg_drain_all_list_lrus(int src_idx, struct mem_cgroup *dst_memcg)
571 {
572         struct list_lru *lru;
573
574         mutex_lock(&list_lrus_mutex);
575         list_for_each_entry(lru, &list_lrus, list)
576                 memcg_drain_list_lru(lru, src_idx, dst_memcg);
577         mutex_unlock(&list_lrus_mutex);
578 }
579 #else
/* !CONFIG_MEMCG_KMEM: there is no per-cgroup state to set up; always 0. */
static int memcg_init_list_lru(struct list_lru *lru, bool memcg_aware)
{
	return 0;
}
584
/* !CONFIG_MEMCG_KMEM: there is no per-cgroup state to tear down. */
static void memcg_destroy_list_lru(struct list_lru *lru)
{
}
588 #endif /* CONFIG_MEMCG_KMEM */
589
590 int __list_lru_init(struct list_lru *lru, bool memcg_aware,
591                     struct lock_class_key *key, struct shrinker *shrinker)
592 {
593         int i;
594         int err = -ENOMEM;
595
596 #ifdef CONFIG_MEMCG_KMEM
597         if (shrinker)
598                 lru->shrinker_id = shrinker->id;
599         else
600                 lru->shrinker_id = -1;
601 #endif
602         memcg_get_cache_ids();
603
604         lru->node = kcalloc(nr_node_ids, sizeof(*lru->node), GFP_KERNEL);
605         if (!lru->node)
606                 goto out;
607
608         for_each_node(i) {
609                 spin_lock_init(&lru->node[i].lock);
610                 if (key)
611                         lockdep_set_class(&lru->node[i].lock, key);
612                 init_one_lru(&lru->node[i].lru);
613         }
614
615         err = memcg_init_list_lru(lru, memcg_aware);
616         if (err) {
617                 kfree(lru->node);
618                 /* Do this so a list_lru_destroy() doesn't crash: */
619                 lru->node = NULL;
620                 goto out;
621         }
622
623         list_lru_register(lru);
624 out:
625         memcg_put_cache_ids();
626         return err;
627 }
628 EXPORT_SYMBOL_GPL(__list_lru_init);
629
630 void list_lru_destroy(struct list_lru *lru)
631 {
632         /* Already destroyed or not yet initialized? */
633         if (!lru->node)
634                 return;
635
636         memcg_get_cache_ids();
637
638         list_lru_unregister(lru);
639
640         memcg_destroy_list_lru(lru);
641         kfree(lru->node);
642         lru->node = NULL;
643
644 #ifdef CONFIG_MEMCG_KMEM
645         lru->shrinker_id = -1;
646 #endif
647         memcg_put_cache_ids();
648 }
649 EXPORT_SYMBOL_GPL(list_lru_destroy);