Merge "kfence: Use pt_regs to generate stack trace on faults" into tizen
mm/zswap.c (platform/kernel/linux-rpi.git)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * zswap.c - zswap driver file
4  *
5  * zswap is a backend for frontswap that takes pages that are in the process
6  * of being swapped out and attempts to compress and store them in a
7  * RAM-based memory pool.  This can result in a significant I/O reduction on
8  * the swap device and, in the case where decompressing from RAM is faster
9  * than reading from the swap device, can also improve workload performance.
10  *
11  * Copyright (C) 2012  Seth Jennings <sjenning@linux.vnet.ibm.com>
12 */
13
14 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
15
16 #include <linux/module.h>
17 #include <linux/cpu.h>
18 #include <linux/highmem.h>
19 #include <linux/slab.h>
20 #include <linux/spinlock.h>
21 #include <linux/local_lock.h>
22 #include <linux/types.h>
23 #include <linux/atomic.h>
24 #include <linux/frontswap.h>
25 #include <linux/rbtree.h>
26 #include <linux/swap.h>
27 #include <linux/crypto.h>
28 #include <linux/mempool.h>
29 #include <linux/zpool.h>
30
31 #include <linux/mm_types.h>
32 #include <linux/page-flags.h>
33 #include <linux/swapops.h>
34 #include <linux/writeback.h>
35 #include <linux/pagemap.h>
36 #include <linux/workqueue.h>
37
38 /*********************************
39 * statistics
40 **********************************/
41 /* Total bytes used by the compressed storage */
42 static u64 zswap_pool_total_size;
43 /* The number of compressed pages currently stored in zswap */
44 static atomic_t zswap_stored_pages = ATOMIC_INIT(0);
45 /* The number of same-value filled pages currently stored in zswap */
46 static atomic_t zswap_same_filled_pages = ATOMIC_INIT(0);
47
48 /*
49  * The statistics below are not protected from concurrent access for
50  * performance reasons so they may not be 100% accurate.  However,
51  * they do provide useful information on roughly how many times a
52  * certain event is occurring.
53 */
54
55 /* Pool limit was hit (see zswap_max_pool_percent) */
56 static u64 zswap_pool_limit_hit;
57 /* Pages written back when pool limit was reached */
58 static u64 zswap_written_back_pages;
59 /* Store failed due to a reclaim failure after pool limit was reached */
60 static u64 zswap_reject_reclaim_fail;
61 /* Compressed page was too big for the allocator to (optimally) store */
62 static u64 zswap_reject_compress_poor;
63 /* Store failed because underlying allocator could not get memory */
64 static u64 zswap_reject_alloc_fail;
65 /* Store failed because the entry metadata could not be allocated (rare) */
66 static u64 zswap_reject_kmemcache_fail;
67 /* Duplicate store was encountered (rare) */
68 static u64 zswap_duplicate_entry;
69
70 /* Shrinker work queue */
71 static struct workqueue_struct *shrink_wq;
72 /* Pool limit was hit, we need to calm down */
73 static bool zswap_pool_reached_full;
74
75 /*********************************
76 * tunables
77 **********************************/
78
79 #define ZSWAP_PARAM_UNSET ""
80
81 /* Enable/disable zswap */
82 static bool zswap_enabled = IS_ENABLED(CONFIG_ZSWAP_DEFAULT_ON);
83 static int zswap_enabled_param_set(const char *,
84                                    const struct kernel_param *);
85 static struct kernel_param_ops zswap_enabled_param_ops = {
86         .set =          zswap_enabled_param_set,
87         .get =          param_get_bool,
88 };
89 module_param_cb(enabled, &zswap_enabled_param_ops, &zswap_enabled, 0644);
90
91 /* Crypto compressor to use */
92 static char *zswap_compressor = CONFIG_ZSWAP_COMPRESSOR_DEFAULT;
93 static int zswap_compressor_param_set(const char *,
94                                       const struct kernel_param *);
95 static struct kernel_param_ops zswap_compressor_param_ops = {
96         .set =          zswap_compressor_param_set,
97         .get =          param_get_charp,
98         .free =         param_free_charp,
99 };
100 module_param_cb(compressor, &zswap_compressor_param_ops,
101                 &zswap_compressor, 0644);
102
103 /* Compressed storage zpool to use */
104 static char *zswap_zpool_type = CONFIG_ZSWAP_ZPOOL_DEFAULT;
105 static int zswap_zpool_param_set(const char *, const struct kernel_param *);
106 static struct kernel_param_ops zswap_zpool_param_ops = {
107         .set =          zswap_zpool_param_set,
108         .get =          param_get_charp,
109         .free =         param_free_charp,
110 };
111 module_param_cb(zpool, &zswap_zpool_param_ops, &zswap_zpool_type, 0644);
112
113 /* The maximum percentage of memory that the compressed pool can occupy */
114 static unsigned int zswap_max_pool_percent = 20;
115 module_param_named(max_pool_percent, zswap_max_pool_percent, uint, 0644);
116
117 /* The threshold for accepting new pages after the max_pool_percent was hit */
118 static unsigned int zswap_accept_thr_percent = 90; /* of max pool size */
119 module_param_named(accept_threshold_percent, zswap_accept_thr_percent,
120                    uint, 0644);
121
122 /* Enable/disable handling same-value filled pages (enabled by default) */
123 static bool zswap_same_filled_pages_enabled = true;
124 module_param_named(same_filled_pages_enabled, zswap_same_filled_pages_enabled,
125                    bool, 0644);
126
127 /*********************************
128 * data structures
129 **********************************/
130
131 struct zswap_pool {
132         struct zpool *zpool;
133         struct crypto_comp * __percpu *tfm;
134         struct kref kref;
135         struct list_head list;
136         struct work_struct release_work;
137         struct work_struct shrink_work;
138         struct hlist_node node;
139         char tfm_name[CRYPTO_MAX_ALG_NAME];
140 };
141
142 /*
143  * struct zswap_entry
144  *
145  * This structure contains the metadata for tracking a single compressed
146  * page within zswap.
147  *
148  * rbnode - links the entry into red-black tree for the appropriate swap type
149  * offset - the swap offset for the entry.  Index into the red-black tree.
150  * refcount - the number of outstanding references to the entry. This is needed
151  *            to protect against premature freeing of the entry by concurrent
152  *            calls to load, invalidate, and writeback.  The lock
153  *            for the zswap_tree structure that contains the entry must
154  *            be held while changing the refcount.  Since the lock must
155  *            be held, there is no reason to also make refcount atomic.
156  * length - the length in bytes of the compressed page data.  Needed during
157  *          decompression. For a same-value filled page, the length is 0.
158  * pool - the zswap_pool the entry's data is in
159  * handle - zpool allocation handle that stores the compressed page data
160  * value - the repeated word value of a same-value filled page
161  */
162 struct zswap_entry {
163         struct rb_node rbnode;
164         pgoff_t offset;
165         int refcount;
166         unsigned int length;
167         struct zswap_pool *pool;
168         union {
169                 unsigned long handle;
170                 unsigned long value;
171         };
172 };
173
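/*
 * A zswap_header is stored in front of the compressed data for zpools that
 * support eviction.  It records the swap entry so that zswap_writeback_entry()
 * can recover the swap type and offset from nothing but a zpool handle.
 */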
174 struct zswap_header {
175         swp_entry_t swpentry;
176 };
177
178 /*
179  * The tree lock in the zswap_tree struct protects a few things:
180  * - the rbtree
181  * - the refcount field of each entry in the tree
182  */
183 struct zswap_tree {
184         struct rb_root rbroot;
185         spinlock_t lock;
186 };
187
188 static struct zswap_tree *zswap_trees[MAX_SWAPFILES];
189
190 /* RCU-protected iteration */
191 static LIST_HEAD(zswap_pools);
192 /* protects zswap_pools list modification */
193 static DEFINE_SPINLOCK(zswap_pools_lock);
194 /* pool counter to provide unique names to zpool */
195 static atomic_t zswap_pools_count = ATOMIC_INIT(0);
196
197 /* used by param callback function */
198 static bool zswap_init_started;
199
200 /* fatal error during init */
201 static bool zswap_init_failed;
202
203 /* init completed, but couldn't create the initial pool */
204 static bool zswap_has_pool;
205
206 /*********************************
207 * helpers and fwd declarations
208 **********************************/
209
210 #define zswap_pool_debug(msg, p)                                \
211         pr_debug("%s pool %s/%s\n", msg, (p)->tfm_name,         \
212                  zpool_get_type((p)->zpool))
213
214 static int zswap_writeback_entry(struct zpool *pool, unsigned long handle);
215 static int zswap_pool_get(struct zswap_pool *pool);
216 static void zswap_pool_put(struct zswap_pool *pool);
217
218 static const struct zpool_ops zswap_zpool_ops = {
219         .evict = zswap_writeback_entry
220 };
221
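/*
 * Pool limits are expressed as a fraction of total RAM.  With the default
 * tunables (max_pool_percent = 20, accept_thr_percent = 90), zswap_is_full()
 * reports full once the compressed pool exceeds 20% of totalram_pages, and
 * zswap_can_accept() allows new stores again only after the pool has shrunk
 * below 90% of that cap, i.e. roughly 18% of RAM.
 */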
222 static bool zswap_is_full(void)
223 {
224         return totalram_pages() * zswap_max_pool_percent / 100 <
225                         DIV_ROUND_UP(zswap_pool_total_size, PAGE_SIZE);
226 }
227
228 static bool zswap_can_accept(void)
229 {
230         return totalram_pages() * zswap_accept_thr_percent / 100 *
231                                 zswap_max_pool_percent / 100 >
232                         DIV_ROUND_UP(zswap_pool_total_size, PAGE_SIZE);
233 }
234
235 static void zswap_update_total_size(void)
236 {
237         struct zswap_pool *pool;
238         u64 total = 0;
239
240         rcu_read_lock();
241
242         list_for_each_entry_rcu(pool, &zswap_pools, list)
243                 total += zpool_get_total_size(pool->zpool);
244
245         rcu_read_unlock();
246
247         zswap_pool_total_size = total;
248 }
249
250 /*********************************
251 * zswap entry functions
252 **********************************/
253 static struct kmem_cache *zswap_entry_cache;
254
255 static int __init zswap_entry_cache_create(void)
256 {
257         zswap_entry_cache = KMEM_CACHE(zswap_entry, 0);
258         return zswap_entry_cache == NULL;
259 }
260
261 static void __init zswap_entry_cache_destroy(void)
262 {
263         kmem_cache_destroy(zswap_entry_cache);
264 }
265
266 static struct zswap_entry *zswap_entry_cache_alloc(gfp_t gfp)
267 {
268         struct zswap_entry *entry;
269         entry = kmem_cache_alloc(zswap_entry_cache, gfp);
270         if (!entry)
271                 return NULL;
272         entry->refcount = 1;
273         RB_CLEAR_NODE(&entry->rbnode);
274         return entry;
275 }
276
277 static void zswap_entry_cache_free(struct zswap_entry *entry)
278 {
279         kmem_cache_free(zswap_entry_cache, entry);
280 }
281
282 /*********************************
283 * rbtree functions
284 **********************************/
285 static struct zswap_entry *zswap_rb_search(struct rb_root *root, pgoff_t offset)
286 {
287         struct rb_node *node = root->rb_node;
288         struct zswap_entry *entry;
289
290         while (node) {
291                 entry = rb_entry(node, struct zswap_entry, rbnode);
292                 if (entry->offset > offset)
293                         node = node->rb_left;
294                 else if (entry->offset < offset)
295                         node = node->rb_right;
296                 else
297                         return entry;
298         }
299         return NULL;
300 }
301
302 /*
303  * In the case that an entry with the same offset is found, a pointer to
304  * the existing entry is stored in dupentry and the function returns -EEXIST
305  */
306 static int zswap_rb_insert(struct rb_root *root, struct zswap_entry *entry,
307                         struct zswap_entry **dupentry)
308 {
309         struct rb_node **link = &root->rb_node, *parent = NULL;
310         struct zswap_entry *myentry;
311
312         while (*link) {
313                 parent = *link;
314                 myentry = rb_entry(parent, struct zswap_entry, rbnode);
315                 if (myentry->offset > entry->offset)
316                         link = &(*link)->rb_left;
317                 else if (myentry->offset < entry->offset)
318                         link = &(*link)->rb_right;
319                 else {
320                         *dupentry = myentry;
321                         return -EEXIST;
322                 }
323         }
324         rb_link_node(&entry->rbnode, parent, link);
325         rb_insert_color(&entry->rbnode, root);
326         return 0;
327 }
328
329 static void zswap_rb_erase(struct rb_root *root, struct zswap_entry *entry)
330 {
331         if (!RB_EMPTY_NODE(&entry->rbnode)) {
332                 rb_erase(&entry->rbnode, root);
333                 RB_CLEAR_NODE(&entry->rbnode);
334         }
335 }
336
337 /*
338  * Carries out the common pattern of freeing an entry's zpool allocation,
339  * freeing the entry itself, and decrementing the number of stored pages.
340  */
341 static void zswap_free_entry(struct zswap_entry *entry)
342 {
343         if (!entry->length)
344                 atomic_dec(&zswap_same_filled_pages);
345         else {
346                 zpool_free(entry->pool->zpool, entry->handle);
347                 zswap_pool_put(entry->pool);
348         }
349         zswap_entry_cache_free(entry);
350         atomic_dec(&zswap_stored_pages);
351         zswap_update_total_size();
352 }
353
354 /* caller must hold the tree lock */
355 static void zswap_entry_get(struct zswap_entry *entry)
356 {
357         entry->refcount++;
358 }
359
360 /* caller must hold the tree lock
361  * remove the entry from the tree and free it, if nobody references it
362  */
363 static void zswap_entry_put(struct zswap_tree *tree,
364                         struct zswap_entry *entry)
365 {
366         int refcount = --entry->refcount;
367
368         BUG_ON(refcount < 0);
369         if (refcount == 0) {
370                 zswap_rb_erase(&tree->rbroot, entry);
371                 zswap_free_entry(entry);
372         }
373 }
374
375 /* caller must hold the tree lock */
376 static struct zswap_entry *zswap_entry_find_get(struct rb_root *root,
377                                 pgoff_t offset)
378 {
379         struct zswap_entry *entry;
380
381         entry = zswap_rb_search(root, offset);
382         if (entry)
383                 zswap_entry_get(entry);
384
385         return entry;
386 }
387
388 /*********************************
389 * per-cpu code
390 **********************************/
391 struct zswap_comp {
392         /* Used for per-CPU dstmem and tfm */
393         local_lock_t lock;
394         u8 *dstmem;
395 };
396
397 static DEFINE_PER_CPU(struct zswap_comp, zswap_comp) = {
398         .lock = INIT_LOCAL_LOCK(lock),
399 };
400
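/*
 * Each CPU gets a two-page dstmem scratch buffer for compression output,
 * leaving headroom for the (presumably rare) case where the compressor
 * expands the data beyond PAGE_SIZE.
 */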
401 static int zswap_dstmem_prepare(unsigned int cpu)
402 {
403         struct zswap_comp *zcomp;
404         u8 *dst;
405
406         dst = kmalloc_node(PAGE_SIZE * 2, GFP_KERNEL, cpu_to_node(cpu));
407         if (!dst)
408                 return -ENOMEM;
409
410         zcomp = per_cpu_ptr(&zswap_comp, cpu);
411         zcomp->dstmem = dst;
412         return 0;
413 }
414
415 static int zswap_dstmem_dead(unsigned int cpu)
416 {
417         struct zswap_comp *zcomp;
418
419         zcomp = per_cpu_ptr(&zswap_comp, cpu);
420         kfree(zcomp->dstmem);
421         zcomp->dstmem = NULL;
422
423         return 0;
424 }
425
426 static int zswap_cpu_comp_prepare(unsigned int cpu, struct hlist_node *node)
427 {
428         struct zswap_pool *pool = hlist_entry(node, struct zswap_pool, node);
429         struct crypto_comp *tfm;
430
431         if (WARN_ON(*per_cpu_ptr(pool->tfm, cpu)))
432                 return 0;
433
434         tfm = crypto_alloc_comp(pool->tfm_name, 0, 0);
435         if (IS_ERR_OR_NULL(tfm)) {
436                 pr_err("could not alloc crypto comp %s : %ld\n",
437                        pool->tfm_name, PTR_ERR(tfm));
438                 return -ENOMEM;
439         }
440         *per_cpu_ptr(pool->tfm, cpu) = tfm;
441         return 0;
442 }
443
444 static int zswap_cpu_comp_dead(unsigned int cpu, struct hlist_node *node)
445 {
446         struct zswap_pool *pool = hlist_entry(node, struct zswap_pool, node);
447         struct crypto_comp *tfm;
448
449         tfm = *per_cpu_ptr(pool->tfm, cpu);
450         if (!IS_ERR_OR_NULL(tfm))
451                 crypto_free_comp(tfm);
452         *per_cpu_ptr(pool->tfm, cpu) = NULL;
453         return 0;
454 }
455
456 /*********************************
457 * pool functions
458 **********************************/
459
460 static struct zswap_pool *__zswap_pool_current(void)
461 {
462         struct zswap_pool *pool;
463
464         pool = list_first_or_null_rcu(&zswap_pools, typeof(*pool), list);
465         WARN_ONCE(!pool && zswap_has_pool,
466                   "%s: no page storage pool!\n", __func__);
467
468         return pool;
469 }
470
471 static struct zswap_pool *zswap_pool_current(void)
472 {
473         assert_spin_locked(&zswap_pools_lock);
474
475         return __zswap_pool_current();
476 }
477
478 static struct zswap_pool *zswap_pool_current_get(void)
479 {
480         struct zswap_pool *pool;
481
482         rcu_read_lock();
483
484         pool = __zswap_pool_current();
485         if (!zswap_pool_get(pool))
486                 pool = NULL;
487
488         rcu_read_unlock();
489
490         return pool;
491 }
492
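/*
 * New pools are added to the head of zswap_pools, so the last entry in the
 * list is the oldest pool; it is the one the shrinker targets when the pool
 * limit is hit.
 */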
493 static struct zswap_pool *zswap_pool_last_get(void)
494 {
495         struct zswap_pool *pool, *last = NULL;
496
497         rcu_read_lock();
498
499         list_for_each_entry_rcu(pool, &zswap_pools, list)
500                 last = pool;
501         WARN_ONCE(!last && zswap_has_pool,
502                   "%s: no page storage pool!\n", __func__);
503         if (!zswap_pool_get(last))
504                 last = NULL;
505
506         rcu_read_unlock();
507
508         return last;
509 }
510
511 /* type and compressor must be null-terminated */
512 static struct zswap_pool *zswap_pool_find_get(char *type, char *compressor)
513 {
514         struct zswap_pool *pool;
515
516         assert_spin_locked(&zswap_pools_lock);
517
518         list_for_each_entry_rcu(pool, &zswap_pools, list) {
519                 if (strcmp(pool->tfm_name, compressor))
520                         continue;
521                 if (strcmp(zpool_get_type(pool->zpool), type))
522                         continue;
523                 /* if we can't get it, it's about to be destroyed */
524                 if (!zswap_pool_get(pool))
525                         continue;
526                 return pool;
527         }
528
529         return NULL;
530 }
531
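/*
 * shrink_worker() asks the zpool to reclaim a page by evicting stored
 * objects; the zpool does so through the ->evict callback in zswap_zpool_ops,
 * i.e. zswap_writeback_entry(), which writes the data back to the swap
 * device and frees the entry.
 */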
532 static void shrink_worker(struct work_struct *w)
533 {
534         struct zswap_pool *pool = container_of(w, typeof(*pool),
535                                                 shrink_work);
536
537         if (zpool_shrink(pool->zpool, 1, NULL))
538                 zswap_reject_reclaim_fail++;
539         zswap_pool_put(pool);
540 }
541
542 static struct zswap_pool *zswap_pool_create(char *type, char *compressor)
543 {
544         struct zswap_pool *pool;
545         char name[38]; /* 'zswap' + 32 char (max) num + \0 */
546         gfp_t gfp = __GFP_NORETRY | __GFP_NOWARN | __GFP_KSWAPD_RECLAIM;
547         int ret;
548
549         if (!zswap_has_pool) {
550                 /* if either are unset, pool initialization failed, and we
551                  * need both params to be set correctly before trying to
552                  * create a pool.
553                  */
554                 if (!strcmp(type, ZSWAP_PARAM_UNSET))
555                         return NULL;
556                 if (!strcmp(compressor, ZSWAP_PARAM_UNSET))
557                         return NULL;
558         }
559
560         pool = kzalloc(sizeof(*pool), GFP_KERNEL);
561         if (!pool)
562                 return NULL;
563
564         /* unique name for each pool specifically required by zsmalloc */
565         snprintf(name, 38, "zswap%x", atomic_inc_return(&zswap_pools_count));
566
567         pool->zpool = zpool_create_pool(type, name, gfp, &zswap_zpool_ops);
568         if (!pool->zpool) {
569                 pr_err("%s zpool not available\n", type);
570                 goto error;
571         }
572         pr_debug("using %s zpool\n", zpool_get_type(pool->zpool));
573
574         strlcpy(pool->tfm_name, compressor, sizeof(pool->tfm_name));
575         pool->tfm = alloc_percpu(struct crypto_comp *);
576         if (!pool->tfm) {
577                 pr_err("percpu alloc failed\n");
578                 goto error;
579         }
580
581         ret = cpuhp_state_add_instance(CPUHP_MM_ZSWP_POOL_PREPARE,
582                                        &pool->node);
583         if (ret)
584                 goto error;
585         pr_debug("using %s compressor\n", pool->tfm_name);
586
587         /* being the current pool takes 1 ref; this func expects the
588          * caller to always add the new pool as the current pool
589          */
590         kref_init(&pool->kref);
591         INIT_LIST_HEAD(&pool->list);
592         INIT_WORK(&pool->shrink_work, shrink_worker);
593
594         zswap_pool_debug("created", pool);
595
596         return pool;
597
598 error:
599         free_percpu(pool->tfm);
600         if (pool->zpool)
601                 zpool_destroy_pool(pool->zpool);
602         kfree(pool);
603         return NULL;
604 }
605
606 static bool zswap_try_pool_create(void)
607 {
608         struct zswap_pool *pool;
609         bool has_comp, has_zpool;
610
611         has_comp = crypto_has_comp(zswap_compressor, 0, 0);
612         if (!has_comp && strcmp(zswap_compressor,
613                                 CONFIG_ZSWAP_COMPRESSOR_DEFAULT)) {
614                 pr_err("compressor %s not available, using default %s\n",
615                        zswap_compressor, CONFIG_ZSWAP_COMPRESSOR_DEFAULT);
616                 param_free_charp(&zswap_compressor);
617                 zswap_compressor = CONFIG_ZSWAP_COMPRESSOR_DEFAULT;
618                 has_comp = crypto_has_comp(zswap_compressor, 0, 0);
619         }
620         if (!has_comp) {
621                 pr_err("default compressor %s not available\n",
622                        zswap_compressor);
623                 param_free_charp(&zswap_compressor);
624                 zswap_compressor = ZSWAP_PARAM_UNSET;
625         }
626
627         has_zpool = zpool_has_pool(zswap_zpool_type);
628         if (!has_zpool && strcmp(zswap_zpool_type,
629                                  CONFIG_ZSWAP_ZPOOL_DEFAULT)) {
630                 pr_err("zpool %s not available, using default %s\n",
631                        zswap_zpool_type, CONFIG_ZSWAP_ZPOOL_DEFAULT);
632                 param_free_charp(&zswap_zpool_type);
633                 zswap_zpool_type = CONFIG_ZSWAP_ZPOOL_DEFAULT;
634                 has_zpool = zpool_has_pool(zswap_zpool_type);
635         }
636         if (!has_zpool) {
637                 pr_err("default zpool %s not available\n",
638                        zswap_zpool_type);
639                 param_free_charp(&zswap_zpool_type);
640                 zswap_zpool_type = ZSWAP_PARAM_UNSET;
641         }
642
643         if (!has_comp || !has_zpool)
644                 return false;
645
646         pool = zswap_pool_create(zswap_zpool_type, zswap_compressor);
647
648         if (pool) {
649                 pr_info("loaded using pool %s/%s\n", pool->tfm_name,
650                         zpool_get_type(pool->zpool));
651                 list_add(&pool->list, &zswap_pools);
652                 zswap_has_pool = true;
653         } else {
654                 pr_err("pool creation failed\n");
655                 zswap_enabled = false;
656         }
657
658         return zswap_enabled;
659 }
660
661 static void zswap_pool_destroy(struct zswap_pool *pool)
662 {
663         zswap_pool_debug("destroying", pool);
664
665         cpuhp_state_remove_instance(CPUHP_MM_ZSWP_POOL_PREPARE, &pool->node);
666         free_percpu(pool->tfm);
667         zpool_destroy_pool(pool->zpool);
668         kfree(pool);
669 }
670
671 static int __must_check zswap_pool_get(struct zswap_pool *pool)
672 {
673         if (!pool)
674                 return 0;
675
676         return kref_get_unless_zero(&pool->kref);
677 }
678
679 static void __zswap_pool_release(struct work_struct *work)
680 {
681         struct zswap_pool *pool = container_of(work, typeof(*pool),
682                                                 release_work);
683
684         synchronize_rcu();
685
686         /* nobody should have been able to get a kref... */
687         WARN_ON(kref_get_unless_zero(&pool->kref));
688
689         /* pool is now off zswap_pools list and has no references. */
690         zswap_pool_destroy(pool);
691 }
692
693 static void __zswap_pool_empty(struct kref *kref)
694 {
695         struct zswap_pool *pool;
696
697         pool = container_of(kref, typeof(*pool), kref);
698
699         spin_lock(&zswap_pools_lock);
700
701         WARN_ON(pool == zswap_pool_current());
702
703         list_del_rcu(&pool->list);
704
705         INIT_WORK(&pool->release_work, __zswap_pool_release);
706         schedule_work(&pool->release_work);
707
708         spin_unlock(&zswap_pools_lock);
709 }
710
711 static void zswap_pool_put(struct zswap_pool *pool)
712 {
713         kref_put(&pool->kref, __zswap_pool_empty);
714 }
715
716 /*********************************
717 * param callbacks
718 **********************************/
719
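/*
 * Switching the zpool type or compressor at runtime follows one pattern:
 * validate the new value, reuse an existing pool that already matches the
 * (type, compressor) pair or create a new one, publish it at the head of
 * zswap_pools so it becomes the current pool, and finally drop the reference
 * held on the previous current pool (or on the new pool if the switch failed).
 */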
720 /* val must be a null-terminated string */
721 static int __zswap_param_set(const char *val, const struct kernel_param *kp,
722                              char *type, char *compressor)
723 {
724         struct zswap_pool *pool, *put_pool = NULL;
725         char *s = strstrip((char *)val);
726         int ret;
727
728         if (zswap_init_failed) {
729                 pr_err("can't set param, initialization failed\n");
730                 return -ENODEV;
731         }
732
733         /* no change required */
734         if (!strcmp(s, *(char **)kp->arg) && zswap_has_pool)
735                 return 0;
736
737         /* if this is load-time (pre-init) param setting,
738          * don't create a pool; that's done during init.
739          */
740         if (!zswap_init_started)
741                 return param_set_charp(s, kp);
742
743         if (!type) {
744                 if (!zpool_has_pool(s)) {
745                         pr_err("zpool %s not available\n", s);
746                         return -ENOENT;
747                 }
748                 type = s;
749         } else if (!compressor) {
750                 if (!crypto_has_comp(s, 0, 0)) {
751                         pr_err("compressor %s not available\n", s);
752                         return -ENOENT;
753                 }
754                 compressor = s;
755         } else {
756                 WARN_ON(1);
757                 return -EINVAL;
758         }
759
760         spin_lock(&zswap_pools_lock);
761
762         pool = zswap_pool_find_get(type, compressor);
763         if (pool) {
764                 zswap_pool_debug("using existing", pool);
765                 WARN_ON(pool == zswap_pool_current());
766                 list_del_rcu(&pool->list);
767         }
768
769         spin_unlock(&zswap_pools_lock);
770
771         if (!pool)
772                 pool = zswap_pool_create(type, compressor);
773
774         if (pool)
775                 ret = param_set_charp(s, kp);
776         else
777                 ret = -EINVAL;
778
779         spin_lock(&zswap_pools_lock);
780
781         if (!ret) {
782                 put_pool = zswap_pool_current();
783                 list_add_rcu(&pool->list, &zswap_pools);
784                 zswap_has_pool = true;
785         } else if (pool) {
786                 /* add the possibly pre-existing pool to the end of the pools
787                  * list; if it's new (and empty) then it'll be removed and
788                  * destroyed by the put after we drop the lock
789                  */
790                 list_add_tail_rcu(&pool->list, &zswap_pools);
791                 put_pool = pool;
792         }
793
794         spin_unlock(&zswap_pools_lock);
795
796         if (!zswap_has_pool && !pool) {
797                 /* if initial pool creation failed, and this pool creation also
798                  * failed, maybe both compressor and zpool params were bad.
799                  * Allow changing this param, so pool creation will succeed
800                  * when the other param is changed. We already verified this
801                  * param is ok in the zpool_has_pool() or crypto_has_comp()
802                  * checks above.
803                  */
804                 ret = param_set_charp(s, kp);
805         }
806
807         /* drop the ref from either the old current pool,
808          * or the new pool we failed to add
809          */
810         if (put_pool)
811                 zswap_pool_put(put_pool);
812
813         return ret;
814 }
815
816 static int zswap_compressor_param_set(const char *val,
817                                       const struct kernel_param *kp)
818 {
819         return __zswap_param_set(val, kp, zswap_zpool_type, NULL);
820 }
821
822 static int zswap_zpool_param_set(const char *val,
823                                  const struct kernel_param *kp)
824 {
825         return __zswap_param_set(val, kp, NULL, zswap_compressor);
826 }
827
828 static int zswap_enabled_param_set(const char *val,
829                                    const struct kernel_param *kp)
830 {
831         int ret;
832
833         if (zswap_init_failed) {
834                 pr_err("can't enable, initialization failed\n");
835                 return -ENODEV;
836         }
837
838         ret = param_set_bool(val, kp);
839         if (!ret && zswap_enabled && zswap_init_started && !zswap_has_pool)
840                 if (!zswap_try_pool_create())
841                         ret = -ENODEV;
842
843         return ret;
844 }
845
846 /*********************************
847 * writeback code
848 **********************************/
849 /* return enum for zswap_get_swap_cache_page */
850 enum zswap_get_swap_ret {
851         ZSWAP_SWAPCACHE_NEW,
852         ZSWAP_SWAPCACHE_EXIST,
853         ZSWAP_SWAPCACHE_FAIL,
854 };
855
856 /*
857  * zswap_get_swap_cache_page
858  *
859  * This is an adaptation of read_swap_cache_async()
860  *
861  * This function tries to find a page with the given swap entry
862  * in the swapper_space address space (the swap cache).  If the page
863  * is found, it is returned in retpage.  Otherwise, a page is allocated,
864  * added to the swap cache, and returned in retpage.
865  *
866  * On success, the swap cache page is returned in retpage
867  * Returns ZSWAP_SWAPCACHE_EXIST if page was already in the swap cache
868  * Returns ZSWAP_SWAPCACHE_NEW if the new page needs to be populated,
869  *     the new page is added to swapcache and locked
870  * Returns ZSWAP_SWAPCACHE_FAIL on error
871  */
872 static int zswap_get_swap_cache_page(swp_entry_t entry,
873                                 struct page **retpage)
874 {
875         bool page_was_allocated;
876
877         *retpage = __read_swap_cache_async(entry, GFP_KERNEL,
878                         NULL, 0, &page_was_allocated);
879         if (page_was_allocated)
880                 return ZSWAP_SWAPCACHE_NEW;
881         if (!*retpage)
882                 return ZSWAP_SWAPCACHE_FAIL;
883         return ZSWAP_SWAPCACHE_EXIST;
884 }
885
886 /*
887  * Attempts to free an entry by adding a page to the swap cache,
888  * decompressing the entry data into the page, and issuing a
889  * bio write to write the page back to the swap device.
890  *
891  * This can be thought of as a "resumed writeback" of the page
892  * to the swap device.  We are basically resuming the same swap
893  * writeback path that was intercepted with the frontswap_store()
894  * in the first place.  After the page has been decompressed into
895  * the swap cache, the compressed version stored by zswap can be
896  * freed.
897  */
898 static int zswap_writeback_entry(struct zpool *pool, unsigned long handle)
899 {
900         struct zswap_header *zhdr;
901         swp_entry_t swpentry;
902         struct zswap_tree *tree;
903         pgoff_t offset;
904         struct zswap_entry *entry;
905         struct page *page;
906         struct crypto_comp *tfm;
907         u8 *src, *dst;
908         unsigned int dlen;
909         int ret;
910         struct writeback_control wbc = {
911                 .sync_mode = WB_SYNC_NONE,
912         };
913
914         /* extract swpentry from data */
915         zhdr = zpool_map_handle(pool, handle, ZPOOL_MM_RO);
916         swpentry = zhdr->swpentry; /* here */
917         tree = zswap_trees[swp_type(swpentry)];
918         offset = swp_offset(swpentry);
919
920         /* find and ref zswap entry */
921         spin_lock(&tree->lock);
922         entry = zswap_entry_find_get(&tree->rbroot, offset);
923         if (!entry) {
924                 /* entry was invalidated */
925                 spin_unlock(&tree->lock);
926                 zpool_unmap_handle(pool, handle);
927                 return 0;
928         }
929         spin_unlock(&tree->lock);
930         BUG_ON(offset != entry->offset);
931
932         /* try to allocate swap cache page */
933         switch (zswap_get_swap_cache_page(swpentry, &page)) {
934         case ZSWAP_SWAPCACHE_FAIL: /* no memory or invalidate happened */
935                 ret = -ENOMEM;
936                 goto fail;
937
938         case ZSWAP_SWAPCACHE_EXIST:
939                 /* page is already in the swap cache, ignore for now */
940                 put_page(page);
941                 ret = -EEXIST;
942                 goto fail;
943
944         case ZSWAP_SWAPCACHE_NEW: /* page is locked */
945                 /* decompress */
946                 dlen = PAGE_SIZE;
947                 src = (u8 *)zhdr + sizeof(struct zswap_header);
948                 dst = kmap_atomic(page);
949                 local_lock(&zswap_comp.lock);
950                 tfm = *this_cpu_ptr(entry->pool->tfm);
951                 ret = crypto_comp_decompress(tfm, src, entry->length,
952                                              dst, &dlen);
953                 local_unlock(&zswap_comp.lock);
954                 kunmap_atomic(dst);
955                 BUG_ON(ret);
956                 BUG_ON(dlen != PAGE_SIZE);
957
958                 /* page is up to date */
959                 SetPageUptodate(page);
960         }
961
962         /* move it to the tail of the inactive list after end_writeback */
963         SetPageReclaim(page);
964
965         /* start writeback */
966         __swap_writepage(page, &wbc, end_swap_bio_write);
967         put_page(page);
968         zswap_written_back_pages++;
969
970         spin_lock(&tree->lock);
971         /* drop local reference */
972         zswap_entry_put(tree, entry);
973
974         /*
975         * There are two possible situations for the entry here:
976         * (1) refcount is 1 (normal case), entry is valid and on the tree
977         * (2) refcount is 0, entry is freed and not on the tree
978         *     because invalidate happened during writeback
979         * Search the tree and free the entry only if it is still found.
980         */
981         if (entry == zswap_rb_search(&tree->rbroot, offset))
982                 zswap_entry_put(tree, entry);
983         spin_unlock(&tree->lock);
984
985         goto end;
986
987         /*
988         * If we get here due to ZSWAP_SWAPCACHE_EXIST,
989         * a load may be happening concurrently.
990         * It is safe and okay to not free the entry here;
991         * if we free the entry in the following put,
992         * it is also okay to return !0.
993         */
994 fail:
995         spin_lock(&tree->lock);
996         zswap_entry_put(tree, entry);
997         spin_unlock(&tree->lock);
998
999 end:
1000         zpool_unmap_handle(pool, handle);
1001         return ret;
1002 }
1003
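/*
 * Same-value filled pages (most commonly the zero page) are detected by
 * comparing every word of the page against the first one.  Such pages are
 * never handed to the compressor: only the repeated word is kept in
 * entry->value, and the page is rebuilt with memset_l() on load.
 */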
1004 static int zswap_is_page_same_filled(void *ptr, unsigned long *value)
1005 {
1006         unsigned int pos;
1007         unsigned long *page;
1008
1009         page = (unsigned long *)ptr;
1010         for (pos = 1; pos < PAGE_SIZE / sizeof(*page); pos++) {
1011                 if (page[pos] != page[0])
1012                         return 0;
1013         }
1014         *value = page[0];
1015         return 1;
1016 }
1017
1018 static void zswap_fill_page(void *ptr, unsigned long value)
1019 {
1020         unsigned long *page;
1021
1022         page = (unsigned long *)ptr;
1023         memset_l(page, value, PAGE_SIZE / sizeof(unsigned long));
1024 }
1025
1026 /*********************************
1027 * frontswap hooks
1028 **********************************/
1029 /* attempts to compress and store a single page */
1030 static int zswap_frontswap_store(unsigned type, pgoff_t offset,
1031                                 struct page *page)
1032 {
1033         struct zswap_tree *tree = zswap_trees[type];
1034         struct zswap_entry *entry, *dupentry;
1035         struct crypto_comp *tfm;
1036         int ret;
1037         unsigned int hlen, dlen = PAGE_SIZE;
1038         unsigned long handle, value;
1039         char *buf;
1040         u8 *src, *dst;
1041         struct zswap_header zhdr = { .swpentry = swp_entry(type, offset) };
1042         gfp_t gfp;
1043
1044         /* THP isn't supported */
1045         if (PageTransHuge(page)) {
1046                 ret = -EINVAL;
1047                 goto reject;
1048         }
1049
1050         if (!zswap_enabled || !tree) {
1051                 ret = -ENODEV;
1052                 goto reject;
1053         }
1054
1055         /* reclaim space if needed */
1056         if (zswap_is_full()) {
1057                 struct zswap_pool *pool;
1058
1059                 zswap_pool_limit_hit++;
1060                 zswap_pool_reached_full = true;
1061                 pool = zswap_pool_last_get();
1062                 if (pool)
1063                         queue_work(shrink_wq, &pool->shrink_work);
1064                 ret = -ENOMEM;
1065                 goto reject;
1066         }
1067
1068         if (zswap_pool_reached_full) {
1069                if (!zswap_can_accept()) {
1070                         ret = -ENOMEM;
1071                         goto reject;
1072                 } else
1073                         zswap_pool_reached_full = false;
1074         }
1075
1076         /* allocate entry */
1077         entry = zswap_entry_cache_alloc(GFP_KERNEL);
1078         if (!entry) {
1079                 zswap_reject_kmemcache_fail++;
1080                 ret = -ENOMEM;
1081                 goto reject;
1082         }
1083
1084         if (zswap_same_filled_pages_enabled) {
1085                 src = kmap_atomic(page);
1086                 if (zswap_is_page_same_filled(src, &value)) {
1087                         kunmap_atomic(src);
1088                         entry->offset = offset;
1089                         entry->length = 0;
1090                         entry->value = value;
1091                         atomic_inc(&zswap_same_filled_pages);
1092                         goto insert_entry;
1093                 }
1094                 kunmap_atomic(src);
1095         }
1096
1097         /* if entry is successfully added, it keeps the reference */
1098         entry->pool = zswap_pool_current_get();
1099         if (!entry->pool) {
1100                 ret = -EINVAL;
1101                 goto freepage;
1102         }
1103
1104         /* compress */
1105         local_lock(&zswap_comp.lock);
1106         dst = *this_cpu_ptr(&zswap_comp.dstmem);
1107         tfm = *this_cpu_ptr(entry->pool->tfm);
1108         src = kmap_atomic(page);
1109         ret = crypto_comp_compress(tfm, src, PAGE_SIZE, dst, &dlen);
1110         kunmap_atomic(src);
1111         if (ret) {
1112                 ret = -EINVAL;
1113                 goto put_dstmem;
1114         }
1115
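        /*
         * The zpool object holds an optional zswap_header (the swap entry,
         * needed so a later eviction can locate this tree entry) followed by
         * the compressed data; the header is only included when the zpool
         * supports eviction.
         */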
1116         /* store */
1117         hlen = zpool_evictable(entry->pool->zpool) ? sizeof(zhdr) : 0;
1118         gfp = __GFP_NORETRY | __GFP_NOWARN | __GFP_KSWAPD_RECLAIM;
1119         if (zpool_malloc_support_movable(entry->pool->zpool))
1120                 gfp |= __GFP_HIGHMEM | __GFP_MOVABLE;
1121         ret = zpool_malloc(entry->pool->zpool, hlen + dlen, gfp, &handle);
1122         if (ret == -ENOSPC) {
1123                 zswap_reject_compress_poor++;
1124                 goto put_dstmem;
1125         }
1126         if (ret) {
1127                 zswap_reject_alloc_fail++;
1128                 goto put_dstmem;
1129         }
1130         buf = zpool_map_handle(entry->pool->zpool, handle, ZPOOL_MM_RW);
1131         memcpy(buf, &zhdr, hlen);
1132         memcpy(buf + hlen, dst, dlen);
1133         zpool_unmap_handle(entry->pool->zpool, handle);
1134         local_unlock(&zswap_comp.lock);
1135
1136         /* populate entry */
1137         entry->offset = offset;
1138         entry->handle = handle;
1139         entry->length = dlen;
1140
1141 insert_entry:
1142         /* map */
1143         spin_lock(&tree->lock);
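        /*
         * An entry for the same offset may already be in the tree, e.g. when
         * the page was swapped out again before the stale entry could be
         * invalidated; the old entry is erased and dropped so the new one can
         * take its place.
         */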
1144         do {
1145                 ret = zswap_rb_insert(&tree->rbroot, entry, &dupentry);
1146                 if (ret == -EEXIST) {
1147                         zswap_duplicate_entry++;
1148                         /* remove from rbtree */
1149                         zswap_rb_erase(&tree->rbroot, dupentry);
1150                         zswap_entry_put(tree, dupentry);
1151                 }
1152         } while (ret == -EEXIST);
1153         spin_unlock(&tree->lock);
1154
1155         /* update stats */
1156         atomic_inc(&zswap_stored_pages);
1157         zswap_update_total_size();
1158
1159         return 0;
1160
1161 put_dstmem:
1162         local_unlock(&zswap_comp.lock);
1163         zswap_pool_put(entry->pool);
1164 freepage:
1165         zswap_entry_cache_free(entry);
1166 reject:
1167         return ret;
1168 }
1169
1170 /*
1171  * returns 0 if the page was successfully decompressed
1172  * returns -1 if the entry was not found or on error
1173 */
1174 static int zswap_frontswap_load(unsigned type, pgoff_t offset,
1175                                 struct page *page)
1176 {
1177         struct zswap_tree *tree = zswap_trees[type];
1178         struct zswap_entry *entry;
1179         struct crypto_comp *tfm;
1180         u8 *src, *dst;
1181         unsigned int dlen;
1182         int ret;
1183
1184         /* find */
1185         spin_lock(&tree->lock);
1186         entry = zswap_entry_find_get(&tree->rbroot, offset);
1187         if (!entry) {
1188                 /* entry was written back */
1189                 spin_unlock(&tree->lock);
1190                 return -1;
1191         }
1192         spin_unlock(&tree->lock);
1193
1194         if (!entry->length) {
1195                 dst = kmap_atomic(page);
1196                 zswap_fill_page(dst, entry->value);
1197                 kunmap_atomic(dst);
1198                 goto freeentry;
1199         }
1200
1201         /* decompress */
1202         dlen = PAGE_SIZE;
1203         src = zpool_map_handle(entry->pool->zpool, entry->handle, ZPOOL_MM_RO);
1204         if (zpool_evictable(entry->pool->zpool))
1205                 src += sizeof(struct zswap_header);
1206         dst = kmap_atomic(page);
1207         local_lock(&zswap_comp.lock);
1208         tfm = *this_cpu_ptr(entry->pool->tfm);
1209         ret = crypto_comp_decompress(tfm, src, entry->length, dst, &dlen);
1210         local_unlock(&zswap_comp.lock);
1211         kunmap_atomic(dst);
1212         zpool_unmap_handle(entry->pool->zpool, entry->handle);
1213         BUG_ON(ret);
1214
1215 freeentry:
1216         spin_lock(&tree->lock);
1217         zswap_entry_put(tree, entry);
1218         spin_unlock(&tree->lock);
1219
1220         return 0;
1221 }
1222
1223 /* frees an entry in zswap */
1224 static void zswap_frontswap_invalidate_page(unsigned type, pgoff_t offset)
1225 {
1226         struct zswap_tree *tree = zswap_trees[type];
1227         struct zswap_entry *entry;
1228
1229         /* find */
1230         spin_lock(&tree->lock);
1231         entry = zswap_rb_search(&tree->rbroot, offset);
1232         if (!entry) {
1233                 /* entry was written back */
1234                 spin_unlock(&tree->lock);
1235                 return;
1236         }
1237
1238         /* remove from rbtree */
1239         zswap_rb_erase(&tree->rbroot, entry);
1240
1241         /* drop the initial reference from entry creation */
1242         zswap_entry_put(tree, entry);
1243
1244         spin_unlock(&tree->lock);
1245 }
1246
1247 /* frees all zswap entries for the given swap type */
1248 static void zswap_frontswap_invalidate_area(unsigned type)
1249 {
1250         struct zswap_tree *tree = zswap_trees[type];
1251         struct zswap_entry *entry, *n;
1252
1253         if (!tree)
1254                 return;
1255
1256         /* walk the tree and free everything */
1257         spin_lock(&tree->lock);
1258         rbtree_postorder_for_each_entry_safe(entry, n, &tree->rbroot, rbnode)
1259                 zswap_free_entry(entry);
1260         tree->rbroot = RB_ROOT;
1261         spin_unlock(&tree->lock);
1262         kfree(tree);
1263         zswap_trees[type] = NULL;
1264 }
1265
1266 static void zswap_frontswap_init(unsigned type)
1267 {
1268         struct zswap_tree *tree;
1269
1270         tree = kzalloc(sizeof(*tree), GFP_KERNEL);
1271         if (!tree) {
1272                 pr_err("alloc failed, zswap disabled for swap type %d\n", type);
1273                 return;
1274         }
1275
1276         tree->rbroot = RB_ROOT;
1277         spin_lock_init(&tree->lock);
1278         zswap_trees[type] = tree;
1279 }
1280
1281 static struct frontswap_ops zswap_frontswap_ops = {
1282         .store = zswap_frontswap_store,
1283         .load = zswap_frontswap_load,
1284         .invalidate_page = zswap_frontswap_invalidate_page,
1285         .invalidate_area = zswap_frontswap_invalidate_area,
1286         .init = zswap_frontswap_init
1287 };
1288
1289 /*********************************
1290 * debugfs functions
1291 **********************************/
1292 #ifdef CONFIG_DEBUG_FS
1293 #include <linux/debugfs.h>
1294
1295 static struct dentry *zswap_debugfs_root;
1296
1297 static int __init zswap_debugfs_init(void)
1298 {
1299         if (!debugfs_initialized())
1300                 return -ENODEV;
1301
1302         zswap_debugfs_root = debugfs_create_dir("zswap", NULL);
1303
1304         debugfs_create_u64("pool_limit_hit", 0444,
1305                            zswap_debugfs_root, &zswap_pool_limit_hit);
1306         debugfs_create_u64("reject_reclaim_fail", 0444,
1307                            zswap_debugfs_root, &zswap_reject_reclaim_fail);
1308         debugfs_create_u64("reject_alloc_fail", 0444,
1309                            zswap_debugfs_root, &zswap_reject_alloc_fail);
1310         debugfs_create_u64("reject_kmemcache_fail", 0444,
1311                            zswap_debugfs_root, &zswap_reject_kmemcache_fail);
1312         debugfs_create_u64("reject_compress_poor", 0444,
1313                            zswap_debugfs_root, &zswap_reject_compress_poor);
1314         debugfs_create_u64("written_back_pages", 0444,
1315                            zswap_debugfs_root, &zswap_written_back_pages);
1316         debugfs_create_u64("duplicate_entry", 0444,
1317                            zswap_debugfs_root, &zswap_duplicate_entry);
1318         debugfs_create_u64("pool_total_size", 0444,
1319                            zswap_debugfs_root, &zswap_pool_total_size);
1320         debugfs_create_atomic_t("stored_pages", 0444,
1321                                 zswap_debugfs_root, &zswap_stored_pages);
1322         debugfs_create_atomic_t("same_filled_pages", 0444,
1323                                 zswap_debugfs_root, &zswap_same_filled_pages);
1324
1325         return 0;
1326 }
1327
1328 static void __exit zswap_debugfs_exit(void)
1329 {
1330         debugfs_remove_recursive(zswap_debugfs_root);
1331 }
1332 #else
1333 static int __init zswap_debugfs_init(void)
1334 {
1335         return 0;
1336 }
1337
1338 static void __exit zswap_debugfs_exit(void) { }
1339 #endif
1340
1341 /*********************************
1342 * module init and exit
1343 **********************************/
1344 static int __init init_zswap(void)
1345 {
1346         int ret;
1347
1348         zswap_init_started = true;
1349
1350         if (zswap_entry_cache_create()) {
1351                 pr_err("entry cache creation failed\n");
1352                 goto cache_fail;
1353         }
1354
1355         ret = cpuhp_setup_state(CPUHP_MM_ZSWP_MEM_PREPARE, "mm/zswap:prepare",
1356                                 zswap_dstmem_prepare, zswap_dstmem_dead);
1357         if (ret) {
1358                 pr_err("dstmem alloc failed\n");
1359                 goto dstmem_fail;
1360         }
1361
1362         ret = cpuhp_setup_state_multi(CPUHP_MM_ZSWP_POOL_PREPARE,
1363                                       "mm/zswap_pool:prepare",
1364                                       zswap_cpu_comp_prepare,
1365                                       zswap_cpu_comp_dead);
1366         if (ret)
1367                 goto hp_fail;
1368
1369         shrink_wq = create_workqueue("zswap-shrink");
1370         if (!shrink_wq)
1371                 goto hp_fail;
1372
1373         frontswap_register_ops(&zswap_frontswap_ops);
1374         if (zswap_debugfs_init())
1375                 pr_warn("debugfs initialization failed\n");
1376
1377         if (zswap_enabled)
1378                 zswap_try_pool_create();
1379
1380         return 0;
1381
1382 hp_fail:
1383         cpuhp_remove_state(CPUHP_MM_ZSWP_MEM_PREPARE);
1384 dstmem_fail:
1385         zswap_entry_cache_destroy();
1386 cache_fail:
1387         /* if built-in, we aren't unloaded on failure; don't allow use */
1388         zswap_init_failed = true;
1389         zswap_enabled = false;
1390         return -ENOMEM;
1391 }
1392 /* must be late so crypto has time to come up */
1393 late_initcall(init_zswap);
1394
1395 MODULE_LICENSE("GPL");
1396 MODULE_AUTHOR("Seth Jennings <sjennings@variantweb.net>");
1397 MODULE_DESCRIPTION("Compressed cache for swap pages");