Merge tag 'kvm-s390-master-6.6-1' of git://git.kernel.org/pub/scm/linux/kernel/git...
[platform/kernel/linux-rpi.git] / drivers / mtd / mtdswap.c
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Swap block device support for MTDs
4  * Turns an MTD device into a swap device with block wear leveling
5  *
6  * Copyright © 2007,2011 Nokia Corporation. All rights reserved.
7  *
8  * Authors: Jarkko Lavinen <jarkko.lavinen@nokia.com>
9  *
10  * Based on Richard Purdie's earlier implementation in 2007. Background
11  * support and lock-less operation written by Adrian Hunter.
12  */
13
14 #include <linux/kernel.h>
15 #include <linux/module.h>
16 #include <linux/mtd/mtd.h>
17 #include <linux/mtd/blktrans.h>
18 #include <linux/rbtree.h>
19 #include <linux/sched.h>
20 #include <linux/slab.h>
21 #include <linux/vmalloc.h>
22 #include <linux/blkdev.h>
23 #include <linux/swap.h>
24 #include <linux/debugfs.h>
25 #include <linux/seq_file.h>
26 #include <linux/device.h>
27 #include <linux/math64.h>
28
/* Name prefix of this driver; its users are outside this part of the file. */
#define MTDSWAP_PREFIX "mtdswap"

/*
 * The number of free eraseblocks when GC should stop: background GC picks
 * no block once more than this many clean blocks exist and both the dirty
 * and the failing trees are empty.
 */
#define CLEAN_BLOCK_THRESHOLD   20

/*
 * Number of free eraseblocks below which GC can also collect low frag
 * blocks.
 */
#define LOW_FRAG_GC_THRESHOLD   5

/*
 * Wear level cost amortization. We want to do wear leveling in the
 * background without disturbing GC too much. This is done by defining a
 * maximum wear-leveling frequency. Frequency value 6 means 1/6 of the GC
 * passes will pick an erase block based on the biggest wear difference
 * rather than the biggest dirtiness.
 *
 * The lower freq2 should be chosen so that it makes sure the maximum erase
 * difference will decrease even if a malicious application is deliberately
 * trying to make erase differences large.
 *
 * MAX_ERASE_DIFF is the erase-count spread that must be exceeded before a
 * wear-leveling pick is considered at all; COLLECT_NONDIRTY_BASE is the
 * width of the range over which the frequency moves between FREQ1 and
 * FREQ2.
 */
#define MAX_ERASE_DIFF          4000
#define COLLECT_NONDIRTY_BASE   MAX_ERASE_DIFF
#define COLLECT_NONDIRTY_FREQ1  6
#define COLLECT_NONDIRTY_FREQ2  4
56
/*
 * Sentinel values for the page <-> block maps.
 * PAGE_UNDEF marks a revmap[] slot (flash block) that holds no page.
 * BLOCK_ERROR is stored in page_data[] for a page whose data could not be
 * moved. BLOCK_UNDEF and BLOCK_MAX are not used in this part of the file;
 * presumably "page not mapped" and the largest valid block - TODO confirm.
 */
#define PAGE_UNDEF              UINT_MAX
#define BLOCK_UNDEF             UINT_MAX
#define BLOCK_ERROR             (UINT_MAX - 1)
#define BLOCK_MAX               (UINT_MAX - 2)

/*
 * Bits kept in swap_eb.flags.
 */
#define EBLOCK_BAD              (1 << 0)        /* bad block, never in a tree */
#define EBLOCK_NOMAGIC          (1 << 1)        /* no MTDSWAP_MAGIC_CLEAN marker found */
#define EBLOCK_BITFLIP          (1 << 2)        /* cleared by GC before erasing */
#define EBLOCK_FAILED           (1 << 3)        /* a write to the block failed */
#define EBLOCK_READERR          (1 << 4)        /* a read from the block failed */
/* The initial scan parks the target tree index in flags from this bit up. */
#define EBLOCK_IDX_SHIFT        5
68
/*
 * Per-eraseblock bookkeeping. The entries live in mtdswap_dev.eb_data and
 * an entry's index in that array is the erase block number.
 */
struct swap_eb {
        struct rb_node rb;              /* link in a tree keyed by erase_count */
        struct rb_root *root;           /* tree currently holding the block, or NULL */

        unsigned int flags;             /* EBLOCK_* bits */
        unsigned int active_count;      /* pages of this block currently mapped */
        unsigned int erase_count;       /* erase counter; the tree sort key */
        unsigned int pad;               /* speeds up pointer decrement */
};
78
/*
 * Smallest / largest erase count stored in an erase-block tree. The trees
 * are ordered by erase_count, so these are the first and last nodes. The
 * tree must not be empty: the result of rb_first()/rb_last() is
 * dereferenced unconditionally.
 */
#define MTDSWAP_ECNT_MIN(rbroot) (rb_entry(rb_first(rbroot), struct swap_eb, \
                                rb)->erase_count)
#define MTDSWAP_ECNT_MAX(rbroot) (rb_entry(rb_last(rbroot), struct swap_eb, \
                                rb)->erase_count)
83
/* One erase-block tree together with the number of blocks it holds. */
struct mtdswap_tree {
        struct rb_root root;    /* red-black tree of swap_eb, keyed by erase_count */
        unsigned int count;     /* number of blocks currently in root */
};
88
/*
 * Indices into mtdswap_dev.trees[]. The numeric order matters: the
 * wear-leveling choice walks CLEAN..DIRTY and the GC choice walks downwards
 * from BITFLIP.
 */
enum {
        MTDSWAP_CLEAN,          /* erased blocks carrying a clean marker */
        MTDSWAP_USED,           /* every page of the block is active */
        MTDSWAP_LOWFRAG,        /* more than half of the pages are active */
        MTDSWAP_HIFRAG,         /* some, but at most half, of the pages are active */
        MTDSWAP_DIRTY,          /* no active pages */
        MTDSWAP_BITFLIP,        /* for blocks flagged EBLOCK_BITFLIP */
        MTDSWAP_FAILING,        /* for blocks with read or write errors */
        MTDSWAP_TREE_CNT,       /* number of trees, not a tree itself */
};
99
/* State of one mtdswap device. */
struct mtdswap_dev {
        struct mtd_blktrans_dev *mbd_dev;
        struct mtd_info *mtd;           /* underlying MTD device */
        struct device *dev;             /* used for dev_warn()/dev_err() */

        unsigned int *page_data;        /* indexed by page: flash block or BLOCK_* sentinel */
        unsigned int *revmap;           /* indexed by flash block: page or PAGE_UNDEF */

        unsigned int eblks;             /* number of entries in eb_data */
        unsigned int spare_eblks;       /* decremented for every block that goes bad */
        unsigned int pages_per_eblk;
        unsigned int max_erase_count;   /* largest erase count seen so far */
        struct swap_eb *eb_data;        /* per-eraseblock state, indexed by block number */

        struct mtdswap_tree trees[MTDSWAP_TREE_CNT];

        /* Counters; they are not updated in this part of the file. */
        unsigned long long sect_read_count;
        unsigned long long sect_write_count;
        unsigned long long mtd_write_count;
        unsigned long long mtd_read_count;
        unsigned long long discard_count;
        unsigned long long discard_page_count;

        unsigned int curr_write_pos;    /* next free page index inside curr_write */
        struct swap_eb *curr_write;     /* block currently being filled, or NULL */

        char *page_buf;                 /* scratch buffer for page data */
        char *oob_buf;                  /* scratch buffer for OOB data */
};
129
/* Marker as stored in the OOB area; both fields are little endian. */
struct mtdswap_oobdata {
        __le16 magic;   /* MTDSWAP_MAGIC_CLEAN or MTDSWAP_MAGIC_DIRTY */
        __le32 count;   /* erase count; written only with the clean marker */
} __packed;
134
/* Magic numbers stored in mtdswap_oobdata.magic. */
#define MTDSWAP_MAGIC_CLEAN     0x2095
#define MTDSWAP_MAGIC_DIRTY     (MTDSWAP_MAGIC_CLEAN + 1)
/* Marker selector passed to mtdswap_write_marker(). */
#define MTDSWAP_TYPE_CLEAN      0
#define MTDSWAP_TYPE_DIRTY      1
/* Size of a complete clean marker (magic plus erase count). */
#define MTDSWAP_OOBSIZE         sizeof(struct mtdswap_oobdata)

#define MTDSWAP_ERASE_RETRIES   3 /* Before marking erase block bad */
#define MTDSWAP_IO_RETRIES      3 /* Read attempts when moving a page */
143
/* Non-negative results of mtdswap_read_markers(). */
enum {
        MTDSWAP_SCANNED_CLEAN,          /* clean marker present, no dirty marker */
        MTDSWAP_SCANNED_DIRTY,          /* dirty marker present or clean marker missing */
        MTDSWAP_SCANNED_BITFLIP,        /* clean marker present but the read reported bitflips */
        MTDSWAP_SCANNED_BAD,            /* the MTD layer reports the block as bad */
};
150
/*
 * In the worst case mtdswap_writesect() has allocated the last clean
 * page from the current block and is then pre-empted by the GC
 * thread. The thread can consume a full erase block when moving a
 * block.
 *
 * GC refuses to run once spare_eblks drops below MIN_SPARE_EBLOCKS.
 * MIN_ERASE_BLOCKS is not used in this part of the file; presumably the
 * smallest device size the driver accepts - TODO confirm.
 */
#define MIN_SPARE_EBLOCKS       2
#define MIN_ERASE_BLOCKS        (MIN_SPARE_EBLOCKS + 1)
159
/*
 * Accessors for the per-state erase-block trees of a struct mtdswap_dev.
 * @name is the tree's enum name without the MTDSWAP_ prefix, e.g.
 * TREE_ROOT(d, CLEAN). The device argument is parenthesized so that any
 * pointer expression can be passed safely.
 */
#define TREE_ROOT(d, name) (&(d)->trees[MTDSWAP_ ## name].root)
#define TREE_EMPTY(d, name) (TREE_ROOT(d, name)->rb_node == NULL)
#define TREE_NONEMPTY(d, name) (!TREE_EMPTY(d, name))
#define TREE_COUNT(d, name) ((d)->trees[MTDSWAP_ ## name].count)

/* The mtdswap device is stored in the blktrans device's priv pointer. */
#define MTDSWAP_MBD_TO_MTDSWAP(dev) ((struct mtdswap_dev *)(dev)->priv)
166
/* Module parameters; all of them are read-only (0444) after load. */

/* Comma separated list of MTD partition numbers to attach to. */
static char partitions[128] = "";
module_param_string(partitions, partitions, sizeof(partitions), 0444);
MODULE_PARM_DESC(partitions, "MTD partition numbers to use as swap "
                "partitions=\"1,3,5\"");

/* Percentage of erase blocks to keep in reserve for garbage collection. */
static unsigned int spare_eblocks = 10;
module_param(spare_eblocks, uint, 0444);
MODULE_PARM_DESC(spare_eblocks, "Percentage of spare erase blocks for "
                "garbage collection (default 10%)");

/* Whether to include the builtin swap header. */
static bool header; /* false */
module_param(header, bool, 0444);
MODULE_PARM_DESC(header,
                "Include builtin swap header (default 0, without header)");
181
/* Forward declaration: mtdswap_write_block() calls GC before it is defined. */
static int mtdswap_gc(struct mtdswap_dev *d, unsigned int background);
183
184 static loff_t mtdswap_eb_offset(struct mtdswap_dev *d, struct swap_eb *eb)
185 {
186         return (loff_t)(eb - d->eb_data) * d->mtd->erasesize;
187 }
188
189 static void mtdswap_eb_detach(struct mtdswap_dev *d, struct swap_eb *eb)
190 {
191         unsigned int oldidx;
192         struct mtdswap_tree *tp;
193
194         if (eb->root) {
195                 tp = container_of(eb->root, struct mtdswap_tree, root);
196                 oldidx = tp - &d->trees[0];
197
198                 d->trees[oldidx].count--;
199                 rb_erase(&eb->rb, eb->root);
200         }
201 }
202
203 static void __mtdswap_rb_add(struct rb_root *root, struct swap_eb *eb)
204 {
205         struct rb_node **p, *parent = NULL;
206         struct swap_eb *cur;
207
208         p = &root->rb_node;
209         while (*p) {
210                 parent = *p;
211                 cur = rb_entry(parent, struct swap_eb, rb);
212                 if (eb->erase_count > cur->erase_count)
213                         p = &(*p)->rb_right;
214                 else
215                         p = &(*p)->rb_left;
216         }
217
218         rb_link_node(&eb->rb, parent, p);
219         rb_insert_color(&eb->rb, root);
220 }
221
222 static void mtdswap_rb_add(struct mtdswap_dev *d, struct swap_eb *eb, int idx)
223 {
224         struct rb_root *root;
225
226         if (eb->root == &d->trees[idx].root)
227                 return;
228
229         mtdswap_eb_detach(d, eb);
230         root = &d->trees[idx].root;
231         __mtdswap_rb_add(root, eb);
232         eb->root = root;
233         d->trees[idx].count++;
234 }
235
/*
 * Return the node at in-order position @idx (0 based) of @root, or NULL
 * when the tree holds @idx nodes or fewer.
 */
static struct rb_node *mtdswap_rb_index(struct rb_root *root, unsigned int idx)
{
        struct rb_node *node = rb_first(root);
        unsigned int steps;

        for (steps = 0; steps < idx && node; steps++)
                node = rb_next(node);

        return node;
}
250
251 static int mtdswap_handle_badblock(struct mtdswap_dev *d, struct swap_eb *eb)
252 {
253         int ret;
254         loff_t offset;
255
256         d->spare_eblks--;
257         eb->flags |= EBLOCK_BAD;
258         mtdswap_eb_detach(d, eb);
259         eb->root = NULL;
260
261         /* badblocks not supported */
262         if (!mtd_can_have_bb(d->mtd))
263                 return 1;
264
265         offset = mtdswap_eb_offset(d, eb);
266         dev_warn(d->dev, "Marking bad block at %08llx\n", offset);
267         ret = mtd_block_markbad(d->mtd, offset);
268
269         if (ret) {
270                 dev_warn(d->dev, "Mark block bad failed for block at %08llx "
271                         "error %d\n", offset, ret);
272                 return ret;
273         }
274
275         return 1;
276
277 }
278
279 static int mtdswap_handle_write_error(struct mtdswap_dev *d, struct swap_eb *eb)
280 {
281         unsigned int marked = eb->flags & EBLOCK_FAILED;
282         struct swap_eb *curr_write = d->curr_write;
283
284         eb->flags |= EBLOCK_FAILED;
285         if (curr_write == eb) {
286                 d->curr_write = NULL;
287
288                 if (!marked && d->curr_write_pos != 0) {
289                         mtdswap_rb_add(d, eb, MTDSWAP_FAILING);
290                         return 0;
291                 }
292         }
293
294         return mtdswap_handle_badblock(d, eb);
295 }
296
297 static int mtdswap_read_oob(struct mtdswap_dev *d, loff_t from,
298                         struct mtd_oob_ops *ops)
299 {
300         int ret = mtd_read_oob(d->mtd, from, ops);
301
302         if (mtd_is_bitflip(ret))
303                 return ret;
304
305         if (ret) {
306                 dev_warn(d->dev, "Read OOB failed %d for block at %08llx\n",
307                         ret, from);
308                 return ret;
309         }
310
311         if (ops->oobretlen < ops->ooblen) {
312                 dev_warn(d->dev, "Read OOB return short read (%zd bytes not "
313                         "%zd) for block at %08llx\n",
314                         ops->oobretlen, ops->ooblen, from);
315                 return -EIO;
316         }
317
318         return 0;
319 }
320
321 static int mtdswap_read_markers(struct mtdswap_dev *d, struct swap_eb *eb)
322 {
323         struct mtdswap_oobdata *data, *data2;
324         int ret;
325         loff_t offset;
326         struct mtd_oob_ops ops = { };
327
328         offset = mtdswap_eb_offset(d, eb);
329
330         /* Check first if the block is bad. */
331         if (mtd_can_have_bb(d->mtd) && mtd_block_isbad(d->mtd, offset))
332                 return MTDSWAP_SCANNED_BAD;
333
334         ops.ooblen = 2 * d->mtd->oobavail;
335         ops.oobbuf = d->oob_buf;
336         ops.ooboffs = 0;
337         ops.datbuf = NULL;
338         ops.mode = MTD_OPS_AUTO_OOB;
339
340         ret = mtdswap_read_oob(d, offset, &ops);
341
342         if (ret && !mtd_is_bitflip(ret))
343                 return ret;
344
345         data = (struct mtdswap_oobdata *)d->oob_buf;
346         data2 = (struct mtdswap_oobdata *)
347                 (d->oob_buf + d->mtd->oobavail);
348
349         if (le16_to_cpu(data->magic) == MTDSWAP_MAGIC_CLEAN) {
350                 eb->erase_count = le32_to_cpu(data->count);
351                 if (mtd_is_bitflip(ret))
352                         ret = MTDSWAP_SCANNED_BITFLIP;
353                 else {
354                         if (le16_to_cpu(data2->magic) == MTDSWAP_MAGIC_DIRTY)
355                                 ret = MTDSWAP_SCANNED_DIRTY;
356                         else
357                                 ret = MTDSWAP_SCANNED_CLEAN;
358                 }
359         } else {
360                 eb->flags |= EBLOCK_NOMAGIC;
361                 ret = MTDSWAP_SCANNED_DIRTY;
362         }
363
364         return ret;
365 }
366
367 static int mtdswap_write_marker(struct mtdswap_dev *d, struct swap_eb *eb,
368                                 u16 marker)
369 {
370         struct mtdswap_oobdata n;
371         int ret;
372         loff_t offset;
373         struct mtd_oob_ops ops = { };
374
375         ops.ooboffs = 0;
376         ops.oobbuf = (uint8_t *)&n;
377         ops.mode = MTD_OPS_AUTO_OOB;
378         ops.datbuf = NULL;
379
380         if (marker == MTDSWAP_TYPE_CLEAN) {
381                 n.magic = cpu_to_le16(MTDSWAP_MAGIC_CLEAN);
382                 n.count = cpu_to_le32(eb->erase_count);
383                 ops.ooblen = MTDSWAP_OOBSIZE;
384                 offset = mtdswap_eb_offset(d, eb);
385         } else {
386                 n.magic = cpu_to_le16(MTDSWAP_MAGIC_DIRTY);
387                 ops.ooblen = sizeof(n.magic);
388                 offset = mtdswap_eb_offset(d, eb) + d->mtd->writesize;
389         }
390
391         ret = mtd_write_oob(d->mtd, offset, &ops);
392
393         if (ret) {
394                 dev_warn(d->dev, "Write OOB failed for block at %08llx "
395                         "error %d\n", offset, ret);
396                 if (ret == -EIO || mtd_is_eccerr(ret))
397                         mtdswap_handle_write_error(d, eb);
398                 return ret;
399         }
400
401         if (ops.oobretlen != ops.ooblen) {
402                 dev_warn(d->dev, "Short OOB write for block at %08llx: "
403                         "%zd not %zd\n",
404                         offset, ops.oobretlen, ops.ooblen);
405                 return ret;
406         }
407
408         return 0;
409 }
410
411 /*
412  * Are there any erase blocks without MAGIC_CLEAN header, presumably
413  * because power was cut off after erase but before header write? We
414  * need to guestimate the erase count.
415  */
416 static void mtdswap_check_counts(struct mtdswap_dev *d)
417 {
418         struct rb_root hist_root = RB_ROOT;
419         struct rb_node *medrb;
420         struct swap_eb *eb;
421         unsigned int i, cnt, median;
422
423         cnt = 0;
424         for (i = 0; i < d->eblks; i++) {
425                 eb = d->eb_data + i;
426
427                 if (eb->flags & (EBLOCK_NOMAGIC | EBLOCK_BAD | EBLOCK_READERR))
428                         continue;
429
430                 __mtdswap_rb_add(&hist_root, eb);
431                 cnt++;
432         }
433
434         if (cnt == 0)
435                 return;
436
437         medrb = mtdswap_rb_index(&hist_root, cnt / 2);
438         median = rb_entry(medrb, struct swap_eb, rb)->erase_count;
439
440         d->max_erase_count = MTDSWAP_ECNT_MAX(&hist_root);
441
442         for (i = 0; i < d->eblks; i++) {
443                 eb = d->eb_data + i;
444
445                 if (eb->flags & (EBLOCK_NOMAGIC | EBLOCK_READERR))
446                         eb->erase_count = median;
447
448                 if (eb->flags & (EBLOCK_NOMAGIC | EBLOCK_BAD | EBLOCK_READERR))
449                         continue;
450
451                 rb_erase(&eb->rb, &hist_root);
452         }
453 }
454
455 static void mtdswap_scan_eblks(struct mtdswap_dev *d)
456 {
457         int status;
458         unsigned int i, idx;
459         struct swap_eb *eb;
460
461         for (i = 0; i < d->eblks; i++) {
462                 eb = d->eb_data + i;
463
464                 status = mtdswap_read_markers(d, eb);
465                 if (status < 0)
466                         eb->flags |= EBLOCK_READERR;
467                 else if (status == MTDSWAP_SCANNED_BAD) {
468                         eb->flags |= EBLOCK_BAD;
469                         continue;
470                 }
471
472                 switch (status) {
473                 case MTDSWAP_SCANNED_CLEAN:
474                         idx = MTDSWAP_CLEAN;
475                         break;
476                 case MTDSWAP_SCANNED_DIRTY:
477                 case MTDSWAP_SCANNED_BITFLIP:
478                         idx = MTDSWAP_DIRTY;
479                         break;
480                 default:
481                         idx = MTDSWAP_FAILING;
482                 }
483
484                 eb->flags |= (idx << EBLOCK_IDX_SHIFT);
485         }
486
487         mtdswap_check_counts(d);
488
489         for (i = 0; i < d->eblks; i++) {
490                 eb = d->eb_data + i;
491
492                 if (eb->flags & EBLOCK_BAD)
493                         continue;
494
495                 idx = eb->flags >> EBLOCK_IDX_SHIFT;
496                 mtdswap_rb_add(d, eb, idx);
497         }
498 }
499
500 /*
501  * Place eblk into a tree corresponding to its number of active blocks
502  * it contains.
503  */
504 static void mtdswap_store_eb(struct mtdswap_dev *d, struct swap_eb *eb)
505 {
506         unsigned int weight = eb->active_count;
507         unsigned int maxweight = d->pages_per_eblk;
508
509         if (eb == d->curr_write)
510                 return;
511
512         if (eb->flags & EBLOCK_BITFLIP)
513                 mtdswap_rb_add(d, eb, MTDSWAP_BITFLIP);
514         else if (eb->flags & (EBLOCK_READERR | EBLOCK_FAILED))
515                 mtdswap_rb_add(d, eb, MTDSWAP_FAILING);
516         if (weight == maxweight)
517                 mtdswap_rb_add(d, eb, MTDSWAP_USED);
518         else if (weight == 0)
519                 mtdswap_rb_add(d, eb, MTDSWAP_DIRTY);
520         else if (weight > (maxweight/2))
521                 mtdswap_rb_add(d, eb, MTDSWAP_LOWFRAG);
522         else
523                 mtdswap_rb_add(d, eb, MTDSWAP_HIFRAG);
524 }
525
526 static int mtdswap_erase_block(struct mtdswap_dev *d, struct swap_eb *eb)
527 {
528         struct mtd_info *mtd = d->mtd;
529         struct erase_info erase;
530         unsigned int retries = 0;
531         int ret;
532
533         eb->erase_count++;
534         if (eb->erase_count > d->max_erase_count)
535                 d->max_erase_count = eb->erase_count;
536
537 retry:
538         memset(&erase, 0, sizeof(struct erase_info));
539         erase.addr      = mtdswap_eb_offset(d, eb);
540         erase.len       = mtd->erasesize;
541
542         ret = mtd_erase(mtd, &erase);
543         if (ret) {
544                 if (retries++ < MTDSWAP_ERASE_RETRIES) {
545                         dev_warn(d->dev,
546                                 "erase of erase block %#llx on %s failed",
547                                 erase.addr, mtd->name);
548                         yield();
549                         goto retry;
550                 }
551
552                 dev_err(d->dev, "Cannot erase erase block %#llx on %s\n",
553                         erase.addr, mtd->name);
554
555                 mtdswap_handle_badblock(d, eb);
556                 return -EIO;
557         }
558
559         return 0;
560 }
561
562 static int mtdswap_map_free_block(struct mtdswap_dev *d, unsigned int page,
563                                 unsigned int *block)
564 {
565         int ret;
566         struct swap_eb *old_eb = d->curr_write;
567         struct rb_root *clean_root;
568         struct swap_eb *eb;
569
570         if (old_eb == NULL || d->curr_write_pos >= d->pages_per_eblk) {
571                 do {
572                         if (TREE_EMPTY(d, CLEAN))
573                                 return -ENOSPC;
574
575                         clean_root = TREE_ROOT(d, CLEAN);
576                         eb = rb_entry(rb_first(clean_root), struct swap_eb, rb);
577                         rb_erase(&eb->rb, clean_root);
578                         eb->root = NULL;
579                         TREE_COUNT(d, CLEAN)--;
580
581                         ret = mtdswap_write_marker(d, eb, MTDSWAP_TYPE_DIRTY);
582                 } while (ret == -EIO || mtd_is_eccerr(ret));
583
584                 if (ret)
585                         return ret;
586
587                 d->curr_write_pos = 0;
588                 d->curr_write = eb;
589                 if (old_eb)
590                         mtdswap_store_eb(d, old_eb);
591         }
592
593         *block = (d->curr_write - d->eb_data) * d->pages_per_eblk +
594                 d->curr_write_pos;
595
596         d->curr_write->active_count++;
597         d->revmap[*block] = page;
598         d->curr_write_pos++;
599
600         return 0;
601 }
602
603 static unsigned int mtdswap_free_page_cnt(struct mtdswap_dev *d)
604 {
605         return TREE_COUNT(d, CLEAN) * d->pages_per_eblk +
606                 d->pages_per_eblk - d->curr_write_pos;
607 }
608
609 static unsigned int mtdswap_enough_free_pages(struct mtdswap_dev *d)
610 {
611         return mtdswap_free_page_cnt(d) > d->pages_per_eblk;
612 }
613
614 static int mtdswap_write_block(struct mtdswap_dev *d, char *buf,
615                         unsigned int page, unsigned int *bp, int gc_context)
616 {
617         struct mtd_info *mtd = d->mtd;
618         struct swap_eb *eb;
619         size_t retlen;
620         loff_t writepos;
621         int ret;
622
623 retry:
624         if (!gc_context)
625                 while (!mtdswap_enough_free_pages(d))
626                         if (mtdswap_gc(d, 0) > 0)
627                                 return -ENOSPC;
628
629         ret = mtdswap_map_free_block(d, page, bp);
630         eb = d->eb_data + (*bp / d->pages_per_eblk);
631
632         if (ret == -EIO || mtd_is_eccerr(ret)) {
633                 d->curr_write = NULL;
634                 eb->active_count--;
635                 d->revmap[*bp] = PAGE_UNDEF;
636                 goto retry;
637         }
638
639         if (ret < 0)
640                 return ret;
641
642         writepos = (loff_t)*bp << PAGE_SHIFT;
643         ret =  mtd_write(mtd, writepos, PAGE_SIZE, &retlen, buf);
644         if (ret == -EIO || mtd_is_eccerr(ret)) {
645                 d->curr_write_pos--;
646                 eb->active_count--;
647                 d->revmap[*bp] = PAGE_UNDEF;
648                 mtdswap_handle_write_error(d, eb);
649                 goto retry;
650         }
651
652         if (ret < 0) {
653                 dev_err(d->dev, "Write to MTD device failed: %d (%zd written)",
654                         ret, retlen);
655                 goto err;
656         }
657
658         if (retlen != PAGE_SIZE) {
659                 dev_err(d->dev, "Short write to MTD device: %zd written",
660                         retlen);
661                 ret = -EIO;
662                 goto err;
663         }
664
665         return ret;
666
667 err:
668         d->curr_write_pos--;
669         eb->active_count--;
670         d->revmap[*bp] = PAGE_UNDEF;
671
672         return ret;
673 }
674
675 static int mtdswap_move_block(struct mtdswap_dev *d, unsigned int oldblock,
676                 unsigned int *newblock)
677 {
678         struct mtd_info *mtd = d->mtd;
679         struct swap_eb *eb, *oldeb;
680         int ret;
681         size_t retlen;
682         unsigned int page, retries;
683         loff_t readpos;
684
685         page = d->revmap[oldblock];
686         readpos = (loff_t) oldblock << PAGE_SHIFT;
687         retries = 0;
688
689 retry:
690         ret = mtd_read(mtd, readpos, PAGE_SIZE, &retlen, d->page_buf);
691
692         if (ret < 0 && !mtd_is_bitflip(ret)) {
693                 oldeb = d->eb_data + oldblock / d->pages_per_eblk;
694                 oldeb->flags |= EBLOCK_READERR;
695
696                 dev_err(d->dev, "Read Error: %d (block %u)\n", ret,
697                         oldblock);
698                 retries++;
699                 if (retries < MTDSWAP_IO_RETRIES)
700                         goto retry;
701
702                 goto read_error;
703         }
704
705         if (retlen != PAGE_SIZE) {
706                 dev_err(d->dev, "Short read: %zd (block %u)\n", retlen,
707                        oldblock);
708                 ret = -EIO;
709                 goto read_error;
710         }
711
712         ret = mtdswap_write_block(d, d->page_buf, page, newblock, 1);
713         if (ret < 0) {
714                 d->page_data[page] = BLOCK_ERROR;
715                 dev_err(d->dev, "Write error: %d\n", ret);
716                 return ret;
717         }
718
719         d->page_data[page] = *newblock;
720         d->revmap[oldblock] = PAGE_UNDEF;
721         eb = d->eb_data + oldblock / d->pages_per_eblk;
722         eb->active_count--;
723
724         return 0;
725
726 read_error:
727         d->page_data[page] = BLOCK_ERROR;
728         d->revmap[oldblock] = PAGE_UNDEF;
729         return ret;
730 }
731
732 static int mtdswap_gc_eblock(struct mtdswap_dev *d, struct swap_eb *eb)
733 {
734         unsigned int i, block, eblk_base, newblock;
735         int ret, errcode;
736
737         errcode = 0;
738         eblk_base = (eb - d->eb_data) * d->pages_per_eblk;
739
740         for (i = 0; i < d->pages_per_eblk; i++) {
741                 if (d->spare_eblks < MIN_SPARE_EBLOCKS)
742                         return -ENOSPC;
743
744                 block = eblk_base + i;
745                 if (d->revmap[block] == PAGE_UNDEF)
746                         continue;
747
748                 ret = mtdswap_move_block(d, block, &newblock);
749                 if (ret < 0 && !errcode)
750                         errcode = ret;
751         }
752
753         return errcode;
754 }
755
756 static int __mtdswap_choose_gc_tree(struct mtdswap_dev *d)
757 {
758         int idx, stopat;
759
760         if (TREE_COUNT(d, CLEAN) < LOW_FRAG_GC_THRESHOLD)
761                 stopat = MTDSWAP_LOWFRAG;
762         else
763                 stopat = MTDSWAP_HIFRAG;
764
765         for (idx = MTDSWAP_BITFLIP; idx >= stopat; idx--)
766                 if (d->trees[idx].root.rb_node != NULL)
767                         return idx;
768
769         return -1;
770 }
771
772 static int mtdswap_wlfreq(unsigned int maxdiff)
773 {
774         unsigned int h, x, y, dist, base;
775
776         /*
777          * Calculate linear ramp down from f1 to f2 when maxdiff goes from
778          * MAX_ERASE_DIFF to MAX_ERASE_DIFF + COLLECT_NONDIRTY_BASE.  Similar
779          * to triangle with height f1 - f1 and width COLLECT_NONDIRTY_BASE.
780          */
781
782         dist = maxdiff - MAX_ERASE_DIFF;
783         if (dist > COLLECT_NONDIRTY_BASE)
784                 dist = COLLECT_NONDIRTY_BASE;
785
786         /*
787          * Modelling the slop as right angular triangle with base
788          * COLLECT_NONDIRTY_BASE and height freq1 - freq2. The ratio y/x is
789          * equal to the ratio h/base.
790          */
791         h = COLLECT_NONDIRTY_FREQ1 - COLLECT_NONDIRTY_FREQ2;
792         base = COLLECT_NONDIRTY_BASE;
793
794         x = dist - base;
795         y = (x * h + base / 2) / base;
796
797         return COLLECT_NONDIRTY_FREQ2 + y;
798 }
799
800 static int mtdswap_choose_wl_tree(struct mtdswap_dev *d)
801 {
802         static unsigned int pick_cnt;
803         unsigned int i, idx = -1, wear, max;
804         struct rb_root *root;
805
806         max = 0;
807         for (i = 0; i <= MTDSWAP_DIRTY; i++) {
808                 root = &d->trees[i].root;
809                 if (root->rb_node == NULL)
810                         continue;
811
812                 wear = d->max_erase_count - MTDSWAP_ECNT_MIN(root);
813                 if (wear > max) {
814                         max = wear;
815                         idx = i;
816                 }
817         }
818
819         if (max > MAX_ERASE_DIFF && pick_cnt >= mtdswap_wlfreq(max) - 1) {
820                 pick_cnt = 0;
821                 return idx;
822         }
823
824         pick_cnt++;
825         return -1;
826 }
827
828 static int mtdswap_choose_gc_tree(struct mtdswap_dev *d,
829                                 unsigned int background)
830 {
831         int idx;
832
833         if (TREE_NONEMPTY(d, FAILING) &&
834                 (background || (TREE_EMPTY(d, CLEAN) && TREE_EMPTY(d, DIRTY))))
835                 return MTDSWAP_FAILING;
836
837         idx = mtdswap_choose_wl_tree(d);
838         if (idx >= MTDSWAP_CLEAN)
839                 return idx;
840
841         return __mtdswap_choose_gc_tree(d);
842 }
843
844 static struct swap_eb *mtdswap_pick_gc_eblk(struct mtdswap_dev *d,
845                                         unsigned int background)
846 {
847         struct rb_root *rp = NULL;
848         struct swap_eb *eb = NULL;
849         int idx;
850
851         if (background && TREE_COUNT(d, CLEAN) > CLEAN_BLOCK_THRESHOLD &&
852                 TREE_EMPTY(d, DIRTY) && TREE_EMPTY(d, FAILING))
853                 return NULL;
854
855         idx = mtdswap_choose_gc_tree(d, background);
856         if (idx < 0)
857                 return NULL;
858
859         rp = &d->trees[idx].root;
860         eb = rb_entry(rb_first(rp), struct swap_eb, rb);
861
862         rb_erase(&eb->rb, rp);
863         eb->root = NULL;
864         d->trees[idx].count--;
865         return eb;
866 }
867
/*
 * Test pattern for step @i of the erase block check: the two alternating
 * bit patterns 0xAAAAAAAA (even @i) and 0x55555555 (odd @i).
 */
static unsigned int mtdswap_test_patt(unsigned int i)
{
        if (i & 1)
                return 0x55555555;

        return 0xAAAAAAAA;
}
872
873 static unsigned int mtdswap_eblk_passes(struct mtdswap_dev *d,
874                                         struct swap_eb *eb)
875 {
876         struct mtd_info *mtd = d->mtd;
877         unsigned int test, i, j, patt, mtd_pages;
878         loff_t base, pos;
879         unsigned int *p1 = (unsigned int *)d->page_buf;
880         unsigned char *p2 = (unsigned char *)d->oob_buf;
881         struct mtd_oob_ops ops = { };
882         int ret;
883
884         ops.mode = MTD_OPS_AUTO_OOB;
885         ops.len = mtd->writesize;
886         ops.ooblen = mtd->oobavail;
887         ops.ooboffs = 0;
888         ops.datbuf = d->page_buf;
889         ops.oobbuf = d->oob_buf;
890         base = mtdswap_eb_offset(d, eb);
891         mtd_pages = d->pages_per_eblk * PAGE_SIZE / mtd->writesize;
892
893         for (test = 0; test < 2; test++) {
894                 pos = base;
895                 for (i = 0; i < mtd_pages; i++) {
896                         patt = mtdswap_test_patt(test + i);
897                         memset(d->page_buf, patt, mtd->writesize);
898                         memset(d->oob_buf, patt, mtd->oobavail);
899                         ret = mtd_write_oob(mtd, pos, &ops);
900                         if (ret)
901                                 goto error;
902
903                         pos += mtd->writesize;
904                 }
905
906                 pos = base;
907                 for (i = 0; i < mtd_pages; i++) {
908                         ret = mtd_read_oob(mtd, pos, &ops);
909                         if (ret)
910                                 goto error;
911
912                         patt = mtdswap_test_patt(test + i);
913                         for (j = 0; j < mtd->writesize/sizeof(int); j++)
914                                 if (p1[j] != patt)
915                                         goto error;
916
917                         for (j = 0; j < mtd->oobavail; j++)
918                                 if (p2[j] != (unsigned char)patt)
919                                         goto error;
920
921                         pos += mtd->writesize;
922                 }
923
924                 ret = mtdswap_erase_block(d, eb);
925                 if (ret)
926                         goto error;
927         }
928
929         eb->flags &= ~EBLOCK_READERR;
930         return 1;
931
932 error:
933         mtdswap_handle_badblock(d, eb);
934         return 0;
935 }
936
/*
 * Run a single garbage-collection pass.
 *
 * An erase block is chosen with mtdswap_pick_gc_eblk(), processed by
 * mtdswap_gc_eblock(), erased, and - if everything succeeded - stamped
 * with a clean marker and added to the MTDSWAP_CLEAN tree.
 *
 * @background is passed through unchanged to mtdswap_pick_gc_eblk().
 *
 * Returns 1 when no pass could be made (fewer than MIN_SPARE_EBLOCKS spare
 * blocks, no block picked, or mtdswap_gc_eblock() returned -ENOSPC).
 * Returns 0 in every other case, including when the block was found bad.
 */
static int mtdswap_gc(struct mtdswap_dev *d, unsigned int background)
{
	struct swap_eb *eb;
	int ret;

	if (d->spare_eblks < MIN_SPARE_EBLOCKS)
		return 1;

	eb = mtdswap_pick_gc_eblk(d, background);
	if (!eb)
		return 1;

	ret = mtdswap_gc_eblock(d, eb);
	if (ret == -ENOSPC)
		return 1;

	/* The block was flagged as failed while being collected. */
	if (eb->flags & EBLOCK_FAILED) {
		mtdswap_handle_badblock(d, eb);
		return 0;
	}

	eb->flags &= ~EBLOCK_BITFLIP;
	ret = mtdswap_erase_block(d, eb);
	/*
	 * A block that previously produced a read error is only reused if the
	 * erase succeeded and the pattern test in mtdswap_eblk_passes()
	 * passes; otherwise it is not queued in any tree here.
	 */
	if ((eb->flags & EBLOCK_READERR) &&
		(ret || !mtdswap_eblk_passes(d, eb)))
		return 0;

	if (ret == 0)
		ret = mtdswap_write_marker(d, eb, MTDSWAP_TYPE_CLEAN);

	if (ret == 0)
		mtdswap_rb_add(d, eb, MTDSWAP_CLEAN);
	/*
	 * Any failure other than -EIO or an ECC error puts the block back on
	 * the dirty tree. NOTE(review): -EIO/ECC failures are presumably
	 * handled inside the callee - confirm.
	 */
	else if (ret != -EIO && !mtd_is_eccerr(ret))
		mtdswap_rb_add(d, eb, MTDSWAP_DIRTY);

	return 0;
}
974
/*
 * Background hook: keep running garbage-collection passes until
 * mtdswap_gc() returns non-zero or the blktrans core asks background
 * work to cease.
 */
static void mtdswap_background(struct mtd_blktrans_dev *dev)
{
	struct mtdswap_dev *swap = MTDSWAP_MBD_TO_MTDSWAP(dev);

	do {
		if (mtdswap_gc(swap, 1))
			break;
	} while (!mtd_blktrans_cease_background(dev));
}
986
987 static void mtdswap_cleanup(struct mtdswap_dev *d)
988 {
989         vfree(d->eb_data);
990         vfree(d->revmap);
991         vfree(d->page_data);
992         kfree(d->oob_buf);
993         kfree(d->page_buf);
994 }
995
996 static int mtdswap_flush(struct mtd_blktrans_dev *dev)
997 {
998         struct mtdswap_dev *d = MTDSWAP_MBD_TO_MTDSWAP(dev);
999
1000         mtd_sync(d->mtd);
1001         return 0;
1002 }
1003
1004 static unsigned int mtdswap_badblocks(struct mtd_info *mtd, uint64_t size)
1005 {
1006         loff_t offset;
1007         unsigned int badcnt;
1008
1009         badcnt = 0;
1010
1011         if (mtd_can_have_bb(mtd))
1012                 for (offset = 0; offset < size; offset += mtd->erasesize)
1013                         if (mtd_block_isbad(mtd, offset))
1014                                 badcnt++;
1015
1016         return badcnt;
1017 }
1018
1019 static int mtdswap_writesect(struct mtd_blktrans_dev *dev,
1020                         unsigned long page, char *buf)
1021 {
1022         struct mtdswap_dev *d = MTDSWAP_MBD_TO_MTDSWAP(dev);
1023         unsigned int newblock, mapped;
1024         struct swap_eb *eb;
1025         int ret;
1026
1027         d->sect_write_count++;
1028
1029         if (d->spare_eblks < MIN_SPARE_EBLOCKS)
1030                 return -ENOSPC;
1031
1032         if (header) {
1033                 /* Ignore writes to the header page */
1034                 if (unlikely(page == 0))
1035                         return 0;
1036
1037                 page--;
1038         }
1039
1040         mapped = d->page_data[page];
1041         if (mapped <= BLOCK_MAX) {
1042                 eb = d->eb_data + (mapped / d->pages_per_eblk);
1043                 eb->active_count--;
1044                 mtdswap_store_eb(d, eb);
1045                 d->page_data[page] = BLOCK_UNDEF;
1046                 d->revmap[mapped] = PAGE_UNDEF;
1047         }
1048
1049         ret = mtdswap_write_block(d, buf, page, &newblock, 0);
1050         d->mtd_write_count++;
1051
1052         if (ret < 0)
1053                 return ret;
1054
1055         d->page_data[page] = newblock;
1056
1057         return 0;
1058 }
1059
1060 /* Provide a dummy swap header for the kernel */
1061 static int mtdswap_auto_header(struct mtdswap_dev *d, char *buf)
1062 {
1063         union swap_header *hd = (union swap_header *)(buf);
1064
1065         memset(buf, 0, PAGE_SIZE - 10);
1066
1067         hd->info.version = 1;
1068         hd->info.last_page = d->mbd_dev->size - 1;
1069         hd->info.nr_badpages = 0;
1070
1071         memcpy(buf + PAGE_SIZE - 10, "SWAPSPACE2", 10);
1072
1073         return 0;
1074 }
1075
1076 static int mtdswap_readsect(struct mtd_blktrans_dev *dev,
1077                         unsigned long page, char *buf)
1078 {
1079         struct mtdswap_dev *d = MTDSWAP_MBD_TO_MTDSWAP(dev);
1080         struct mtd_info *mtd = d->mtd;
1081         unsigned int realblock, retries;
1082         loff_t readpos;
1083         struct swap_eb *eb;
1084         size_t retlen;
1085         int ret;
1086
1087         d->sect_read_count++;
1088
1089         if (header) {
1090                 if (unlikely(page == 0))
1091                         return mtdswap_auto_header(d, buf);
1092
1093                 page--;
1094         }
1095
1096         realblock = d->page_data[page];
1097         if (realblock > BLOCK_MAX) {
1098                 memset(buf, 0x0, PAGE_SIZE);
1099                 if (realblock == BLOCK_UNDEF)
1100                         return 0;
1101                 else
1102                         return -EIO;
1103         }
1104
1105         eb = d->eb_data + (realblock / d->pages_per_eblk);
1106         BUG_ON(d->revmap[realblock] == PAGE_UNDEF);
1107
1108         readpos = (loff_t)realblock << PAGE_SHIFT;
1109         retries = 0;
1110
1111 retry:
1112         ret = mtd_read(mtd, readpos, PAGE_SIZE, &retlen, buf);
1113
1114         d->mtd_read_count++;
1115         if (mtd_is_bitflip(ret)) {
1116                 eb->flags |= EBLOCK_BITFLIP;
1117                 mtdswap_rb_add(d, eb, MTDSWAP_BITFLIP);
1118                 ret = 0;
1119         }
1120
1121         if (ret < 0) {
1122                 dev_err(d->dev, "Read error %d\n", ret);
1123                 eb->flags |= EBLOCK_READERR;
1124                 mtdswap_rb_add(d, eb, MTDSWAP_FAILING);
1125                 retries++;
1126                 if (retries < MTDSWAP_IO_RETRIES)
1127                         goto retry;
1128
1129                 return ret;
1130         }
1131
1132         if (retlen != PAGE_SIZE) {
1133                 dev_err(d->dev, "Short read %zd\n", retlen);
1134                 return -EIO;
1135         }
1136
1137         return 0;
1138 }
1139
1140 static int mtdswap_discard(struct mtd_blktrans_dev *dev, unsigned long first,
1141                         unsigned nr_pages)
1142 {
1143         struct mtdswap_dev *d = MTDSWAP_MBD_TO_MTDSWAP(dev);
1144         unsigned long page;
1145         struct swap_eb *eb;
1146         unsigned int mapped;
1147
1148         d->discard_count++;
1149
1150         for (page = first; page < first + nr_pages; page++) {
1151                 mapped = d->page_data[page];
1152                 if (mapped <= BLOCK_MAX) {
1153                         eb = d->eb_data + (mapped / d->pages_per_eblk);
1154                         eb->active_count--;
1155                         mtdswap_store_eb(d, eb);
1156                         d->page_data[page] = BLOCK_UNDEF;
1157                         d->revmap[mapped] = PAGE_UNDEF;
1158                         d->discard_page_count++;
1159                 } else if (mapped == BLOCK_ERROR) {
1160                         d->page_data[page] = BLOCK_UNDEF;
1161                         d->discard_page_count++;
1162                 }
1163         }
1164
1165         return 0;
1166 }
1167
1168 static int mtdswap_show(struct seq_file *s, void *data)
1169 {
1170         struct mtdswap_dev *d = (struct mtdswap_dev *) s->private;
1171         unsigned long sum;
1172         unsigned int count[MTDSWAP_TREE_CNT];
1173         unsigned int min[MTDSWAP_TREE_CNT];
1174         unsigned int max[MTDSWAP_TREE_CNT];
1175         unsigned int i, cw = 0, cwp = 0, cwecount = 0, bb_cnt, mapped, pages;
1176         uint64_t use_size;
1177         static const char * const name[] = {
1178                 "clean", "used", "low", "high", "dirty", "bitflip", "failing"
1179         };
1180
1181         mutex_lock(&d->mbd_dev->lock);
1182
1183         for (i = 0; i < MTDSWAP_TREE_CNT; i++) {
1184                 struct rb_root *root = &d->trees[i].root;
1185
1186                 if (root->rb_node) {
1187                         count[i] = d->trees[i].count;
1188                         min[i] = MTDSWAP_ECNT_MIN(root);
1189                         max[i] = MTDSWAP_ECNT_MAX(root);
1190                 } else
1191                         count[i] = 0;
1192         }
1193
1194         if (d->curr_write) {
1195                 cw = 1;
1196                 cwp = d->curr_write_pos;
1197                 cwecount = d->curr_write->erase_count;
1198         }
1199
1200         sum = 0;
1201         for (i = 0; i < d->eblks; i++)
1202                 sum += d->eb_data[i].erase_count;
1203
1204         use_size = (uint64_t)d->eblks * d->mtd->erasesize;
1205         bb_cnt = mtdswap_badblocks(d->mtd, use_size);
1206
1207         mapped = 0;
1208         pages = d->mbd_dev->size;
1209         for (i = 0; i < pages; i++)
1210                 if (d->page_data[i] != BLOCK_UNDEF)
1211                         mapped++;
1212
1213         mutex_unlock(&d->mbd_dev->lock);
1214
1215         for (i = 0; i < MTDSWAP_TREE_CNT; i++) {
1216                 if (!count[i])
1217                         continue;
1218
1219                 if (min[i] != max[i])
1220                         seq_printf(s, "%s:\t%5d erase blocks, erased min %d, "
1221                                 "max %d times\n",
1222                                 name[i], count[i], min[i], max[i]);
1223                 else
1224                         seq_printf(s, "%s:\t%5d erase blocks, all erased %d "
1225                                 "times\n", name[i], count[i], min[i]);
1226         }
1227
1228         if (bb_cnt)
1229                 seq_printf(s, "bad:\t%5u erase blocks\n", bb_cnt);
1230
1231         if (cw)
1232                 seq_printf(s, "current erase block: %u pages used, %u free, "
1233                         "erased %u times\n",
1234                         cwp, d->pages_per_eblk - cwp, cwecount);
1235
1236         seq_printf(s, "total erasures: %lu\n", sum);
1237
1238         seq_puts(s, "\n");
1239
1240         seq_printf(s, "mtdswap_readsect count: %llu\n", d->sect_read_count);
1241         seq_printf(s, "mtdswap_writesect count: %llu\n", d->sect_write_count);
1242         seq_printf(s, "mtdswap_discard count: %llu\n", d->discard_count);
1243         seq_printf(s, "mtd read count: %llu\n", d->mtd_read_count);
1244         seq_printf(s, "mtd write count: %llu\n", d->mtd_write_count);
1245         seq_printf(s, "discarded pages count: %llu\n", d->discard_page_count);
1246
1247         seq_puts(s, "\n");
1248         seq_printf(s, "total pages: %u\n", pages);
1249         seq_printf(s, "pages mapped: %u\n", mapped);
1250
1251         return 0;
1252 }
1253 DEFINE_SHOW_ATTRIBUTE(mtdswap);
1254
1255 static int mtdswap_add_debugfs(struct mtdswap_dev *d)
1256 {
1257         struct dentry *root = d->mtd->dbg.dfs_dir;
1258
1259         if (!IS_ENABLED(CONFIG_DEBUG_FS))
1260                 return 0;
1261
1262         if (IS_ERR_OR_NULL(root))
1263                 return -1;
1264
1265         debugfs_create_file("mtdswap_stats", S_IRUSR, root, d, &mtdswap_fops);
1266
1267         return 0;
1268 }
1269
1270 static int mtdswap_init(struct mtdswap_dev *d, unsigned int eblocks,
1271                         unsigned int spare_cnt)
1272 {
1273         struct mtd_info *mtd = d->mbd_dev->mtd;
1274         unsigned int i, eblk_bytes, pages, blocks;
1275         int ret = -ENOMEM;
1276
1277         d->mtd = mtd;
1278         d->eblks = eblocks;
1279         d->spare_eblks = spare_cnt;
1280         d->pages_per_eblk = mtd->erasesize >> PAGE_SHIFT;
1281
1282         pages = d->mbd_dev->size;
1283         blocks = eblocks * d->pages_per_eblk;
1284
1285         for (i = 0; i < MTDSWAP_TREE_CNT; i++)
1286                 d->trees[i].root = RB_ROOT;
1287
1288         d->page_data = vmalloc(array_size(pages, sizeof(int)));
1289         if (!d->page_data)
1290                 goto page_data_fail;
1291
1292         d->revmap = vmalloc(array_size(blocks, sizeof(int)));
1293         if (!d->revmap)
1294                 goto revmap_fail;
1295
1296         eblk_bytes = sizeof(struct swap_eb)*d->eblks;
1297         d->eb_data = vzalloc(eblk_bytes);
1298         if (!d->eb_data)
1299                 goto eb_data_fail;
1300
1301         for (i = 0; i < pages; i++)
1302                 d->page_data[i] = BLOCK_UNDEF;
1303
1304         for (i = 0; i < blocks; i++)
1305                 d->revmap[i] = PAGE_UNDEF;
1306
1307         d->page_buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
1308         if (!d->page_buf)
1309                 goto page_buf_fail;
1310
1311         d->oob_buf = kmalloc_array(2, mtd->oobavail, GFP_KERNEL);
1312         if (!d->oob_buf)
1313                 goto oob_buf_fail;
1314
1315         mtdswap_scan_eblks(d);
1316
1317         return 0;
1318
1319 oob_buf_fail:
1320         kfree(d->page_buf);
1321 page_buf_fail:
1322         vfree(d->eb_data);
1323 eb_data_fail:
1324         vfree(d->revmap);
1325 revmap_fail:
1326         vfree(d->page_data);
1327 page_data_fail:
1328         printk(KERN_ERR "%s: init failed (%d)\n", MTDSWAP_PREFIX, ret);
1329         return ret;
1330 }
1331
1332 static void mtdswap_add_mtd(struct mtd_blktrans_ops *tr, struct mtd_info *mtd)
1333 {
1334         struct mtdswap_dev *d;
1335         struct mtd_blktrans_dev *mbd_dev;
1336         char *parts;
1337         char *this_opt;
1338         unsigned long part;
1339         unsigned int eblocks, eavailable, bad_blocks, spare_cnt;
1340         uint64_t swap_size, use_size, size_limit;
1341         int ret;
1342
1343         parts = &partitions[0];
1344         if (!*parts)
1345                 return;
1346
1347         while ((this_opt = strsep(&parts, ",")) != NULL) {
1348                 if (kstrtoul(this_opt, 0, &part) < 0)
1349                         return;
1350
1351                 if (mtd->index == part)
1352                         break;
1353         }
1354
1355         if (mtd->index != part)
1356                 return;
1357
1358         if (mtd->erasesize < PAGE_SIZE || mtd->erasesize % PAGE_SIZE) {
1359                 printk(KERN_ERR "%s: Erase size %u not multiple of PAGE_SIZE "
1360                         "%lu\n", MTDSWAP_PREFIX, mtd->erasesize, PAGE_SIZE);
1361                 return;
1362         }
1363
1364         if (PAGE_SIZE % mtd->writesize || mtd->writesize > PAGE_SIZE) {
1365                 printk(KERN_ERR "%s: PAGE_SIZE %lu not multiple of write size"
1366                         " %u\n", MTDSWAP_PREFIX, PAGE_SIZE, mtd->writesize);
1367                 return;
1368         }
1369
1370         if (!mtd->oobsize || mtd->oobavail < MTDSWAP_OOBSIZE) {
1371                 printk(KERN_ERR "%s: Not enough free bytes in OOB, "
1372                         "%d available, %zu needed.\n",
1373                         MTDSWAP_PREFIX, mtd->oobavail, MTDSWAP_OOBSIZE);
1374                 return;
1375         }
1376
1377         if (spare_eblocks > 100)
1378                 spare_eblocks = 100;
1379
1380         use_size = mtd->size;
1381         size_limit = (uint64_t) BLOCK_MAX * PAGE_SIZE;
1382
1383         if (mtd->size > size_limit) {
1384                 printk(KERN_WARNING "%s: Device too large. Limiting size to "
1385                         "%llu bytes\n", MTDSWAP_PREFIX, size_limit);
1386                 use_size = size_limit;
1387         }
1388
1389         eblocks = mtd_div_by_eb(use_size, mtd);
1390         use_size = (uint64_t)eblocks * mtd->erasesize;
1391         bad_blocks = mtdswap_badblocks(mtd, use_size);
1392         eavailable = eblocks - bad_blocks;
1393
1394         if (eavailable < MIN_ERASE_BLOCKS) {
1395                 printk(KERN_ERR "%s: Not enough erase blocks. %u available, "
1396                         "%d needed\n", MTDSWAP_PREFIX, eavailable,
1397                         MIN_ERASE_BLOCKS);
1398                 return;
1399         }
1400
1401         spare_cnt = div_u64((uint64_t)eavailable * spare_eblocks, 100);
1402
1403         if (spare_cnt < MIN_SPARE_EBLOCKS)
1404                 spare_cnt = MIN_SPARE_EBLOCKS;
1405
1406         if (spare_cnt > eavailable - 1)
1407                 spare_cnt = eavailable - 1;
1408
1409         swap_size = (uint64_t)(eavailable - spare_cnt) * mtd->erasesize +
1410                 (header ? PAGE_SIZE : 0);
1411
1412         printk(KERN_INFO "%s: Enabling MTD swap on device %lu, size %llu KB, "
1413                 "%u spare, %u bad blocks\n",
1414                 MTDSWAP_PREFIX, part, swap_size / 1024, spare_cnt, bad_blocks);
1415
1416         d = kzalloc(sizeof(struct mtdswap_dev), GFP_KERNEL);
1417         if (!d)
1418                 return;
1419
1420         mbd_dev = kzalloc(sizeof(struct mtd_blktrans_dev), GFP_KERNEL);
1421         if (!mbd_dev) {
1422                 kfree(d);
1423                 return;
1424         }
1425
1426         d->mbd_dev = mbd_dev;
1427         mbd_dev->priv = d;
1428
1429         mbd_dev->mtd = mtd;
1430         mbd_dev->devnum = mtd->index;
1431         mbd_dev->size = swap_size >> PAGE_SHIFT;
1432         mbd_dev->tr = tr;
1433
1434         if (!(mtd->flags & MTD_WRITEABLE))
1435                 mbd_dev->readonly = 1;
1436
1437         if (mtdswap_init(d, eblocks, spare_cnt) < 0)
1438                 goto init_failed;
1439
1440         if (add_mtd_blktrans_dev(mbd_dev) < 0)
1441                 goto cleanup;
1442
1443         d->dev = disk_to_dev(mbd_dev->disk);
1444
1445         ret = mtdswap_add_debugfs(d);
1446         if (ret < 0)
1447                 goto debugfs_failed;
1448
1449         return;
1450
1451 debugfs_failed:
1452         del_mtd_blktrans_dev(mbd_dev);
1453
1454 cleanup:
1455         mtdswap_cleanup(d);
1456
1457 init_failed:
1458         kfree(mbd_dev);
1459         kfree(d);
1460 }
1461
/*
 * blktrans remove_dev hook: unregister the block device, then release the
 * mtdswap buffers and the mtdswap state itself.
 */
static void mtdswap_remove_dev(struct mtd_blktrans_dev *dev)
{
	struct mtdswap_dev *swap = MTDSWAP_MBD_TO_MTDSWAP(dev);

	/* Unregister first so the buffers are not freed while still registered. */
	del_mtd_blktrans_dev(dev);

	mtdswap_cleanup(swap);
	kfree(swap);
}
1470
/*
 * Block-translation operations for mtdswap; registered with the MTD
 * blktrans layer via module_mtd_blktrans() below.
 */
static struct mtd_blktrans_ops mtdswap_ops = {
	.name		= "mtdswap",
	.major		= 0,	/* NOTE(review): presumably requests a dynamic major - confirm */
	.part_bits	= 0,
	.blksize	= PAGE_SIZE,	/* readsect transfers one PAGE_SIZE page per sector */
	.flush		= mtdswap_flush,
	.readsect	= mtdswap_readsect,
	.writesect	= mtdswap_writesect,
	.discard	= mtdswap_discard,
	.background	= mtdswap_background,
	.add_mtd	= mtdswap_add_mtd,
	.remove_dev	= mtdswap_remove_dev,
	.owner		= THIS_MODULE,
};
1485
/* Hand mtdswap_ops to the MTD blktrans layer for the lifetime of the module. */
module_mtd_blktrans(mtdswap_ops);

/* Module metadata. */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jarkko Lavinen <jarkko.lavinen@nokia.com>");
MODULE_DESCRIPTION("Block device access to an MTD suitable for using as "
		"swap space");