1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * fs/f2fs/segment.c
4  *
5  * Copyright (c) 2012 Samsung Electronics Co., Ltd.
6  *             http://www.samsung.com/
7  */
8 #include <linux/fs.h>
9 #include <linux/f2fs_fs.h>
10 #include <linux/bio.h>
11 #include <linux/blkdev.h>
12 #include <linux/sched/mm.h>
13 #include <linux/prefetch.h>
14 #include <linux/kthread.h>
15 #include <linux/swap.h>
16 #include <linux/timer.h>
17 #include <linux/freezer.h>
18 #include <linux/sched/signal.h>
19 #include <linux/random.h>
20
21 #include "f2fs.h"
22 #include "segment.h"
23 #include "node.h"
24 #include "gc.h"
25 #include "iostat.h"
26 #include <trace/events/f2fs.h>
27
28 #define __reverse_ffz(x) __reverse_ffs(~(x))
29
30 static struct kmem_cache *discard_entry_slab;
31 static struct kmem_cache *discard_cmd_slab;
32 static struct kmem_cache *sit_entry_set_slab;
33 static struct kmem_cache *revoke_entry_slab;
34
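/*
 * Load one word byte-by-byte with byte 0 as the most significant byte
 * (a big-endian load), so that the reversed-bit helpers below can scan
 * bitmaps written by f2fs_set_bit() in their on-disk bit order.
 */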
35 static unsigned long __reverse_ulong(unsigned char *str)
36 {
37         unsigned long tmp = 0;
38         int shift = 24, idx = 0;
39
40 #if BITS_PER_LONG == 64
41         shift = 56;
42 #endif
43         while (shift >= 0) {
44                 tmp |= (unsigned long)str[idx++] << shift;
45                 shift -= BITS_PER_BYTE;
46         }
47         return tmp;
48 }
49
50 /*
51  * __reverse_ffs is copied from include/asm-generic/bitops/__ffs.h since
52  * MSB and LSB are reversed in a byte by f2fs_set_bit.
53  */
54 static inline unsigned long __reverse_ffs(unsigned long word)
55 {
56         int num = 0;
57
58 #if BITS_PER_LONG == 64
59         if ((word & 0xffffffff00000000UL) == 0)
60                 num += 32;
61         else
62                 word >>= 32;
63 #endif
64         if ((word & 0xffff0000) == 0)
65                 num += 16;
66         else
67                 word >>= 16;
68
69         if ((word & 0xff00) == 0)
70                 num += 8;
71         else
72                 word >>= 8;
73
74         if ((word & 0xf0) == 0)
75                 num += 4;
76         else
77                 word >>= 4;
78
79         if ((word & 0xc) == 0)
80                 num += 2;
81         else
82                 word >>= 2;
83
84         if ((word & 0x2) == 0)
85                 num += 1;
86         return num;
87 }
88
89 /*
90  * __find_rev_next(_zero)_bit is copied from lib/find_next_bit.c because
91  * f2fs_set_bit makes MSB and LSB reversed in a byte.
92  * @size must be an integral multiple of BITS_PER_LONG.
93  * Example:
94  *                             MSB <--> LSB
95  *   f2fs_set_bit(0, bitmap) => 1000 0000
96  *   f2fs_set_bit(7, bitmap) => 0000 0001
97  */
98 static unsigned long __find_rev_next_bit(const unsigned long *addr,
99                         unsigned long size, unsigned long offset)
100 {
101         const unsigned long *p = addr + BIT_WORD(offset);
102         unsigned long result = size;
103         unsigned long tmp;
104
105         if (offset >= size)
106                 return size;
107
108         size -= (offset & ~(BITS_PER_LONG - 1));
109         offset %= BITS_PER_LONG;
110
111         while (1) {
112                 if (*p == 0)
113                         goto pass;
114
115                 tmp = __reverse_ulong((unsigned char *)p);
116
117                 tmp &= ~0UL >> offset;
118                 if (size < BITS_PER_LONG)
119                         tmp &= (~0UL << (BITS_PER_LONG - size));
120                 if (tmp)
121                         goto found;
122 pass:
123                 if (size <= BITS_PER_LONG)
124                         break;
125                 size -= BITS_PER_LONG;
126                 offset = 0;
127                 p++;
128         }
129         return result;
130 found:
131         return result - size + __reverse_ffs(tmp);
132 }
133
134 static unsigned long __find_rev_next_zero_bit(const unsigned long *addr,
135                         unsigned long size, unsigned long offset)
136 {
137         const unsigned long *p = addr + BIT_WORD(offset);
138         unsigned long result = size;
139         unsigned long tmp;
140
141         if (offset >= size)
142                 return size;
143
144         size -= (offset & ~(BITS_PER_LONG - 1));
145         offset %= BITS_PER_LONG;
146
147         while (1) {
148                 if (*p == ~0UL)
149                         goto pass;
150
151                 tmp = __reverse_ulong((unsigned char *)p);
152
153                 if (offset)
154                         tmp |= ~0UL << (BITS_PER_LONG - offset);
155                 if (size < BITS_PER_LONG)
156                         tmp |= ~0UL >> size;
157                 if (tmp != ~0UL)
158                         goto found;
159 pass:
160                 if (size <= BITS_PER_LONG)
161                         break;
162                 size -= BITS_PER_LONG;
163                 offset = 0;
164                 p++;
165         }
166         return result;
167 found:
168         return result - size + __reverse_ffz(tmp);
169 }
170
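/*
 * Decide whether allocation should fall back to SSR (slack space recycling):
 * never in LFS mode, always under urgent-high GC or while checkpointing is
 * disabled, and otherwise only when free sections cannot cover the dirty
 * node/dentry/imeta backlog plus the reserved and min_ssr margins.
 */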
171 bool f2fs_need_SSR(struct f2fs_sb_info *sbi)
172 {
173         int node_secs = get_blocktype_secs(sbi, F2FS_DIRTY_NODES);
174         int dent_secs = get_blocktype_secs(sbi, F2FS_DIRTY_DENTS);
175         int imeta_secs = get_blocktype_secs(sbi, F2FS_DIRTY_IMETA);
176
177         if (f2fs_lfs_mode(sbi))
178                 return false;
179         if (sbi->gc_mode == GC_URGENT_HIGH)
180                 return true;
181         if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
182                 return true;
183
184         return free_sections(sbi) <= (node_secs + 2 * dent_secs + imeta_secs +
185                         SM_I(sbi)->min_ssr_sections + reserved_sections(sbi));
186 }
187
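/*
 * Abort an in-flight atomic write: release the reserved block count, clear
 * the atomic-write inode flags and, when @clean is set, drop the cached
 * pages and restore the pre-atomic i_size.
 */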
188 void f2fs_abort_atomic_write(struct inode *inode, bool clean)
189 {
190         struct f2fs_inode_info *fi = F2FS_I(inode);
191
192         if (!f2fs_is_atomic_file(inode))
193                 return;
194
195         release_atomic_write_cnt(inode);
196         clear_inode_flag(inode, FI_ATOMIC_COMMITTED);
197         clear_inode_flag(inode, FI_ATOMIC_REPLACE);
198         clear_inode_flag(inode, FI_ATOMIC_FILE);
199         stat_dec_atomic_inode(inode);
200
201         F2FS_I(inode)->atomic_write_task = NULL;
202
203         if (clean) {
204                 truncate_inode_pages_final(inode->i_mapping);
205                 f2fs_i_size_write(inode, fi->original_i_size);
206                 fi->original_i_size = 0;
207         }
208 }
209
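/*
 * Point the original inode's block at @index to @new_addr taken from the COW
 * inode.  On commit (@recover == false) the previous address is returned via
 * @old_addr so it can be revoked later; on recovery @new_addr is the saved
 * old address being put back.
 */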
210 static int __replace_atomic_write_block(struct inode *inode, pgoff_t index,
211                         block_t new_addr, block_t *old_addr, bool recover)
212 {
213         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
214         struct dnode_of_data dn;
215         struct node_info ni;
216         int err;
217
218 retry:
219         set_new_dnode(&dn, inode, NULL, NULL, 0);
220         err = f2fs_get_dnode_of_data(&dn, index, ALLOC_NODE);
221         if (err) {
222                 if (err == -ENOMEM) {
223                         f2fs_io_schedule_timeout(DEFAULT_IO_TIMEOUT);
224                         goto retry;
225                 }
226                 return err;
227         }
228
229         err = f2fs_get_node_info(sbi, dn.nid, &ni, false);
230         if (err) {
231                 f2fs_put_dnode(&dn);
232                 return err;
233         }
234
235         if (recover) {
236                 /* dn.data_blkaddr is always valid */
237                 if (!__is_valid_data_blkaddr(new_addr)) {
238                         if (new_addr == NULL_ADDR)
239                                 dec_valid_block_count(sbi, inode, 1);
240                         f2fs_invalidate_blocks(sbi, dn.data_blkaddr);
241                         f2fs_update_data_blkaddr(&dn, new_addr);
242                 } else {
243                         f2fs_replace_block(sbi, &dn, dn.data_blkaddr,
244                                 new_addr, ni.version, true, true);
245                 }
246         } else {
247                 blkcnt_t count = 1;
248
249                 err = inc_valid_block_count(sbi, inode, &count);
250                 if (err) {
251                         f2fs_put_dnode(&dn);
252                         return err;
253                 }
254
255                 *old_addr = dn.data_blkaddr;
256                 f2fs_truncate_data_blocks_range(&dn, 1);
257                 dec_valid_block_count(sbi, F2FS_I(inode)->cow_inode, count);
258
259                 f2fs_replace_block(sbi, &dn, dn.data_blkaddr, new_addr,
260                                         ni.version, true, false);
261         }
262
263         f2fs_put_dnode(&dn);
264
265         trace_f2fs_replace_atomic_write_block(inode, F2FS_I(inode)->cow_inode,
266                         index, old_addr ? *old_addr : 0, new_addr, recover);
267         return 0;
268 }
269
270 static void __complete_revoke_list(struct inode *inode, struct list_head *head,
271                                         bool revoke)
272 {
273         struct revoke_entry *cur, *tmp;
274         pgoff_t start_index = 0;
275         bool truncate = is_inode_flag_set(inode, FI_ATOMIC_REPLACE);
276
277         list_for_each_entry_safe(cur, tmp, head, list) {
278                 if (revoke) {
279                         __replace_atomic_write_block(inode, cur->index,
280                                                 cur->old_addr, NULL, true);
281                 } else if (truncate) {
282                         f2fs_truncate_hole(inode, start_index, cur->index);
283                         start_index = cur->index + 1;
284                 }
285
286                 list_del(&cur->list);
287                 kmem_cache_free(revoke_entry_slab, cur);
288         }
289
290         if (!revoke && truncate)
291                 f2fs_do_truncate_blocks(inode, start_index * PAGE_SIZE, false);
292 }
293
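/*
 * Walk the COW inode and move every written block into the original inode,
 * recording the replaced addresses in a revoke list.  On failure the list is
 * replayed to restore the old blocks; on success the entries are freed (and,
 * for atomic replace, ranges that were not overwritten are truncated).
 */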
294 static int __f2fs_commit_atomic_write(struct inode *inode)
295 {
296         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
297         struct f2fs_inode_info *fi = F2FS_I(inode);
298         struct inode *cow_inode = fi->cow_inode;
299         struct revoke_entry *new;
300         struct list_head revoke_list;
301         block_t blkaddr;
302         struct dnode_of_data dn;
303         pgoff_t len = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
304         pgoff_t off = 0, blen, index;
305         int ret = 0, i;
306
307         INIT_LIST_HEAD(&revoke_list);
308
309         while (len) {
310                 blen = min_t(pgoff_t, ADDRS_PER_BLOCK(cow_inode), len);
311
312                 set_new_dnode(&dn, cow_inode, NULL, NULL, 0);
313                 ret = f2fs_get_dnode_of_data(&dn, off, LOOKUP_NODE_RA);
314                 if (ret && ret != -ENOENT) {
315                         goto out;
316                 } else if (ret == -ENOENT) {
317                         ret = 0;
318                         if (dn.max_level == 0)
319                                 goto out;
320                         goto next;
321                 }
322
323                 blen = min((pgoff_t)ADDRS_PER_PAGE(dn.node_page, cow_inode),
324                                 len);
325                 index = off;
326                 for (i = 0; i < blen; i++, dn.ofs_in_node++, index++) {
327                         blkaddr = f2fs_data_blkaddr(&dn);
328
329                         if (!__is_valid_data_blkaddr(blkaddr)) {
330                                 continue;
331                         } else if (!f2fs_is_valid_blkaddr(sbi, blkaddr,
332                                         DATA_GENERIC_ENHANCE)) {
333                                 f2fs_put_dnode(&dn);
334                                 ret = -EFSCORRUPTED;
335                                 f2fs_handle_error(sbi,
336                                                 ERROR_INVALID_BLKADDR);
337                                 goto out;
338                         }
339
340                         new = f2fs_kmem_cache_alloc(revoke_entry_slab, GFP_NOFS,
341                                                         true, NULL);
342
343                         ret = __replace_atomic_write_block(inode, index, blkaddr,
344                                                         &new->old_addr, false);
345                         if (ret) {
346                                 f2fs_put_dnode(&dn);
347                                 kmem_cache_free(revoke_entry_slab, new);
348                                 goto out;
349                         }
350
351                         f2fs_update_data_blkaddr(&dn, NULL_ADDR);
352                         new->index = index;
353                         list_add_tail(&new->list, &revoke_list);
354                 }
355                 f2fs_put_dnode(&dn);
356 next:
357                 off += blen;
358                 len -= blen;
359         }
360
361 out:
362         if (ret) {
363                 sbi->revoked_atomic_block += fi->atomic_write_cnt;
364         } else {
365                 sbi->committed_atomic_block += fi->atomic_write_cnt;
366                 set_inode_flag(inode, FI_ATOMIC_COMMITTED);
367         }
368
369         __complete_revoke_list(inode, &revoke_list, ret ? true : false);
370
371         return ret;
372 }
373
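/*
 * Commit an atomic write: write back the dirty pages first, then swap the
 * blocks in under i_gc_rwsem[WRITE] and f2fs_lock_op() so GC and checkpoint
 * cannot run concurrently.
 */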
374 int f2fs_commit_atomic_write(struct inode *inode)
375 {
376         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
377         struct f2fs_inode_info *fi = F2FS_I(inode);
378         int err;
379
380         err = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX);
381         if (err)
382                 return err;
383
384         f2fs_down_write(&fi->i_gc_rwsem[WRITE]);
385         f2fs_lock_op(sbi);
386
387         err = __f2fs_commit_atomic_write(inode);
388
389         f2fs_unlock_op(sbi);
390         f2fs_up_write(&fi->i_gc_rwsem[WRITE]);
391
392         return err;
393 }
394
395 /*
396  * This function balances dirty node and dentry pages.
397  * In addition, it controls garbage collection.
398  */
399 void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need)
400 {
401         if (time_to_inject(sbi, FAULT_CHECKPOINT))
402                 f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_FAULT_INJECT);
403
404         /* balance_fs_bg() can be deferred; kick it here only when cached NATs are excessive */
405         if (need && excess_cached_nats(sbi))
406                 f2fs_balance_fs_bg(sbi, false);
407
408         if (!f2fs_is_checkpoint_ready(sbi))
409                 return;
410
411         /*
412          * We should do GC or end up with a checkpoint if there are too many
413          * dirty dir/node pages and not enough free segments.
414          */
415         if (has_enough_free_secs(sbi, 0, 0))
416                 return;
417
418         if (test_opt(sbi, GC_MERGE) && sbi->gc_thread &&
419                                 sbi->gc_thread->f2fs_gc_task) {
420                 DEFINE_WAIT(wait);
421
422                 prepare_to_wait(&sbi->gc_thread->fggc_wq, &wait,
423                                         TASK_UNINTERRUPTIBLE);
424                 wake_up(&sbi->gc_thread->gc_wait_queue_head);
425                 io_schedule();
426                 finish_wait(&sbi->gc_thread->fggc_wq, &wait);
427         } else {
428                 struct f2fs_gc_control gc_control = {
429                         .victim_segno = NULL_SEGNO,
430                         .init_gc_type = BG_GC,
431                         .no_bg_gc = true,
432                         .should_migrate_blocks = false,
433                         .err_gc_skipped = false,
434                         .nr_free_secs = 1 };
435                 f2fs_down_write(&sbi->gc_lock);
436                 f2fs_gc(sbi, &gc_control);
437         }
438 }
439
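/*
 * Heuristic for f2fs_balance_fs_bg(): true when any single class of dirty
 * pages (dents, quota data, nodes, meta, imeta) crosses a per-type threshold,
 * or when their sum crosses 1.5 times that threshold.
 */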
440 static inline bool excess_dirty_threshold(struct f2fs_sb_info *sbi)
441 {
442         int factor = f2fs_rwsem_is_locked(&sbi->cp_rwsem) ? 3 : 2;
443         unsigned int dents = get_pages(sbi, F2FS_DIRTY_DENTS);
444         unsigned int qdata = get_pages(sbi, F2FS_DIRTY_QDATA);
445         unsigned int nodes = get_pages(sbi, F2FS_DIRTY_NODES);
446         unsigned int meta = get_pages(sbi, F2FS_DIRTY_META);
447         unsigned int imeta = get_pages(sbi, F2FS_DIRTY_IMETA);
448         unsigned int threshold = sbi->blocks_per_seg * factor *
449                                         DEFAULT_DIRTY_THRESHOLD;
450         unsigned int global_threshold = threshold * 3 / 2;
451
452         if (dents >= threshold || qdata >= threshold ||
453                 nodes >= threshold || meta >= threshold ||
454                 imeta >= threshold)
455                 return true;
456         return dents + qdata + nodes + meta + imeta >  global_threshold;
457 }
458
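/*
 * Background balancing: shrink the extent, NAT and free-nid caches when
 * memory runs short, and trigger a checkpoint (optionally preceded by a
 * data flush) when dirty metadata, prefree segments or elapsed time make
 * one worthwhile.
 */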
459 void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi, bool from_bg)
460 {
461         if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
462                 return;
463
464         /* try to shrink the read extent cache when there is not enough memory */
465         if (!f2fs_available_free_memory(sbi, READ_EXTENT_CACHE))
466                 f2fs_shrink_read_extent_tree(sbi,
467                                 READ_EXTENT_CACHE_SHRINK_NUMBER);
468
469         /* try to shrink the age extent cache when there is not enough memory */
470         if (!f2fs_available_free_memory(sbi, AGE_EXTENT_CACHE))
471                 f2fs_shrink_age_extent_tree(sbi,
472                                 AGE_EXTENT_CACHE_SHRINK_NUMBER);
473
474         /* check the # of cached NAT entries */
475         if (!f2fs_available_free_memory(sbi, NAT_ENTRIES))
476                 f2fs_try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK);
477
478         if (!f2fs_available_free_memory(sbi, FREE_NIDS))
479                 f2fs_try_to_free_nids(sbi, MAX_FREE_NIDS);
480         else
481                 f2fs_build_free_nids(sbi, false, false);
482
483         if (excess_dirty_nats(sbi) || excess_dirty_threshold(sbi) ||
484                 excess_prefree_segs(sbi) || !f2fs_space_for_roll_forward(sbi))
485                 goto do_sync;
486
487         /* there is in-flight background IO, or a foreground operation happened recently */
488         if (is_inflight_io(sbi, REQ_TIME) ||
489                 (!f2fs_time_over(sbi, REQ_TIME) && f2fs_rwsem_is_locked(&sbi->cp_rwsem)))
490                 return;
491
492         /* the periodic checkpoint timeout has been exceeded */
493         if (f2fs_time_over(sbi, CP_TIME))
494                 goto do_sync;
495
496         /* checkpoint is the only way to shrink partial cached entries */
497         if (f2fs_available_free_memory(sbi, NAT_ENTRIES) &&
498                 f2fs_available_free_memory(sbi, INO_ENTRIES))
499                 return;
500
501 do_sync:
502         if (test_opt(sbi, DATA_FLUSH) && from_bg) {
503                 struct blk_plug plug;
504
505                 mutex_lock(&sbi->flush_lock);
506
507                 blk_start_plug(&plug);
508                 f2fs_sync_dirty_inodes(sbi, FILE_INODE, false);
509                 blk_finish_plug(&plug);
510
511                 mutex_unlock(&sbi->flush_lock);
512         }
513         f2fs_sync_fs(sbi->sb, 1);
514         stat_inc_bg_cp_count(sbi->stat_info);
515 }
516
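/* Issue a cache flush to one block device and account it in iostat on success. */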
517 static int __submit_flush_wait(struct f2fs_sb_info *sbi,
518                                 struct block_device *bdev)
519 {
520         int ret = blkdev_issue_flush(bdev);
521
522         trace_f2fs_issue_flush(bdev, test_opt(sbi, NOBARRIER),
523                                 test_opt(sbi, FLUSH_MERGE), ret);
524         if (!ret)
525                 f2fs_update_iostat(sbi, NULL, FS_FLUSH_IO, 0);
526         return ret;
527 }
528
529 static int submit_flush_wait(struct f2fs_sb_info *sbi, nid_t ino)
530 {
531         int ret = 0;
532         int i;
533
534         if (!f2fs_is_multi_device(sbi))
535                 return __submit_flush_wait(sbi, sbi->sb->s_bdev);
536
537         for (i = 0; i < sbi->s_ndevs; i++) {
538                 if (!f2fs_is_dirty_device(sbi, ino, i, FLUSH_INO))
539                         continue;
540                 ret = __submit_flush_wait(sbi, FDEV(i).bdev);
541                 if (ret)
542                         break;
543         }
544         return ret;
545 }
546
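/*
 * Flush-merge worker: drain the lock-less issue_list, submit one flush on
 * behalf of every queued waiter, then complete all of them with the shared
 * result.
 */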
547 static int issue_flush_thread(void *data)
548 {
549         struct f2fs_sb_info *sbi = data;
550         struct flush_cmd_control *fcc = SM_I(sbi)->fcc_info;
551         wait_queue_head_t *q = &fcc->flush_wait_queue;
552 repeat:
553         if (kthread_should_stop())
554                 return 0;
555
556         if (!llist_empty(&fcc->issue_list)) {
557                 struct flush_cmd *cmd, *next;
558                 int ret;
559
560                 fcc->dispatch_list = llist_del_all(&fcc->issue_list);
561                 fcc->dispatch_list = llist_reverse_order(fcc->dispatch_list);
562
563                 cmd = llist_entry(fcc->dispatch_list, struct flush_cmd, llnode);
564
565                 ret = submit_flush_wait(sbi, cmd->ino);
566                 atomic_inc(&fcc->issued_flush);
567
568                 llist_for_each_entry_safe(cmd, next,
569                                           fcc->dispatch_list, llnode) {
570                         cmd->ret = ret;
571                         complete(&cmd->wait);
572                 }
573                 fcc->dispatch_list = NULL;
574         }
575
576         wait_event_interruptible(*q,
577                 kthread_should_stop() || !llist_empty(&fcc->issue_list));
578         goto repeat;
579 }
580
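/*
 * Issue a flush for @ino.  With FLUSH_MERGE enabled, later callers queue
 * themselves on issue_list so that a single device flush can serve many
 * waiters; without it, every caller submits and waits for its own flush.
 */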
581 int f2fs_issue_flush(struct f2fs_sb_info *sbi, nid_t ino)
582 {
583         struct flush_cmd_control *fcc = SM_I(sbi)->fcc_info;
584         struct flush_cmd cmd;
585         int ret;
586
587         if (test_opt(sbi, NOBARRIER))
588                 return 0;
589
590         if (!test_opt(sbi, FLUSH_MERGE)) {
591                 atomic_inc(&fcc->queued_flush);
592                 ret = submit_flush_wait(sbi, ino);
593                 atomic_dec(&fcc->queued_flush);
594                 atomic_inc(&fcc->issued_flush);
595                 return ret;
596         }
597
598         if (atomic_inc_return(&fcc->queued_flush) == 1 ||
599             f2fs_is_multi_device(sbi)) {
600                 ret = submit_flush_wait(sbi, ino);
601                 atomic_dec(&fcc->queued_flush);
602
603                 atomic_inc(&fcc->issued_flush);
604                 return ret;
605         }
606
607         cmd.ino = ino;
608         init_completion(&cmd.wait);
609
610         llist_add(&cmd.llnode, &fcc->issue_list);
611
612         /*
613          * update issue_list before we wake up issue_flush thread, this
614          * smp_mb() pairs with another barrier in ___wait_event(), see
615          * more details in comments of waitqueue_active().
616          */
617         smp_mb();
618
619         if (waitqueue_active(&fcc->flush_wait_queue))
620                 wake_up(&fcc->flush_wait_queue);
621
622         if (fcc->f2fs_issue_flush) {
623                 wait_for_completion(&cmd.wait);
624                 atomic_dec(&fcc->queued_flush);
625         } else {
626                 struct llist_node *list;
627
628                 list = llist_del_all(&fcc->issue_list);
629                 if (!list) {
630                         wait_for_completion(&cmd.wait);
631                         atomic_dec(&fcc->queued_flush);
632                 } else {
633                         struct flush_cmd *tmp, *next;
634
635                         ret = submit_flush_wait(sbi, ino);
636
637                         llist_for_each_entry_safe(tmp, next, list, llnode) {
638                                 if (tmp == &cmd) {
639                                         cmd.ret = ret;
640                                         atomic_dec(&fcc->queued_flush);
641                                         continue;
642                                 }
643                                 tmp->ret = ret;
644                                 complete(&tmp->wait);
645                         }
646                 }
647         }
648
649         return cmd.ret;
650 }
651
652 int f2fs_create_flush_cmd_control(struct f2fs_sb_info *sbi)
653 {
654         dev_t dev = sbi->sb->s_bdev->bd_dev;
655         struct flush_cmd_control *fcc;
656
657         if (SM_I(sbi)->fcc_info) {
658                 fcc = SM_I(sbi)->fcc_info;
659                 if (fcc->f2fs_issue_flush)
660                         return 0;
661                 goto init_thread;
662         }
663
664         fcc = f2fs_kzalloc(sbi, sizeof(struct flush_cmd_control), GFP_KERNEL);
665         if (!fcc)
666                 return -ENOMEM;
667         atomic_set(&fcc->issued_flush, 0);
668         atomic_set(&fcc->queued_flush, 0);
669         init_waitqueue_head(&fcc->flush_wait_queue);
670         init_llist_head(&fcc->issue_list);
671         SM_I(sbi)->fcc_info = fcc;
672         if (!test_opt(sbi, FLUSH_MERGE))
673                 return 0;
674
675 init_thread:
676         fcc->f2fs_issue_flush = kthread_run(issue_flush_thread, sbi,
677                                 "f2fs_flush-%u:%u", MAJOR(dev), MINOR(dev));
678         if (IS_ERR(fcc->f2fs_issue_flush)) {
679                 int err = PTR_ERR(fcc->f2fs_issue_flush);
680
681                 fcc->f2fs_issue_flush = NULL;
682                 return err;
683         }
684
685         return 0;
686 }
687
688 void f2fs_destroy_flush_cmd_control(struct f2fs_sb_info *sbi, bool free)
689 {
690         struct flush_cmd_control *fcc = SM_I(sbi)->fcc_info;
691
692         if (fcc && fcc->f2fs_issue_flush) {
693                 struct task_struct *flush_thread = fcc->f2fs_issue_flush;
694
695                 fcc->f2fs_issue_flush = NULL;
696                 kthread_stop(flush_thread);
697         }
698         if (free) {
699                 kfree(fcc);
700                 SM_I(sbi)->fcc_info = NULL;
701         }
702 }
703
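/*
 * On multi-device mounts, flush the write cache of every extra device that
 * is marked dirty, retrying a few times and stopping checkpoints if a flush
 * keeps failing.
 */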
704 int f2fs_flush_device_cache(struct f2fs_sb_info *sbi)
705 {
706         int ret = 0, i;
707
708         if (!f2fs_is_multi_device(sbi))
709                 return 0;
710
711         if (test_opt(sbi, NOBARRIER))
712                 return 0;
713
714         for (i = 1; i < sbi->s_ndevs; i++) {
715                 int count = DEFAULT_RETRY_IO_COUNT;
716
717                 if (!f2fs_test_bit(i, (char *)&sbi->dirty_device))
718                         continue;
719
720                 do {
721                         ret = __submit_flush_wait(sbi, FDEV(i).bdev);
722                         if (ret)
723                                 f2fs_io_schedule_timeout(DEFAULT_IO_TIMEOUT);
724                 } while (ret && --count);
725
726                 if (ret) {
727                         f2fs_stop_checkpoint(sbi, false,
728                                         STOP_CP_REASON_FLUSH_FAIL);
729                         break;
730                 }
731
732                 spin_lock(&sbi->dev_lock);
733                 f2fs_clear_bit(i, (char *)&sbi->dirty_device);
734                 spin_unlock(&sbi->dev_lock);
735         }
736
737         return ret;
738 }
739
740 static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
741                 enum dirty_type dirty_type)
742 {
743         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
744
745         /* need not be added */
746         if (IS_CURSEG(sbi, segno))
747                 return;
748
749         if (!test_and_set_bit(segno, dirty_i->dirty_segmap[dirty_type]))
750                 dirty_i->nr_dirty[dirty_type]++;
751
752         if (dirty_type == DIRTY) {
753                 struct seg_entry *sentry = get_seg_entry(sbi, segno);
754                 enum dirty_type t = sentry->type;
755
756                 if (unlikely(t >= DIRTY)) {
757                         f2fs_bug_on(sbi, 1);
758                         return;
759                 }
760                 if (!test_and_set_bit(segno, dirty_i->dirty_segmap[t]))
761                         dirty_i->nr_dirty[t]++;
762
763                 if (__is_large_section(sbi)) {
764                         unsigned int secno = GET_SEC_FROM_SEG(sbi, segno);
765                         block_t valid_blocks =
766                                 get_valid_blocks(sbi, segno, true);
767
768                         f2fs_bug_on(sbi, unlikely(!valid_blocks ||
769                                         valid_blocks == CAP_BLKS_PER_SEC(sbi)));
770
771                         if (!IS_CURSEC(sbi, secno))
772                                 set_bit(secno, dirty_i->dirty_secmap);
773                 }
774         }
775 }
776
777 static void __remove_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
778                 enum dirty_type dirty_type)
779 {
780         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
781         block_t valid_blocks;
782
783         if (test_and_clear_bit(segno, dirty_i->dirty_segmap[dirty_type]))
784                 dirty_i->nr_dirty[dirty_type]--;
785
786         if (dirty_type == DIRTY) {
787                 struct seg_entry *sentry = get_seg_entry(sbi, segno);
788                 enum dirty_type t = sentry->type;
789
790                 if (test_and_clear_bit(segno, dirty_i->dirty_segmap[t]))
791                         dirty_i->nr_dirty[t]--;
792
793                 valid_blocks = get_valid_blocks(sbi, segno, true);
794                 if (valid_blocks == 0) {
795                         clear_bit(GET_SEC_FROM_SEG(sbi, segno),
796                                                 dirty_i->victim_secmap);
797 #ifdef CONFIG_F2FS_CHECK_FS
798                         clear_bit(segno, SIT_I(sbi)->invalid_segmap);
799 #endif
800                 }
801                 if (__is_large_section(sbi)) {
802                         unsigned int secno = GET_SEC_FROM_SEG(sbi, segno);
803
804                         if (!valid_blocks ||
805                                         valid_blocks == CAP_BLKS_PER_SEC(sbi)) {
806                                 clear_bit(secno, dirty_i->dirty_secmap);
807                                 return;
808                         }
809
810                         if (!IS_CURSEC(sbi, secno))
811                                 set_bit(secno, dirty_i->dirty_secmap);
812                 }
813         }
814 }
815
816 /*
817  * Errors such as -ENOMEM should not occur here.
818  * Adding a dirty entry into the seglist is not a critical operation.
819  * If a given segment is one of the current working segments, it won't be added.
820  */
821 static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno)
822 {
823         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
824         unsigned short valid_blocks, ckpt_valid_blocks;
825         unsigned int usable_blocks;
826
827         if (segno == NULL_SEGNO || IS_CURSEG(sbi, segno))
828                 return;
829
830         usable_blocks = f2fs_usable_blks_in_seg(sbi, segno);
831         mutex_lock(&dirty_i->seglist_lock);
832
833         valid_blocks = get_valid_blocks(sbi, segno, false);
834         ckpt_valid_blocks = get_ckpt_valid_blocks(sbi, segno, false);
835
836         if (valid_blocks == 0 && (!is_sbi_flag_set(sbi, SBI_CP_DISABLED) ||
837                 ckpt_valid_blocks == usable_blocks)) {
838                 __locate_dirty_segment(sbi, segno, PRE);
839                 __remove_dirty_segment(sbi, segno, DIRTY);
840         } else if (valid_blocks < usable_blocks) {
841                 __locate_dirty_segment(sbi, segno, DIRTY);
842         } else {
843                 /* Recovery routine with SSR needs this */
844                 __remove_dirty_segment(sbi, segno, DIRTY);
845         }
846
847         mutex_unlock(&dirty_i->seglist_lock);
848 }
849
850 /* Move currently empty dirty segments to prefree. Takes seglist_lock internally. */
851 void f2fs_dirty_to_prefree(struct f2fs_sb_info *sbi)
852 {
853         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
854         unsigned int segno;
855
856         mutex_lock(&dirty_i->seglist_lock);
857         for_each_set_bit(segno, dirty_i->dirty_segmap[DIRTY], MAIN_SEGS(sbi)) {
858                 if (get_valid_blocks(sbi, segno, false))
859                         continue;
860                 if (IS_CURSEG(sbi, segno))
861                         continue;
862                 __locate_dirty_segment(sbi, segno, PRE);
863                 __remove_dirty_segment(sbi, segno, DIRTY);
864         }
865         mutex_unlock(&dirty_i->seglist_lock);
866 }
867
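/*
 * While checkpointing is disabled, invalid blocks in dirty segments cannot be
 * reused.  Sum those holes for data and node segments and report how far the
 * larger of the two exceeds the overprovision area.
 */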
868 block_t f2fs_get_unusable_blocks(struct f2fs_sb_info *sbi)
869 {
870         int ovp_hole_segs =
871                 (overprovision_segments(sbi) - reserved_segments(sbi));
872         block_t ovp_holes = ovp_hole_segs << sbi->log_blocks_per_seg;
873         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
874         block_t holes[2] = {0, 0};      /* DATA and NODE */
875         block_t unusable;
876         struct seg_entry *se;
877         unsigned int segno;
878
879         mutex_lock(&dirty_i->seglist_lock);
880         for_each_set_bit(segno, dirty_i->dirty_segmap[DIRTY], MAIN_SEGS(sbi)) {
881                 se = get_seg_entry(sbi, segno);
882                 if (IS_NODESEG(se->type))
883                         holes[NODE] += f2fs_usable_blks_in_seg(sbi, segno) -
884                                                         se->valid_blocks;
885                 else
886                         holes[DATA] += f2fs_usable_blks_in_seg(sbi, segno) -
887                                                         se->valid_blocks;
888         }
889         mutex_unlock(&dirty_i->seglist_lock);
890
891         unusable = max(holes[DATA], holes[NODE]);
892         if (unusable > ovp_holes)
893                 return unusable - ovp_holes;
894         return 0;
895 }
896
897 int f2fs_disable_cp_again(struct f2fs_sb_info *sbi, block_t unusable)
898 {
899         int ovp_hole_segs =
900                 (overprovision_segments(sbi) - reserved_segments(sbi));
901         if (unusable > F2FS_OPTION(sbi).unusable_cap)
902                 return -EAGAIN;
903         if (is_sbi_flag_set(sbi, SBI_CP_DISABLED_QUICK) &&
904                 dirty_segments(sbi) > ovp_hole_segs)
905                 return -EAGAIN;
906         return 0;
907 }
908
909 /* This is only used while checkpointing is disabled (SBI_CP_DISABLED) */
910 static unsigned int get_free_segment(struct f2fs_sb_info *sbi)
911 {
912         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
913         unsigned int segno = 0;
914
915         mutex_lock(&dirty_i->seglist_lock);
916         for_each_set_bit(segno, dirty_i->dirty_segmap[DIRTY], MAIN_SEGS(sbi)) {
917                 if (get_valid_blocks(sbi, segno, false))
918                         continue;
919                 if (get_ckpt_valid_blocks(sbi, segno, false))
920                         continue;
921                 mutex_unlock(&dirty_i->seglist_lock);
922                 return segno;
923         }
924         mutex_unlock(&dirty_i->seglist_lock);
925         return NULL_SEGNO;
926 }
927
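/*
 * Allocate a discard command for [@start, @start + @len) on @bdev, add it to
 * the pending list that matches its length and account it as undiscarded.
 */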
928 static struct discard_cmd *__create_discard_cmd(struct f2fs_sb_info *sbi,
929                 struct block_device *bdev, block_t lstart,
930                 block_t start, block_t len)
931 {
932         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
933         struct list_head *pend_list;
934         struct discard_cmd *dc;
935
936         f2fs_bug_on(sbi, !len);
937
938         pend_list = &dcc->pend_list[plist_idx(len)];
939
940         dc = f2fs_kmem_cache_alloc(discard_cmd_slab, GFP_NOFS, true, NULL);
941         INIT_LIST_HEAD(&dc->list);
942         dc->bdev = bdev;
943         dc->di.lstart = lstart;
944         dc->di.start = start;
945         dc->di.len = len;
946         dc->ref = 0;
947         dc->state = D_PREP;
948         dc->queued = 0;
949         dc->error = 0;
950         init_completion(&dc->wait);
951         list_add_tail(&dc->list, pend_list);
952         spin_lock_init(&dc->lock);
953         dc->bio_ref = 0;
954         atomic_inc(&dcc->discard_cmd_cnt);
955         dcc->undiscard_blks += len;
956
957         return dc;
958 }
959
960 static bool f2fs_check_discard_tree(struct f2fs_sb_info *sbi)
961 {
962 #ifdef CONFIG_F2FS_CHECK_FS
963         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
964         struct rb_node *cur = rb_first_cached(&dcc->root), *next;
965         struct discard_cmd *cur_dc, *next_dc;
966
967         while (cur) {
968                 next = rb_next(cur);
969                 if (!next)
970                         return true;
971
972                 cur_dc = rb_entry(cur, struct discard_cmd, rb_node);
973                 next_dc = rb_entry(next, struct discard_cmd, rb_node);
974
975                 if (cur_dc->di.lstart + cur_dc->di.len > next_dc->di.lstart) {
976                         f2fs_info(sbi, "broken discard_rbtree, "
977                                 "cur(%u, %u) next(%u, %u)",
978                                 cur_dc->di.lstart, cur_dc->di.len,
979                                 next_dc->di.lstart, next_dc->di.len);
980                         return false;
981                 }
982                 cur = next;
983         }
984 #endif
985         return true;
986 }
987
988 static struct discard_cmd *__lookup_discard_cmd(struct f2fs_sb_info *sbi,
989                                                 block_t blkaddr)
990 {
991         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
992         struct rb_node *node = dcc->root.rb_root.rb_node;
993         struct discard_cmd *dc;
994
995         while (node) {
996                 dc = rb_entry(node, struct discard_cmd, rb_node);
997
998                 if (blkaddr < dc->di.lstart)
999                         node = node->rb_left;
1000                 else if (blkaddr >= dc->di.lstart + dc->di.len)
1001                         node = node->rb_right;
1002                 else
1003                         return dc;
1004         }
1005         return NULL;
1006 }
1007
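/*
 * Look up the discard command covering @blkaddr.  Also hand back the previous
 * and next commands plus an insertion hint so the caller can merge with or
 * insert between neighbours without walking the rbtree again.
 */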
1008 static struct discard_cmd *__lookup_discard_cmd_ret(struct rb_root_cached *root,
1009                                 block_t blkaddr,
1010                                 struct discard_cmd **prev_entry,
1011                                 struct discard_cmd **next_entry,
1012                                 struct rb_node ***insert_p,
1013                                 struct rb_node **insert_parent)
1014 {
1015         struct rb_node **pnode = &root->rb_root.rb_node;
1016         struct rb_node *parent = NULL, *tmp_node;
1017         struct discard_cmd *dc;
1018
1019         *insert_p = NULL;
1020         *insert_parent = NULL;
1021         *prev_entry = NULL;
1022         *next_entry = NULL;
1023
1024         if (RB_EMPTY_ROOT(&root->rb_root))
1025                 return NULL;
1026
1027         while (*pnode) {
1028                 parent = *pnode;
1029                 dc = rb_entry(*pnode, struct discard_cmd, rb_node);
1030
1031                 if (blkaddr < dc->di.lstart)
1032                         pnode = &(*pnode)->rb_left;
1033                 else if (blkaddr >= dc->di.lstart + dc->di.len)
1034                         pnode = &(*pnode)->rb_right;
1035                 else
1036                         goto lookup_neighbors;
1037         }
1038
1039         *insert_p = pnode;
1040         *insert_parent = parent;
1041
1042         dc = rb_entry(parent, struct discard_cmd, rb_node);
1043         tmp_node = parent;
1044         if (parent && blkaddr > dc->di.lstart)
1045                 tmp_node = rb_next(parent);
1046         *next_entry = rb_entry_safe(tmp_node, struct discard_cmd, rb_node);
1047
1048         tmp_node = parent;
1049         if (parent && blkaddr < dc->di.lstart)
1050                 tmp_node = rb_prev(parent);
1051         *prev_entry = rb_entry_safe(tmp_node, struct discard_cmd, rb_node);
1052         return NULL;
1053
1054 lookup_neighbors:
1055         /* lookup prev node for merging backward later */
1056         tmp_node = rb_prev(&dc->rb_node);
1057         *prev_entry = rb_entry_safe(tmp_node, struct discard_cmd, rb_node);
1058
1059         /* lookup next node for merging frontward later */
1060         tmp_node = rb_next(&dc->rb_node);
1061         *next_entry = rb_entry_safe(tmp_node, struct discard_cmd, rb_node);
1062         return dc;
1063 }
1064
1065 static void __detach_discard_cmd(struct discard_cmd_control *dcc,
1066                                                         struct discard_cmd *dc)
1067 {
1068         if (dc->state == D_DONE)
1069                 atomic_sub(dc->queued, &dcc->queued_discard);
1070
1071         list_del(&dc->list);
1072         rb_erase_cached(&dc->rb_node, &dcc->root);
1073         dcc->undiscard_blks -= dc->di.len;
1074
1075         kmem_cache_free(discard_cmd_slab, dc);
1076
1077         atomic_dec(&dcc->discard_cmd_cnt);
1078 }
1079
1080 static void __remove_discard_cmd(struct f2fs_sb_info *sbi,
1081                                                         struct discard_cmd *dc)
1082 {
1083         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1084         unsigned long flags;
1085
1086         trace_f2fs_remove_discard(dc->bdev, dc->di.start, dc->di.len);
1087
1088         spin_lock_irqsave(&dc->lock, flags);
1089         if (dc->bio_ref) {
1090                 spin_unlock_irqrestore(&dc->lock, flags);
1091                 return;
1092         }
1093         spin_unlock_irqrestore(&dc->lock, flags);
1094
1095         f2fs_bug_on(sbi, dc->ref);
1096
1097         if (dc->error == -EOPNOTSUPP)
1098                 dc->error = 0;
1099
1100         if (dc->error)
1101                 printk_ratelimited(
1102                         "%sF2FS-fs (%s): Issue discard(%u, %u, %u) failed, ret: %d",
1103                         KERN_INFO, sbi->sb->s_id,
1104                         dc->di.lstart, dc->di.start, dc->di.len, dc->error);
1105         __detach_discard_cmd(dcc, dc);
1106 }
1107
1108 static void f2fs_submit_discard_endio(struct bio *bio)
1109 {
1110         struct discard_cmd *dc = (struct discard_cmd *)bio->bi_private;
1111         unsigned long flags;
1112
1113         spin_lock_irqsave(&dc->lock, flags);
1114         if (!dc->error)
1115                 dc->error = blk_status_to_errno(bio->bi_status);
1116         dc->bio_ref--;
1117         if (!dc->bio_ref && dc->state == D_SUBMIT) {
1118                 dc->state = D_DONE;
1119                 complete_all(&dc->wait);
1120         }
1121         spin_unlock_irqrestore(&dc->lock, flags);
1122         bio_put(bio);
1123 }
1124
1125 static void __check_sit_bitmap(struct f2fs_sb_info *sbi,
1126                                 block_t start, block_t end)
1127 {
1128 #ifdef CONFIG_F2FS_CHECK_FS
1129         struct seg_entry *sentry;
1130         unsigned int segno;
1131         block_t blk = start;
1132         unsigned long offset, size, max_blocks = sbi->blocks_per_seg;
1133         unsigned long *map;
1134
1135         while (blk < end) {
1136                 segno = GET_SEGNO(sbi, blk);
1137                 sentry = get_seg_entry(sbi, segno);
1138                 offset = GET_BLKOFF_FROM_SEG0(sbi, blk);
1139
1140                 if (end < START_BLOCK(sbi, segno + 1))
1141                         size = GET_BLKOFF_FROM_SEG0(sbi, end);
1142                 else
1143                         size = max_blocks;
1144                 map = (unsigned long *)(sentry->cur_valid_map);
1145                 offset = __find_rev_next_bit(map, size, offset);
1146                 f2fs_bug_on(sbi, offset != size);
1147                 blk = START_BLOCK(sbi, segno + 1);
1148         }
1149 #endif
1150 }
1151
1152 static void __init_discard_policy(struct f2fs_sb_info *sbi,
1153                                 struct discard_policy *dpolicy,
1154                                 int discard_type, unsigned int granularity)
1155 {
1156         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1157
1158         /* common policy */
1159         dpolicy->type = discard_type;
1160         dpolicy->sync = true;
1161         dpolicy->ordered = false;
1162         dpolicy->granularity = granularity;
1163
1164         dpolicy->max_requests = dcc->max_discard_request;
1165         dpolicy->io_aware_gran = dcc->discard_io_aware_gran;
1166         dpolicy->timeout = false;
1167
1168         if (discard_type == DPOLICY_BG) {
1169                 dpolicy->min_interval = dcc->min_discard_issue_time;
1170                 dpolicy->mid_interval = dcc->mid_discard_issue_time;
1171                 dpolicy->max_interval = dcc->max_discard_issue_time;
1172                 dpolicy->io_aware = true;
1173                 dpolicy->sync = false;
1174                 dpolicy->ordered = true;
1175                 if (utilization(sbi) > dcc->discard_urgent_util) {
1176                         dpolicy->granularity = MIN_DISCARD_GRANULARITY;
1177                         if (atomic_read(&dcc->discard_cmd_cnt))
1178                                 dpolicy->max_interval =
1179                                         dcc->min_discard_issue_time;
1180                 }
1181         } else if (discard_type == DPOLICY_FORCE) {
1182                 dpolicy->min_interval = dcc->min_discard_issue_time;
1183                 dpolicy->mid_interval = dcc->mid_discard_issue_time;
1184                 dpolicy->max_interval = dcc->max_discard_issue_time;
1185                 dpolicy->io_aware = false;
1186         } else if (discard_type == DPOLICY_FSTRIM) {
1187                 dpolicy->io_aware = false;
1188         } else if (discard_type == DPOLICY_UMOUNT) {
1189                 dpolicy->io_aware = false;
1190                 /* we need to issue all to keep CP_TRIMMED_FLAG */
1191                 dpolicy->granularity = MIN_DISCARD_GRANULARITY;
1192                 dpolicy->timeout = true;
1193         }
1194 }
1195
1196 static void __update_discard_tree_range(struct f2fs_sb_info *sbi,
1197                                 struct block_device *bdev, block_t lstart,
1198                                 block_t start, block_t len);
1199
1200 #ifdef CONFIG_BLK_DEV_ZONED
1201 static void __submit_zone_reset_cmd(struct f2fs_sb_info *sbi,
1202                                    struct discard_cmd *dc, blk_opf_t flag,
1203                                    struct list_head *wait_list,
1204                                    unsigned int *issued)
1205 {
1206         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1207         struct block_device *bdev = dc->bdev;
1208         struct bio *bio = bio_alloc(bdev, 0, REQ_OP_ZONE_RESET | flag, GFP_NOFS);
1209         unsigned long flags;
1210
1211         trace_f2fs_issue_reset_zone(bdev, dc->di.start);
1212
1213         spin_lock_irqsave(&dc->lock, flags);
1214         dc->state = D_SUBMIT;
1215         dc->bio_ref++;
1216         spin_unlock_irqrestore(&dc->lock, flags);
1217
1218         if (issued)
1219                 (*issued)++;
1220
1221         atomic_inc(&dcc->queued_discard);
1222         dc->queued++;
1223         list_move_tail(&dc->list, wait_list);
1224
1225         /* sanity check on discard range */
1226         __check_sit_bitmap(sbi, dc->di.lstart, dc->di.lstart + dc->di.len);
1227
1228         bio->bi_iter.bi_sector = SECTOR_FROM_BLOCK(dc->di.start);
1229         bio->bi_private = dc;
1230         bio->bi_end_io = f2fs_submit_discard_endio;
1231         submit_bio(bio);
1232
1233         atomic_inc(&dcc->issued_discard);
1234         f2fs_update_iostat(sbi, NULL, FS_ZONE_RESET_IO, dc->di.len * F2FS_BLKSIZE);
1235 }
1236 #endif
1237
1238 /* this function is copied from blkdev_issue_discard() in block/blk-lib.c */
1239 static int __submit_discard_cmd(struct f2fs_sb_info *sbi,
1240                                 struct discard_policy *dpolicy,
1241                                 struct discard_cmd *dc, int *issued)
1242 {
1243         struct block_device *bdev = dc->bdev;
1244         unsigned int max_discard_blocks =
1245                         SECTOR_TO_BLOCK(bdev_max_discard_sectors(bdev));
1246         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1247         struct list_head *wait_list = (dpolicy->type == DPOLICY_FSTRIM) ?
1248                                         &(dcc->fstrim_list) : &(dcc->wait_list);
1249         blk_opf_t flag = dpolicy->sync ? REQ_SYNC : 0;
1250         block_t lstart, start, len, total_len;
1251         int err = 0;
1252
1253         if (dc->state != D_PREP)
1254                 return 0;
1255
1256         if (is_sbi_flag_set(sbi, SBI_NEED_FSCK))
1257                 return 0;
1258
1259 #ifdef CONFIG_BLK_DEV_ZONED
1260         if (f2fs_sb_has_blkzoned(sbi) && bdev_is_zoned(bdev)) {
1261                 __submit_zone_reset_cmd(sbi, dc, flag, wait_list, issued);
1262                 return 0;
1263         }
1264 #endif
1265
1266         trace_f2fs_issue_discard(bdev, dc->di.start, dc->di.len);
1267
1268         lstart = dc->di.lstart;
1269         start = dc->di.start;
1270         len = dc->di.len;
1271         total_len = len;
1272
1273         dc->di.len = 0;
1274
1275         while (total_len && *issued < dpolicy->max_requests && !err) {
1276                 struct bio *bio = NULL;
1277                 unsigned long flags;
1278                 bool last = true;
1279
1280                 if (len > max_discard_blocks) {
1281                         len = max_discard_blocks;
1282                         last = false;
1283                 }
1284
1285                 (*issued)++;
1286                 if (*issued == dpolicy->max_requests)
1287                         last = true;
1288
1289                 dc->di.len += len;
1290
1291                 if (time_to_inject(sbi, FAULT_DISCARD)) {
1292                         err = -EIO;
1293                 } else {
1294                         err = __blkdev_issue_discard(bdev,
1295                                         SECTOR_FROM_BLOCK(start),
1296                                         SECTOR_FROM_BLOCK(len),
1297                                         GFP_NOFS, &bio);
1298                 }
1299                 if (err) {
1300                         spin_lock_irqsave(&dc->lock, flags);
1301                         if (dc->state == D_PARTIAL)
1302                                 dc->state = D_SUBMIT;
1303                         spin_unlock_irqrestore(&dc->lock, flags);
1304
1305                         break;
1306                 }
1307
1308                 f2fs_bug_on(sbi, !bio);
1309
1310                 /*
1311                  * the state must be set before submission so the endio
1312                  * callback cannot mark this command D_DONE right away
1313                  */
1314                 spin_lock_irqsave(&dc->lock, flags);
1315                 if (last)
1316                         dc->state = D_SUBMIT;
1317                 else
1318                         dc->state = D_PARTIAL;
1319                 dc->bio_ref++;
1320                 spin_unlock_irqrestore(&dc->lock, flags);
1321
1322                 atomic_inc(&dcc->queued_discard);
1323                 dc->queued++;
1324                 list_move_tail(&dc->list, wait_list);
1325
1326                 /* sanity check on discard range */
1327                 __check_sit_bitmap(sbi, lstart, lstart + len);
1328
1329                 bio->bi_private = dc;
1330                 bio->bi_end_io = f2fs_submit_discard_endio;
1331                 bio->bi_opf |= flag;
1332                 submit_bio(bio);
1333
1334                 atomic_inc(&dcc->issued_discard);
1335
1336                 f2fs_update_iostat(sbi, NULL, FS_DISCARD_IO, len * F2FS_BLKSIZE);
1337
1338                 lstart += len;
1339                 start += len;
1340                 total_len -= len;
1341                 len = total_len;
1342         }
1343
1344         if (!err && len) {
1345                 dcc->undiscard_blks -= len;
1346                 __update_discard_tree_range(sbi, bdev, lstart, start, len);
1347         }
1348         return err;
1349 }
1350
1351 static void __insert_discard_cmd(struct f2fs_sb_info *sbi,
1352                                 struct block_device *bdev, block_t lstart,
1353                                 block_t start, block_t len)
1354 {
1355         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1356         struct rb_node **p = &dcc->root.rb_root.rb_node;
1357         struct rb_node *parent = NULL;
1358         struct discard_cmd *dc;
1359         bool leftmost = true;
1360
1361         /* look up rb tree to find parent node */
1362         while (*p) {
1363                 parent = *p;
1364                 dc = rb_entry(parent, struct discard_cmd, rb_node);
1365
1366                 if (lstart < dc->di.lstart) {
1367                         p = &(*p)->rb_left;
1368                 } else if (lstart >= dc->di.lstart + dc->di.len) {
1369                         p = &(*p)->rb_right;
1370                         leftmost = false;
1371                 } else {
1372                         f2fs_bug_on(sbi, 1);
1373                 }
1374         }
1375
1376         dc = __create_discard_cmd(sbi, bdev, lstart, start, len);
1377
1378         rb_link_node(&dc->rb_node, parent, p);
1379         rb_insert_color_cached(&dc->rb_node, &dcc->root, leftmost);
1380 }
1381
1382 static void __relocate_discard_cmd(struct discard_cmd_control *dcc,
1383                                                 struct discard_cmd *dc)
1384 {
1385         list_move_tail(&dc->list, &dcc->pend_list[plist_idx(dc->di.len)]);
1386 }
1387
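/*
 * Punch @blkaddr out of a pending discard command: drop the command when it
 * is already done or only one block long, otherwise trim it and, if the
 * punched block falls in the middle, insert a new command for the tail part.
 */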
1388 static void __punch_discard_cmd(struct f2fs_sb_info *sbi,
1389                                 struct discard_cmd *dc, block_t blkaddr)
1390 {
1391         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1392         struct discard_info di = dc->di;
1393         bool modified = false;
1394
1395         if (dc->state == D_DONE || dc->di.len == 1) {
1396                 __remove_discard_cmd(sbi, dc);
1397                 return;
1398         }
1399
1400         dcc->undiscard_blks -= di.len;
1401
1402         if (blkaddr > di.lstart) {
1403                 dc->di.len = blkaddr - dc->di.lstart;
1404                 dcc->undiscard_blks += dc->di.len;
1405                 __relocate_discard_cmd(dcc, dc);
1406                 modified = true;
1407         }
1408
1409         if (blkaddr < di.lstart + di.len - 1) {
1410                 if (modified) {
1411                         __insert_discard_cmd(sbi, dc->bdev, blkaddr + 1,
1412                                         di.start + blkaddr + 1 - di.lstart,
1413                                         di.lstart + di.len - 1 - blkaddr);
1414                 } else {
1415                         dc->di.lstart++;
1416                         dc->di.len--;
1417                         dc->di.start++;
1418                         dcc->undiscard_blks += dc->di.len;
1419                         __relocate_discard_cmd(dcc, dc);
1420                 }
1421         }
1422 }
1423
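/*
 * Add the extent [@lstart, @lstart + @len) to the discard rbtree, merging it
 * with adjacent pending commands on the same device as long as the merged
 * command stays within the device's maximum discard size.
 */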
1424 static void __update_discard_tree_range(struct f2fs_sb_info *sbi,
1425                                 struct block_device *bdev, block_t lstart,
1426                                 block_t start, block_t len)
1427 {
1428         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1429         struct discard_cmd *prev_dc = NULL, *next_dc = NULL;
1430         struct discard_cmd *dc;
1431         struct discard_info di = {0};
1432         struct rb_node **insert_p = NULL, *insert_parent = NULL;
1433         unsigned int max_discard_blocks =
1434                         SECTOR_TO_BLOCK(bdev_max_discard_sectors(bdev));
1435         block_t end = lstart + len;
1436
1437         dc = __lookup_discard_cmd_ret(&dcc->root, lstart,
1438                                 &prev_dc, &next_dc, &insert_p, &insert_parent);
1439         if (dc)
1440                 prev_dc = dc;
1441
1442         if (!prev_dc) {
1443                 di.lstart = lstart;
1444                 di.len = next_dc ? next_dc->di.lstart - lstart : len;
1445                 di.len = min(di.len, len);
1446                 di.start = start;
1447         }
1448
1449         while (1) {
1450                 struct rb_node *node;
1451                 bool merged = false;
1452                 struct discard_cmd *tdc = NULL;
1453
1454                 if (prev_dc) {
1455                         di.lstart = prev_dc->di.lstart + prev_dc->di.len;
1456                         if (di.lstart < lstart)
1457                                 di.lstart = lstart;
1458                         if (di.lstart >= end)
1459                                 break;
1460
1461                         if (!next_dc || next_dc->di.lstart > end)
1462                                 di.len = end - di.lstart;
1463                         else
1464                                 di.len = next_dc->di.lstart - di.lstart;
1465                         di.start = start + di.lstart - lstart;
1466                 }
1467
1468                 if (!di.len)
1469                         goto next;
1470
1471                 if (prev_dc && prev_dc->state == D_PREP &&
1472                         prev_dc->bdev == bdev &&
1473                         __is_discard_back_mergeable(&di, &prev_dc->di,
1474                                                         max_discard_blocks)) {
1475                         prev_dc->di.len += di.len;
1476                         dcc->undiscard_blks += di.len;
1477                         __relocate_discard_cmd(dcc, prev_dc);
1478                         di = prev_dc->di;
1479                         tdc = prev_dc;
1480                         merged = true;
1481                 }
1482
1483                 if (next_dc && next_dc->state == D_PREP &&
1484                         next_dc->bdev == bdev &&
1485                         __is_discard_front_mergeable(&di, &next_dc->di,
1486                                                         max_discard_blocks)) {
1487                         next_dc->di.lstart = di.lstart;
1488                         next_dc->di.len += di.len;
1489                         next_dc->di.start = di.start;
1490                         dcc->undiscard_blks += di.len;
1491                         __relocate_discard_cmd(dcc, next_dc);
1492                         if (tdc)
1493                                 __remove_discard_cmd(sbi, tdc);
1494                         merged = true;
1495                 }
1496
1497                 if (!merged)
1498                         __insert_discard_cmd(sbi, bdev,
1499                                                 di.lstart, di.start, di.len);
1500  next:
1501                 prev_dc = next_dc;
1502                 if (!prev_dc)
1503                         break;
1504
1505                 node = rb_next(&prev_dc->rb_node);
1506                 next_dc = rb_entry_safe(node, struct discard_cmd, rb_node);
1507         }
1508 }
1509
1510 #ifdef CONFIG_BLK_DEV_ZONED
1511 static void __queue_zone_reset_cmd(struct f2fs_sb_info *sbi,
1512                 struct block_device *bdev, block_t blkstart, block_t lblkstart,
1513                 block_t blklen)
1514 {
1515         trace_f2fs_queue_reset_zone(bdev, blkstart);
1516
1517         mutex_lock(&SM_I(sbi)->dcc_info->cmd_lock);
1518         __insert_discard_cmd(sbi, bdev, lblkstart, blkstart, blklen);
1519         mutex_unlock(&SM_I(sbi)->dcc_info->cmd_lock);
1520 }
1521 #endif
1522
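/*
 * Queue a discard for [blkstart, blkstart + blklen): translate the address
 * to a device-relative block on multi-device setups, then merge the range
 * into the discard rb-tree under cmd_lock.
 */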
1523 static void __queue_discard_cmd(struct f2fs_sb_info *sbi,
1524                 struct block_device *bdev, block_t blkstart, block_t blklen)
1525 {
1526         block_t lblkstart = blkstart;
1527
1528         if (!f2fs_bdev_support_discard(bdev))
1529                 return;
1530
1531         trace_f2fs_queue_discard(bdev, blkstart, blklen);
1532
1533         if (f2fs_is_multi_device(sbi)) {
1534                 int devi = f2fs_target_device_index(sbi, blkstart);
1535
1536                 blkstart -= FDEV(devi).start_blk;
1537         }
1538         mutex_lock(&SM_I(sbi)->dcc_info->cmd_lock);
1539         __update_discard_tree_range(sbi, bdev, lblkstart, blkstart, blklen);
1540         mutex_unlock(&SM_I(sbi)->dcc_info->cmd_lock);
1541 }
1542
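/*
 * Issue pending (D_PREP) discard commands in LBA order, resuming from
 * dcc->next_pos.  Stops early if the device stops being idle (io_aware) or
 * once dpolicy->max_requests commands have been issued; *issued is set to
 * -1 when nothing was issued because of an I/O interruption.
 */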
1543 static void __issue_discard_cmd_orderly(struct f2fs_sb_info *sbi,
1544                 struct discard_policy *dpolicy, int *issued)
1545 {
1546         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1547         struct discard_cmd *prev_dc = NULL, *next_dc = NULL;
1548         struct rb_node **insert_p = NULL, *insert_parent = NULL;
1549         struct discard_cmd *dc;
1550         struct blk_plug plug;
1551         bool io_interrupted = false;
1552
1553         mutex_lock(&dcc->cmd_lock);
1554         dc = __lookup_discard_cmd_ret(&dcc->root, dcc->next_pos,
1555                                 &prev_dc, &next_dc, &insert_p, &insert_parent);
1556         if (!dc)
1557                 dc = next_dc;
1558
1559         blk_start_plug(&plug);
1560
1561         while (dc) {
1562                 struct rb_node *node;
1563                 int err = 0;
1564
1565                 if (dc->state != D_PREP)
1566                         goto next;
1567
1568                 if (dpolicy->io_aware && !is_idle(sbi, DISCARD_TIME)) {
1569                         io_interrupted = true;
1570                         break;
1571                 }
1572
1573                 dcc->next_pos = dc->di.lstart + dc->di.len;
1574                 err = __submit_discard_cmd(sbi, dpolicy, dc, issued);
1575
1576                 if (*issued >= dpolicy->max_requests)
1577                         break;
1578 next:
1579                 node = rb_next(&dc->rb_node);
1580                 if (err)
1581                         __remove_discard_cmd(sbi, dc);
1582                 dc = rb_entry_safe(node, struct discard_cmd, rb_node);
1583         }
1584
1585         blk_finish_plug(&plug);
1586
1587         if (!dc)
1588                 dcc->next_pos = 0;
1589
1590         mutex_unlock(&dcc->cmd_lock);
1591
1592         if (!(*issued) && io_interrupted)
1593                 *issued = -1;
1594 }
1595 static unsigned int __wait_all_discard_cmd(struct f2fs_sb_info *sbi,
1596                                         struct discard_policy *dpolicy);
1597
1598 static int __issue_discard_cmd(struct f2fs_sb_info *sbi,
1599                                         struct discard_policy *dpolicy)
1600 {
1601         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1602         struct list_head *pend_list;
1603         struct discard_cmd *dc, *tmp;
1604         struct blk_plug plug;
1605         int i, issued;
1606         bool io_interrupted = false;
1607
1608         if (dpolicy->timeout)
1609                 f2fs_update_time(sbi, UMOUNT_DISCARD_TIMEOUT);
1610
1611 retry:
1612         issued = 0;
1613         for (i = MAX_PLIST_NUM - 1; i >= 0; i--) {
1614                 if (dpolicy->timeout &&
1615                                 f2fs_time_over(sbi, UMOUNT_DISCARD_TIMEOUT))
1616                         break;
1617
1618                 if (i + 1 < dpolicy->granularity)
1619                         break;
1620
1621                 if (i + 1 < dcc->max_ordered_discard && dpolicy->ordered) {
1622                         __issue_discard_cmd_orderly(sbi, dpolicy, &issued);
1623                         return issued;
1624                 }
1625
1626                 pend_list = &dcc->pend_list[i];
1627
1628                 mutex_lock(&dcc->cmd_lock);
1629                 if (list_empty(pend_list))
1630                         goto next;
1631                 if (unlikely(dcc->rbtree_check))
1632                         f2fs_bug_on(sbi, !f2fs_check_discard_tree(sbi));
1633                 blk_start_plug(&plug);
1634                 list_for_each_entry_safe(dc, tmp, pend_list, list) {
1635                         f2fs_bug_on(sbi, dc->state != D_PREP);
1636
1637                         if (dpolicy->timeout &&
1638                                 f2fs_time_over(sbi, UMOUNT_DISCARD_TIMEOUT))
1639                                 break;
1640
1641                         if (dpolicy->io_aware && i < dpolicy->io_aware_gran &&
1642                                                 !is_idle(sbi, DISCARD_TIME)) {
1643                                 io_interrupted = true;
1644                                 break;
1645                         }
1646
1647                         __submit_discard_cmd(sbi, dpolicy, dc, &issued);
1648
1649                         if (issued >= dpolicy->max_requests)
1650                                 break;
1651                 }
1652                 blk_finish_plug(&plug);
1653 next:
1654                 mutex_unlock(&dcc->cmd_lock);
1655
1656                 if (issued >= dpolicy->max_requests || io_interrupted)
1657                         break;
1658         }
1659
1660         if (dpolicy->type == DPOLICY_UMOUNT && issued) {
1661                 __wait_all_discard_cmd(sbi, dpolicy);
1662                 goto retry;
1663         }
1664
1665         if (!issued && io_interrupted)
1666                 issued = -1;
1667
1668         return issued;
1669 }
1670
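/*
 * Remove every not-yet-issued (D_PREP) command from all pending lists;
 * returns true if at least one command was dropped.
 */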
1671 static bool __drop_discard_cmd(struct f2fs_sb_info *sbi)
1672 {
1673         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1674         struct list_head *pend_list;
1675         struct discard_cmd *dc, *tmp;
1676         int i;
1677         bool dropped = false;
1678
1679         mutex_lock(&dcc->cmd_lock);
1680         for (i = MAX_PLIST_NUM - 1; i >= 0; i--) {
1681                 pend_list = &dcc->pend_list[i];
1682                 list_for_each_entry_safe(dc, tmp, pend_list, list) {
1683                         f2fs_bug_on(sbi, dc->state != D_PREP);
1684                         __remove_discard_cmd(sbi, dc);
1685                         dropped = true;
1686                 }
1687         }
1688         mutex_unlock(&dcc->cmd_lock);
1689
1690         return dropped;
1691 }
1692
1693 void f2fs_drop_discard_cmd(struct f2fs_sb_info *sbi)
1694 {
1695         __drop_discard_cmd(sbi);
1696 }
1697
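/*
 * Wait for one in-flight discard command to complete and drop the caller's
 * reference; returns the discarded length when the last reference releases
 * an error-free command, 0 otherwise.
 */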
1698 static unsigned int __wait_one_discard_bio(struct f2fs_sb_info *sbi,
1699                                                         struct discard_cmd *dc)
1700 {
1701         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1702         unsigned int len = 0;
1703
1704         wait_for_completion_io(&dc->wait);
1705         mutex_lock(&dcc->cmd_lock);
1706         f2fs_bug_on(sbi, dc->state != D_DONE);
1707         dc->ref--;
1708         if (!dc->ref) {
1709                 if (!dc->error)
1710                         len = dc->di.len;
1711                 __remove_discard_cmd(sbi, dc);
1712         }
1713         mutex_unlock(&dcc->cmd_lock);
1714
1715         return len;
1716 }
1717
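/*
 * Wait for issued discard commands that overlap [start, end) on the wait
 * list (or the fstrim list for DPOLICY_FSTRIM) and return the number of
 * blocks successfully discarded.
 */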
1718 static unsigned int __wait_discard_cmd_range(struct f2fs_sb_info *sbi,
1719                                                 struct discard_policy *dpolicy,
1720                                                 block_t start, block_t end)
1721 {
1722         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1723         struct list_head *wait_list = (dpolicy->type == DPOLICY_FSTRIM) ?
1724                                         &(dcc->fstrim_list) : &(dcc->wait_list);
1725         struct discard_cmd *dc = NULL, *iter, *tmp;
1726         unsigned int trimmed = 0;
1727
1728 next:
1729         dc = NULL;
1730
1731         mutex_lock(&dcc->cmd_lock);
1732         list_for_each_entry_safe(iter, tmp, wait_list, list) {
1733                 if (iter->di.lstart + iter->di.len <= start ||
1734                                         end <= iter->di.lstart)
1735                         continue;
1736                 if (iter->di.len < dpolicy->granularity)
1737                         continue;
1738                 if (iter->state == D_DONE && !iter->ref) {
1739                         wait_for_completion_io(&iter->wait);
1740                         if (!iter->error)
1741                                 trimmed += iter->di.len;
1742                         __remove_discard_cmd(sbi, iter);
1743                 } else {
1744                         iter->ref++;
1745                         dc = iter;
1746                         break;
1747                 }
1748         }
1749         mutex_unlock(&dcc->cmd_lock);
1750
1751         if (dc) {
1752                 trimmed += __wait_one_discard_bio(sbi, dc);
1753                 goto next;
1754         }
1755
1756         return trimmed;
1757 }
1758
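/*
 * Wait for all discard commands matching @dpolicy; with a NULL @dpolicy,
 * wait under both the fstrim and umount policies at minimum granularity.
 */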
1759 static unsigned int __wait_all_discard_cmd(struct f2fs_sb_info *sbi,
1760                                                 struct discard_policy *dpolicy)
1761 {
1762         struct discard_policy dp;
1763         unsigned int discard_blks;
1764
1765         if (dpolicy)
1766                 return __wait_discard_cmd_range(sbi, dpolicy, 0, UINT_MAX);
1767
1768         /* wait all */
1769         __init_discard_policy(sbi, &dp, DPOLICY_FSTRIM, MIN_DISCARD_GRANULARITY);
1770         discard_blks = __wait_discard_cmd_range(sbi, &dp, 0, UINT_MAX);
1771         __init_discard_policy(sbi, &dp, DPOLICY_UMOUNT, MIN_DISCARD_GRANULARITY);
1772         discard_blks += __wait_discard_cmd_range(sbi, &dp, 0, UINT_MAX);
1773
1774         return discard_blks;
1775 }
1776
1777 /* This should be covered by the global mutex, &sit_i->sentry_lock */
1778 static void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr)
1779 {
1780         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1781         struct discard_cmd *dc;
1782         bool need_wait = false;
1783
1784         mutex_lock(&dcc->cmd_lock);
1785         dc = __lookup_discard_cmd(sbi, blkaddr);
1786 #ifdef CONFIG_BLK_DEV_ZONED
1787         if (dc && f2fs_sb_has_blkzoned(sbi) && bdev_is_zoned(dc->bdev)) {
1788                 /* force submit zone reset */
1789                 if (dc->state == D_PREP)
1790                         __submit_zone_reset_cmd(sbi, dc, REQ_SYNC,
1791                                                 &dcc->wait_list, NULL);
1792                 dc->ref++;
1793                 mutex_unlock(&dcc->cmd_lock);
1794                 /* wait for zone reset */
1795                 __wait_one_discard_bio(sbi, dc);
1796                 return;
1797         }
1798 #endif
1799         if (dc) {
1800                 if (dc->state == D_PREP) {
1801                         __punch_discard_cmd(sbi, dc, blkaddr);
1802                 } else {
1803                         dc->ref++;
1804                         need_wait = true;
1805                 }
1806         }
1807         mutex_unlock(&dcc->cmd_lock);
1808
1809         if (need_wait)
1810                 __wait_one_discard_bio(sbi, dc);
1811 }
1812
1813 void f2fs_stop_discard_thread(struct f2fs_sb_info *sbi)
1814 {
1815         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1816
1817         if (dcc && dcc->f2fs_issue_discard) {
1818                 struct task_struct *discard_thread = dcc->f2fs_issue_discard;
1819
1820                 dcc->f2fs_issue_discard = NULL;
1821                 kthread_stop(discard_thread);
1822         }
1823 }
1824
1825 /**
1826  * f2fs_issue_discard_timeout() - Issue all discard cmd within UMOUNT_DISCARD_TIMEOUT
1827  * @sbi: the f2fs_sb_info data for discard cmd to issue
1828  *
1829  * When UMOUNT_DISCARD_TIMEOUT is exceeded, all remaining discard commands will be dropped.
1830  *
1831  * Return: true if all discard commands were issued or none needed to be issued, otherwise false.
1832  */
1833 bool f2fs_issue_discard_timeout(struct f2fs_sb_info *sbi)
1834 {
1835         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1836         struct discard_policy dpolicy;
1837         bool dropped;
1838
1839         if (!atomic_read(&dcc->discard_cmd_cnt))
1840                 return true;
1841
1842         __init_discard_policy(sbi, &dpolicy, DPOLICY_UMOUNT,
1843                                         dcc->discard_granularity);
1844         __issue_discard_cmd(sbi, &dpolicy);
1845         dropped = __drop_discard_cmd(sbi);
1846
1847         /* just to make sure there are no pending discard commands */
1848         __wait_all_discard_cmd(sbi, NULL);
1849
1850         f2fs_bug_on(sbi, atomic_read(&dcc->discard_cmd_cnt));
1851         return !dropped;
1852 }
1853
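/*
 * Background kthread that periodically issues queued discard commands.
 * The policy (force vs. background) and the sleep interval are adapted to
 * the GC mode, available free memory and how much the last pass issued.
 */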
1854 static int issue_discard_thread(void *data)
1855 {
1856         struct f2fs_sb_info *sbi = data;
1857         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1858         wait_queue_head_t *q = &dcc->discard_wait_queue;
1859         struct discard_policy dpolicy;
1860         unsigned int wait_ms = dcc->min_discard_issue_time;
1861         int issued;
1862
1863         set_freezable();
1864
1865         do {
1866                 wait_event_interruptible_timeout(*q,
1867                                 kthread_should_stop() || freezing(current) ||
1868                                 dcc->discard_wake,
1869                                 msecs_to_jiffies(wait_ms));
1870
1871                 if (sbi->gc_mode == GC_URGENT_HIGH ||
1872                         !f2fs_available_free_memory(sbi, DISCARD_CACHE))
1873                         __init_discard_policy(sbi, &dpolicy, DPOLICY_FORCE,
1874                                                 MIN_DISCARD_GRANULARITY);
1875                 else
1876                         __init_discard_policy(sbi, &dpolicy, DPOLICY_BG,
1877                                                 dcc->discard_granularity);
1878
1879                 if (dcc->discard_wake)
1880                         dcc->discard_wake = false;
1881
1882                 /* clean up pending candidates before going to sleep */
1883                 if (atomic_read(&dcc->queued_discard))
1884                         __wait_all_discard_cmd(sbi, NULL);
1885
1886                 if (try_to_freeze())
1887                         continue;
1888                 if (f2fs_readonly(sbi->sb))
1889                         continue;
1890                 if (kthread_should_stop())
1891                         return 0;
1892                 if (is_sbi_flag_set(sbi, SBI_NEED_FSCK) ||
1893                         !atomic_read(&dcc->discard_cmd_cnt)) {
1894                         wait_ms = dpolicy.max_interval;
1895                         continue;
1896                 }
1897
1898                 sb_start_intwrite(sbi->sb);
1899
1900                 issued = __issue_discard_cmd(sbi, &dpolicy);
1901                 if (issued > 0) {
1902                         __wait_all_discard_cmd(sbi, &dpolicy);
1903                         wait_ms = dpolicy.min_interval;
1904                 } else if (issued == -1) {
1905                         wait_ms = f2fs_time_to_wait(sbi, DISCARD_TIME);
1906                         if (!wait_ms)
1907                                 wait_ms = dpolicy.mid_interval;
1908                 } else {
1909                         wait_ms = dpolicy.max_interval;
1910                 }
1911                 if (!atomic_read(&dcc->discard_cmd_cnt))
1912                         wait_ms = dpolicy.max_interval;
1913
1914                 sb_end_intwrite(sbi->sb);
1915
1916         } while (!kthread_should_stop());
1917         return 0;
1918 }
1919
1920 #ifdef CONFIG_BLK_DEV_ZONED
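/*
 * On a zoned block device: sequential zones are handled by a zone reset
 * covering exactly one zone (issued synchronously during recovery, queued
 * otherwise), while conventional zones fall back to regular discard.
 */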
1921 static int __f2fs_issue_discard_zone(struct f2fs_sb_info *sbi,
1922                 struct block_device *bdev, block_t blkstart, block_t blklen)
1923 {
1924         sector_t sector, nr_sects;
1925         block_t lblkstart = blkstart;
1926         int devi = 0;
1927         u64 remainder = 0;
1928
1929         if (f2fs_is_multi_device(sbi)) {
1930                 devi = f2fs_target_device_index(sbi, blkstart);
1931                 if (blkstart < FDEV(devi).start_blk ||
1932                     blkstart > FDEV(devi).end_blk) {
1933                         f2fs_err(sbi, "Invalid block %x", blkstart);
1934                         return -EIO;
1935                 }
1936                 blkstart -= FDEV(devi).start_blk;
1937         }
1938
1939         /* For sequential zones, reset the zone write pointer */
1940         if (f2fs_blkz_is_seq(sbi, devi, blkstart)) {
1941                 sector = SECTOR_FROM_BLOCK(blkstart);
1942                 nr_sects = SECTOR_FROM_BLOCK(blklen);
1943                 div64_u64_rem(sector, bdev_zone_sectors(bdev), &remainder);
1944
1945                 if (remainder || nr_sects != bdev_zone_sectors(bdev)) {
1946                         f2fs_err(sbi, "(%d) %s: Unaligned zone reset attempted (block %x + %x)",
1947                                  devi, sbi->s_ndevs ? FDEV(devi).path : "",
1948                                  blkstart, blklen);
1949                         return -EIO;
1950                 }
1951
1952                 if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) {
1953                         trace_f2fs_issue_reset_zone(bdev, blkstart);
1954                         return blkdev_zone_mgmt(bdev, REQ_OP_ZONE_RESET,
1955                                                 sector, nr_sects, GFP_NOFS);
1956                 }
1957
1958                 __queue_zone_reset_cmd(sbi, bdev, blkstart, lblkstart, blklen);
1959                 return 0;
1960         }
1961
1962         /* For conventional zones, use regular discard if supported */
1963         __queue_discard_cmd(sbi, bdev, lblkstart, blklen);
1964         return 0;
1965 }
1966 #endif
1967
1968 static int __issue_discard_async(struct f2fs_sb_info *sbi,
1969                 struct block_device *bdev, block_t blkstart, block_t blklen)
1970 {
1971 #ifdef CONFIG_BLK_DEV_ZONED
1972         if (f2fs_sb_has_blkzoned(sbi) && bdev_is_zoned(bdev))
1973                 return __f2fs_issue_discard_zone(sbi, bdev, blkstart, blklen);
1974 #endif
1975         __queue_discard_cmd(sbi, bdev, blkstart, blklen);
1976         return 0;
1977 }
1978
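/*
 * Split [blkstart, blkstart + blklen) at device boundaries on multi-device
 * setups, issue an async discard for each contiguous piece, and account the
 * blocks in each segment's discard bitmap.
 */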
1979 static int f2fs_issue_discard(struct f2fs_sb_info *sbi,
1980                                 block_t blkstart, block_t blklen)
1981 {
1982         sector_t start = blkstart, len = 0;
1983         struct block_device *bdev;
1984         struct seg_entry *se;
1985         unsigned int offset;
1986         block_t i;
1987         int err = 0;
1988
1989         bdev = f2fs_target_device(sbi, blkstart, NULL);
1990
1991         for (i = blkstart; i < blkstart + blklen; i++, len++) {
1992                 if (i != start) {
1993                         struct block_device *bdev2 =
1994                                 f2fs_target_device(sbi, i, NULL);
1995
1996                         if (bdev2 != bdev) {
1997                                 err = __issue_discard_async(sbi, bdev,
1998                                                 start, len);
1999                                 if (err)
2000                                         return err;
2001                                 bdev = bdev2;
2002                                 start = i;
2003                                 len = 0;
2004                         }
2005                 }
2006
2007                 se = get_seg_entry(sbi, GET_SEGNO(sbi, i));
2008                 offset = GET_BLKOFF_FROM_SEG0(sbi, i);
2009
2010                 if (f2fs_block_unit_discard(sbi) &&
2011                                 !f2fs_test_and_set_bit(offset, se->discard_map))
2012                         sbi->discard_blks--;
2013         }
2014
2015         if (len)
2016                 err = __issue_discard_async(sbi, bdev, start, len);
2017         return err;
2018 }
2019
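/*
 * Collect the discardable block ranges of the segment at cpc->trim_start
 * into a discard_entry bitmap for later issuing; with @check_only, just
 * report whether at least one candidate range exists.
 */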
2020 static bool add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc,
2021                                                         bool check_only)
2022 {
2023         int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long);
2024         int max_blocks = sbi->blocks_per_seg;
2025         struct seg_entry *se = get_seg_entry(sbi, cpc->trim_start);
2026         unsigned long *cur_map = (unsigned long *)se->cur_valid_map;
2027         unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map;
2028         unsigned long *discard_map = (unsigned long *)se->discard_map;
2029         unsigned long *dmap = SIT_I(sbi)->tmp_map;
2030         unsigned int start = 0, end = -1;
2031         bool force = (cpc->reason & CP_DISCARD);
2032         struct discard_entry *de = NULL;
2033         struct list_head *head = &SM_I(sbi)->dcc_info->entry_list;
2034         int i;
2035
2036         if (se->valid_blocks == max_blocks || !f2fs_hw_support_discard(sbi) ||
2037                         !f2fs_block_unit_discard(sbi))
2038                 return false;
2039
2040         if (!force) {
2041                 if (!f2fs_realtime_discard_enable(sbi) || !se->valid_blocks ||
2042                         SM_I(sbi)->dcc_info->nr_discards >=
2043                                 SM_I(sbi)->dcc_info->max_discards)
2044                         return false;
2045         }
2046
2047         /* SIT_VBLOCK_MAP_SIZE should be a multiple of sizeof(unsigned long) */
2048         for (i = 0; i < entries; i++)
2049                 dmap[i] = force ? ~ckpt_map[i] & ~discard_map[i] :
2050                                 (cur_map[i] ^ ckpt_map[i]) & ckpt_map[i];
2051
2052         while (force || SM_I(sbi)->dcc_info->nr_discards <=
2053                                 SM_I(sbi)->dcc_info->max_discards) {
2054                 start = __find_rev_next_bit(dmap, max_blocks, end + 1);
2055                 if (start >= max_blocks)
2056                         break;
2057
2058                 end = __find_rev_next_zero_bit(dmap, max_blocks, start + 1);
2059                 if (force && start && end != max_blocks
2060                                         && (end - start) < cpc->trim_minlen)
2061                         continue;
2062
2063                 if (check_only)
2064                         return true;
2065
2066                 if (!de) {
2067                         de = f2fs_kmem_cache_alloc(discard_entry_slab,
2068                                                 GFP_F2FS_ZERO, true, NULL);
2069                         de->start_blkaddr = START_BLOCK(sbi, cpc->trim_start);
2070                         list_add_tail(&de->list, head);
2071                 }
2072
2073                 for (i = start; i < end; i++)
2074                         __set_bit_le(i, (void *)de->discard_map);
2075
2076                 SM_I(sbi)->dcc_info->nr_discards += end - start;
2077         }
2078         return false;
2079 }
2080
2081 static void release_discard_addr(struct discard_entry *entry)
2082 {
2083         list_del(&entry->list);
2084         kmem_cache_free(discard_entry_slab, entry);
2085 }
2086
2087 void f2fs_release_discard_addrs(struct f2fs_sb_info *sbi)
2088 {
2089         struct list_head *head = &(SM_I(sbi)->dcc_info->entry_list);
2090         struct discard_entry *entry, *this;
2091
2092         /* drop caches */
2093         list_for_each_entry_safe(entry, this, head, list)
2094                 release_discard_addr(entry);
2095 }
2096
2097 /*
2098  * Should call f2fs_clear_prefree_segments after checkpoint is done.
2099  */
2100 static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi)
2101 {
2102         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
2103         unsigned int segno;
2104
2105         mutex_lock(&dirty_i->seglist_lock);
2106         for_each_set_bit(segno, dirty_i->dirty_segmap[PRE], MAIN_SEGS(sbi))
2107                 __set_test_and_free(sbi, segno, false);
2108         mutex_unlock(&dirty_i->seglist_lock);
2109 }
2110
2111 void f2fs_clear_prefree_segments(struct f2fs_sb_info *sbi,
2112                                                 struct cp_control *cpc)
2113 {
2114         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
2115         struct list_head *head = &dcc->entry_list;
2116         struct discard_entry *entry, *this;
2117         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
2118         unsigned long *prefree_map = dirty_i->dirty_segmap[PRE];
2119         unsigned int start = 0, end = -1;
2120         unsigned int secno, start_segno;
2121         bool force = (cpc->reason & CP_DISCARD);
2122         bool section_alignment = F2FS_OPTION(sbi).discard_unit ==
2123                                                 DISCARD_UNIT_SECTION;
2124
2125         if (f2fs_lfs_mode(sbi) && __is_large_section(sbi))
2126                 section_alignment = true;
2127
2128         mutex_lock(&dirty_i->seglist_lock);
2129
2130         while (1) {
2131                 int i;
2132
2133                 if (section_alignment && end != -1)
2134                         end--;
2135                 start = find_next_bit(prefree_map, MAIN_SEGS(sbi), end + 1);
2136                 if (start >= MAIN_SEGS(sbi))
2137                         break;
2138                 end = find_next_zero_bit(prefree_map, MAIN_SEGS(sbi),
2139                                                                 start + 1);
2140
2141                 if (section_alignment) {
2142                         start = rounddown(start, sbi->segs_per_sec);
2143                         end = roundup(end, sbi->segs_per_sec);
2144                 }
2145
2146                 for (i = start; i < end; i++) {
2147                         if (test_and_clear_bit(i, prefree_map))
2148                                 dirty_i->nr_dirty[PRE]--;
2149                 }
2150
2151                 if (!f2fs_realtime_discard_enable(sbi))
2152                         continue;
2153
2154                 if (force && start >= cpc->trim_start &&
2155                                         (end - 1) <= cpc->trim_end)
2156                         continue;
2157
2158                 /* Should cover 2MB zoned device for zone-based reset */
2159                 if (!f2fs_sb_has_blkzoned(sbi) &&
2160                     (!f2fs_lfs_mode(sbi) || !__is_large_section(sbi))) {
2161                         f2fs_issue_discard(sbi, START_BLOCK(sbi, start),
2162                                 (end - start) << sbi->log_blocks_per_seg);
2163                         continue;
2164                 }
2165 next:
2166                 secno = GET_SEC_FROM_SEG(sbi, start);
2167                 start_segno = GET_SEG_FROM_SEC(sbi, secno);
2168                 if (!IS_CURSEC(sbi, secno) &&
2169                         !get_valid_blocks(sbi, start, true))
2170                         f2fs_issue_discard(sbi, START_BLOCK(sbi, start_segno),
2171                                 sbi->segs_per_sec << sbi->log_blocks_per_seg);
2172
2173                 start = start_segno + sbi->segs_per_sec;
2174                 if (start < end)
2175                         goto next;
2176                 else
2177                         end = start - 1;
2178         }
2179         mutex_unlock(&dirty_i->seglist_lock);
2180
2181         if (!f2fs_block_unit_discard(sbi))
2182                 goto wakeup;
2183
2184         /* send small discards */
2185         list_for_each_entry_safe(entry, this, head, list) {
2186                 unsigned int cur_pos = 0, next_pos, len, total_len = 0;
2187                 bool is_valid = test_bit_le(0, entry->discard_map);
2188
2189 find_next:
2190                 if (is_valid) {
2191                         next_pos = find_next_zero_bit_le(entry->discard_map,
2192                                         sbi->blocks_per_seg, cur_pos);
2193                         len = next_pos - cur_pos;
2194
2195                         if (f2fs_sb_has_blkzoned(sbi) ||
2196                                         !force || len < cpc->trim_minlen)
2197                                 goto skip;
2198
2199                         f2fs_issue_discard(sbi, entry->start_blkaddr + cur_pos,
2200                                                                         len);
2201                         total_len += len;
2202                 } else {
2203                         next_pos = find_next_bit_le(entry->discard_map,
2204                                         sbi->blocks_per_seg, cur_pos);
2205                 }
2206 skip:
2207                 cur_pos = next_pos;
2208                 is_valid = !is_valid;
2209
2210                 if (cur_pos < sbi->blocks_per_seg)
2211                         goto find_next;
2212
2213                 release_discard_addr(entry);
2214                 dcc->nr_discards -= total_len;
2215         }
2216
2217 wakeup:
2218         wake_up_discard_thread(sbi, false);
2219 }
2220
2221 int f2fs_start_discard_thread(struct f2fs_sb_info *sbi)
2222 {
2223         dev_t dev = sbi->sb->s_bdev->bd_dev;
2224         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
2225         int err = 0;
2226
2227         if (!f2fs_realtime_discard_enable(sbi))
2228                 return 0;
2229
2230         dcc->f2fs_issue_discard = kthread_run(issue_discard_thread, sbi,
2231                                 "f2fs_discard-%u:%u", MAJOR(dev), MINOR(dev));
2232         if (IS_ERR(dcc->f2fs_issue_discard)) {
2233                 err = PTR_ERR(dcc->f2fs_issue_discard);
2234                 dcc->f2fs_issue_discard = NULL;
2235         }
2236
2237         return err;
2238 }
2239
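/*
 * Allocate and initialize the discard command control structure for this
 * superblock (unless it already exists) and start the discard kthread.
 */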
2240 static int create_discard_cmd_control(struct f2fs_sb_info *sbi)
2241 {
2242         struct discard_cmd_control *dcc;
2243         int err = 0, i;
2244
2245         if (SM_I(sbi)->dcc_info) {
2246                 dcc = SM_I(sbi)->dcc_info;
2247                 goto init_thread;
2248         }
2249
2250         dcc = f2fs_kzalloc(sbi, sizeof(struct discard_cmd_control), GFP_KERNEL);
2251         if (!dcc)
2252                 return -ENOMEM;
2253
2254         dcc->discard_io_aware_gran = MAX_PLIST_NUM;
2255         dcc->discard_granularity = DEFAULT_DISCARD_GRANULARITY;
2256         dcc->max_ordered_discard = DEFAULT_MAX_ORDERED_DISCARD_GRANULARITY;
2257         if (F2FS_OPTION(sbi).discard_unit == DISCARD_UNIT_SEGMENT)
2258                 dcc->discard_granularity = sbi->blocks_per_seg;
2259         else if (F2FS_OPTION(sbi).discard_unit == DISCARD_UNIT_SECTION)
2260                 dcc->discard_granularity = BLKS_PER_SEC(sbi);
2261
2262         INIT_LIST_HEAD(&dcc->entry_list);
2263         for (i = 0; i < MAX_PLIST_NUM; i++)
2264                 INIT_LIST_HEAD(&dcc->pend_list[i]);
2265         INIT_LIST_HEAD(&dcc->wait_list);
2266         INIT_LIST_HEAD(&dcc->fstrim_list);
2267         mutex_init(&dcc->cmd_lock);
2268         atomic_set(&dcc->issued_discard, 0);
2269         atomic_set(&dcc->queued_discard, 0);
2270         atomic_set(&dcc->discard_cmd_cnt, 0);
2271         dcc->nr_discards = 0;
2272         dcc->max_discards = MAIN_SEGS(sbi) << sbi->log_blocks_per_seg;
2273         dcc->max_discard_request = DEF_MAX_DISCARD_REQUEST;
2274         dcc->min_discard_issue_time = DEF_MIN_DISCARD_ISSUE_TIME;
2275         dcc->mid_discard_issue_time = DEF_MID_DISCARD_ISSUE_TIME;
2276         dcc->max_discard_issue_time = DEF_MAX_DISCARD_ISSUE_TIME;
2277         dcc->discard_urgent_util = DEF_DISCARD_URGENT_UTIL;
2278         dcc->undiscard_blks = 0;
2279         dcc->next_pos = 0;
2280         dcc->root = RB_ROOT_CACHED;
2281         dcc->rbtree_check = false;
2282
2283         init_waitqueue_head(&dcc->discard_wait_queue);
2284         SM_I(sbi)->dcc_info = dcc;
2285 init_thread:
2286         err = f2fs_start_discard_thread(sbi);
2287         if (err) {
2288                 kfree(dcc);
2289                 SM_I(sbi)->dcc_info = NULL;
2290         }
2291
2292         return err;
2293 }
2294
2295 static void destroy_discard_cmd_control(struct f2fs_sb_info *sbi)
2296 {
2297         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
2298
2299         if (!dcc)
2300                 return;
2301
2302         f2fs_stop_discard_thread(sbi);
2303
2304         /*
2305          * Recovery can cache discard commands, so the error path of
2306          * fill_super() needs a chance to handle them.
2307          */
2308         f2fs_issue_discard_timeout(sbi);
2309
2310         kfree(dcc);
2311         SM_I(sbi)->dcc_info = NULL;
2312 }
2313
2314 static bool __mark_sit_entry_dirty(struct f2fs_sb_info *sbi, unsigned int segno)
2315 {
2316         struct sit_info *sit_i = SIT_I(sbi);
2317
2318         if (!__test_and_set_bit(segno, sit_i->dirty_sentries_bitmap)) {
2319                 sit_i->dirty_sentries++;
2320                 return false;
2321         }
2322
2323         return true;
2324 }
2325
2326 static void __set_sit_entry_type(struct f2fs_sb_info *sbi, int type,
2327                                         unsigned int segno, int modified)
2328 {
2329         struct seg_entry *se = get_seg_entry(sbi, segno);
2330
2331         se->type = type;
2332         if (modified)
2333                 __mark_sit_entry_dirty(sbi, segno);
2334 }
2335
2336 static inline unsigned long long get_segment_mtime(struct f2fs_sb_info *sbi,
2337                                                                 block_t blkaddr)
2338 {
2339         unsigned int segno = GET_SEGNO(sbi, blkaddr);
2340
2341         if (segno == NULL_SEGNO)
2342                 return 0;
2343         return get_seg_entry(sbi, segno)->mtime;
2344 }
2345
2346 static void update_segment_mtime(struct f2fs_sb_info *sbi, block_t blkaddr,
2347                                                 unsigned long long old_mtime)
2348 {
2349         struct seg_entry *se;
2350         unsigned int segno = GET_SEGNO(sbi, blkaddr);
2351         unsigned long long ctime = get_mtime(sbi, false);
2352         unsigned long long mtime = old_mtime ? old_mtime : ctime;
2353
2354         if (segno == NULL_SEGNO)
2355                 return;
2356
2357         se = get_seg_entry(sbi, segno);
2358
2359         if (!se->mtime)
2360                 se->mtime = mtime;
2361         else
2362                 se->mtime = div_u64(se->mtime * se->valid_blocks + mtime,
2363                                                 se->valid_blocks + 1);
2364
2365         if (ctime > SIT_I(sbi)->max_mtime)
2366                 SIT_I(sbi)->max_mtime = ctime;
2367 }
2368
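/*
 * Apply the valid-block delta @del for @blkaddr to its SIT entry: update
 * the valid block count, the current and checkpoint valid bitmaps and the
 * discard bitmap, then mark the SIT entry dirty.
 */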
2369 static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
2370 {
2371         struct seg_entry *se;
2372         unsigned int segno, offset;
2373         long int new_vblocks;
2374         bool exist;
2375 #ifdef CONFIG_F2FS_CHECK_FS
2376         bool mir_exist;
2377 #endif
2378
2379         segno = GET_SEGNO(sbi, blkaddr);
2380
2381         se = get_seg_entry(sbi, segno);
2382         new_vblocks = se->valid_blocks + del;
2383         offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);
2384
2385         f2fs_bug_on(sbi, (new_vblocks < 0 ||
2386                         (new_vblocks > f2fs_usable_blks_in_seg(sbi, segno))));
2387
2388         se->valid_blocks = new_vblocks;
2389
2390         /* Update valid block bitmap */
2391         if (del > 0) {
2392                 exist = f2fs_test_and_set_bit(offset, se->cur_valid_map);
2393 #ifdef CONFIG_F2FS_CHECK_FS
2394                 mir_exist = f2fs_test_and_set_bit(offset,
2395                                                 se->cur_valid_map_mir);
2396                 if (unlikely(exist != mir_exist)) {
2397                         f2fs_err(sbi, "Inconsistent error when setting bitmap, blk:%u, old bit:%d",
2398                                  blkaddr, exist);
2399                         f2fs_bug_on(sbi, 1);
2400                 }
2401 #endif
2402                 if (unlikely(exist)) {
2403                         f2fs_err(sbi, "Bitmap was wrongly set, blk:%u",
2404                                  blkaddr);
2405                         f2fs_bug_on(sbi, 1);
2406                         se->valid_blocks--;
2407                         del = 0;
2408                 }
2409
2410                 if (f2fs_block_unit_discard(sbi) &&
2411                                 !f2fs_test_and_set_bit(offset, se->discard_map))
2412                         sbi->discard_blks--;
2413
2414                 /*
2415                  * SSR should never reuse a block which is checkpointed
2416                  * or newly invalidated.
2417                  */
2418                 if (!is_sbi_flag_set(sbi, SBI_CP_DISABLED)) {
2419                         if (!f2fs_test_and_set_bit(offset, se->ckpt_valid_map))
2420                                 se->ckpt_valid_blocks++;
2421                 }
2422         } else {
2423                 exist = f2fs_test_and_clear_bit(offset, se->cur_valid_map);
2424 #ifdef CONFIG_F2FS_CHECK_FS
2425                 mir_exist = f2fs_test_and_clear_bit(offset,
2426                                                 se->cur_valid_map_mir);
2427                 if (unlikely(exist != mir_exist)) {
2428                         f2fs_err(sbi, "Inconsistent error when clearing bitmap, blk:%u, old bit:%d",
2429                                  blkaddr, exist);
2430                         f2fs_bug_on(sbi, 1);
2431                 }
2432 #endif
2433                 if (unlikely(!exist)) {
2434                         f2fs_err(sbi, "Bitmap was wrongly cleared, blk:%u",
2435                                  blkaddr);
2436                         f2fs_bug_on(sbi, 1);
2437                         se->valid_blocks++;
2438                         del = 0;
2439                 } else if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) {
2440                         /*
2441                          * If checkpoints are off, we must not reuse data that
2442                          * was used in the previous checkpoint. If it was used
2443                          * before, we must track that to know how much space we
2444                          * really have.
2445                          */
2446                         if (f2fs_test_bit(offset, se->ckpt_valid_map)) {
2447                                 spin_lock(&sbi->stat_lock);
2448                                 sbi->unusable_block_count++;
2449                                 spin_unlock(&sbi->stat_lock);
2450                         }
2451                 }
2452
2453                 if (f2fs_block_unit_discard(sbi) &&
2454                         f2fs_test_and_clear_bit(offset, se->discard_map))
2455                         sbi->discard_blks++;
2456         }
2457         if (!f2fs_test_bit(offset, se->ckpt_valid_map))
2458                 se->ckpt_valid_blocks += del;
2459
2460         __mark_sit_entry_dirty(sbi, segno);
2461
2462         /* update total number of valid blocks to be written in ckpt area */
2463         SIT_I(sbi)->written_valid_blocks += del;
2464
2465         if (__is_large_section(sbi))
2466                 get_sec_entry(sbi, segno)->valid_blocks += del;
2467 }
2468
2469 void f2fs_invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr)
2470 {
2471         unsigned int segno = GET_SEGNO(sbi, addr);
2472         struct sit_info *sit_i = SIT_I(sbi);
2473
2474         f2fs_bug_on(sbi, addr == NULL_ADDR);
2475         if (addr == NEW_ADDR || addr == COMPRESS_ADDR)
2476                 return;
2477
2478         invalidate_mapping_pages(META_MAPPING(sbi), addr, addr);
2479         f2fs_invalidate_compress_page(sbi, addr);
2480
2481         /* add it into sit main buffer */
2482         down_write(&sit_i->sentry_lock);
2483
2484         update_segment_mtime(sbi, addr, 0);
2485         update_sit_entry(sbi, addr, -1);
2486
2487         /* add it into dirty seglist */
2488         locate_dirty_segment(sbi, segno);
2489
2490         up_write(&sit_i->sentry_lock);
2491 }
2492
2493 bool f2fs_is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr)
2494 {
2495         struct sit_info *sit_i = SIT_I(sbi);
2496         unsigned int segno, offset;
2497         struct seg_entry *se;
2498         bool is_cp = false;
2499
2500         if (!__is_valid_data_blkaddr(blkaddr))
2501                 return true;
2502
2503         down_read(&sit_i->sentry_lock);
2504
2505         segno = GET_SEGNO(sbi, blkaddr);
2506         se = get_seg_entry(sbi, segno);
2507         offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);
2508
2509         if (f2fs_test_bit(offset, se->ckpt_valid_map))
2510                 is_cp = true;
2511
2512         up_read(&sit_i->sentry_lock);
2513
2514         return is_cp;
2515 }
2516
2517 static unsigned short f2fs_curseg_valid_blocks(struct f2fs_sb_info *sbi, int type)
2518 {
2519         struct curseg_info *curseg = CURSEG_I(sbi, type);
2520
2521         if (sbi->ckpt->alloc_type[type] == SSR)
2522                 return sbi->blocks_per_seg;
2523         return curseg->next_blkoff;
2524 }
2525
2526 /*
2527  * Calculate the number of current summary pages for writing
2528  */
2529 int f2fs_npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra)
2530 {
2531         int valid_sum_count = 0;
2532         int i, sum_in_page;
2533
2534         for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
2535                 if (sbi->ckpt->alloc_type[i] != SSR && for_ra)
2536                         valid_sum_count +=
2537                                 le16_to_cpu(F2FS_CKPT(sbi)->cur_data_blkoff[i]);
2538                 else
2539                         valid_sum_count += f2fs_curseg_valid_blocks(sbi, i);
2540         }
2541
2542         sum_in_page = (PAGE_SIZE - 2 * SUM_JOURNAL_SIZE -
2543                         SUM_FOOTER_SIZE) / SUMMARY_SIZE;
2544         if (valid_sum_count <= sum_in_page)
2545                 return 1;
2546         else if ((valid_sum_count - sum_in_page) <=
2547                 (PAGE_SIZE - SUM_FOOTER_SIZE) / SUMMARY_SIZE)
2548                 return 2;
2549         return 3;
2550 }
2551
2552 /*
2553  * Caller should put this summary page
2554  */
2555 struct page *f2fs_get_sum_page(struct f2fs_sb_info *sbi, unsigned int segno)
2556 {
2557         if (unlikely(f2fs_cp_error(sbi)))
2558                 return ERR_PTR(-EIO);
2559         return f2fs_get_meta_page_retry(sbi, GET_SUM_BLOCK(sbi, segno));
2560 }
2561
2562 void f2fs_update_meta_page(struct f2fs_sb_info *sbi,
2563                                         void *src, block_t blk_addr)
2564 {
2565         struct page *page = f2fs_grab_meta_page(sbi, blk_addr);
2566
2567         memcpy(page_address(page), src, PAGE_SIZE);
2568         set_page_dirty(page);
2569         f2fs_put_page(page, 1);
2570 }
2571
2572 static void write_sum_page(struct f2fs_sb_info *sbi,
2573                         struct f2fs_summary_block *sum_blk, block_t blk_addr)
2574 {
2575         f2fs_update_meta_page(sbi, (void *)sum_blk, blk_addr);
2576 }
2577
2578 static void write_current_sum_page(struct f2fs_sb_info *sbi,
2579                                                 int type, block_t blk_addr)
2580 {
2581         struct curseg_info *curseg = CURSEG_I(sbi, type);
2582         struct page *page = f2fs_grab_meta_page(sbi, blk_addr);
2583         struct f2fs_summary_block *src = curseg->sum_blk;
2584         struct f2fs_summary_block *dst;
2585
2586         dst = (struct f2fs_summary_block *)page_address(page);
2587         memset(dst, 0, PAGE_SIZE);
2588
2589         mutex_lock(&curseg->curseg_mutex);
2590
2591         down_read(&curseg->journal_rwsem);
2592         memcpy(&dst->journal, curseg->journal, SUM_JOURNAL_SIZE);
2593         up_read(&curseg->journal_rwsem);
2594
2595         memcpy(dst->entries, src->entries, SUM_ENTRY_SIZE);
2596         memcpy(&dst->footer, &src->footer, SUM_FOOTER_SIZE);
2597
2598         mutex_unlock(&curseg->curseg_mutex);
2599
2600         set_page_dirty(page);
2601         f2fs_put_page(page, 1);
2602 }
2603
2604 static int is_next_segment_free(struct f2fs_sb_info *sbi,
2605                                 struct curseg_info *curseg, int type)
2606 {
2607         unsigned int segno = curseg->segno + 1;
2608         struct free_segmap_info *free_i = FREE_I(sbi);
2609
2610         if (segno < MAIN_SEGS(sbi) && segno % sbi->segs_per_sec)
2611                 return !test_bit(segno, free_i->free_segmap);
2612         return 0;
2613 }
2614
2615 /*
2616  * Find a new segment in the free segment bitmap, following the allocation order.
2617  * This function must succeed; otherwise it is a BUG.
2618  */
2619 static void get_new_segment(struct f2fs_sb_info *sbi,
2620                         unsigned int *newseg, bool new_sec, int dir)
2621 {
2622         struct free_segmap_info *free_i = FREE_I(sbi);
2623         unsigned int segno, secno, zoneno;
2624         unsigned int total_zones = MAIN_SECS(sbi) / sbi->secs_per_zone;
2625         unsigned int hint = GET_SEC_FROM_SEG(sbi, *newseg);
2626         unsigned int old_zoneno = GET_ZONE_FROM_SEG(sbi, *newseg);
2627         unsigned int left_start = hint;
2628         bool init = true;
2629         int go_left = 0;
2630         int i;
2631
2632         spin_lock(&free_i->segmap_lock);
2633
2634         if (!new_sec && ((*newseg + 1) % sbi->segs_per_sec)) {
2635                 segno = find_next_zero_bit(free_i->free_segmap,
2636                         GET_SEG_FROM_SEC(sbi, hint + 1), *newseg + 1);
2637                 if (segno < GET_SEG_FROM_SEC(sbi, hint + 1))
2638                         goto got_it;
2639         }
2640 find_other_zone:
2641         secno = find_next_zero_bit(free_i->free_secmap, MAIN_SECS(sbi), hint);
2642         if (secno >= MAIN_SECS(sbi)) {
2643                 if (dir == ALLOC_RIGHT) {
2644                         secno = find_first_zero_bit(free_i->free_secmap,
2645                                                         MAIN_SECS(sbi));
2646                         f2fs_bug_on(sbi, secno >= MAIN_SECS(sbi));
2647                 } else {
2648                         go_left = 1;
2649                         left_start = hint - 1;
2650                 }
2651         }
2652         if (go_left == 0)
2653                 goto skip_left;
2654
2655         while (test_bit(left_start, free_i->free_secmap)) {
2656                 if (left_start > 0) {
2657                         left_start--;
2658                         continue;
2659                 }
2660                 left_start = find_first_zero_bit(free_i->free_secmap,
2661                                                         MAIN_SECS(sbi));
2662                 f2fs_bug_on(sbi, left_start >= MAIN_SECS(sbi));
2663                 break;
2664         }
2665         secno = left_start;
2666 skip_left:
2667         segno = GET_SEG_FROM_SEC(sbi, secno);
2668         zoneno = GET_ZONE_FROM_SEC(sbi, secno);
2669
2670         /* give up on finding another zone */
2671         if (!init)
2672                 goto got_it;
2673         if (sbi->secs_per_zone == 1)
2674                 goto got_it;
2675         if (zoneno == old_zoneno)
2676                 goto got_it;
2677         if (dir == ALLOC_LEFT) {
2678                 if (!go_left && zoneno + 1 >= total_zones)
2679                         goto got_it;
2680                 if (go_left && zoneno == 0)
2681                         goto got_it;
2682         }
2683         for (i = 0; i < NR_CURSEG_TYPE; i++)
2684                 if (CURSEG_I(sbi, i)->zone == zoneno)
2685                         break;
2686
2687         if (i < NR_CURSEG_TYPE) {
2688                 /* zone is in use, try another */
2689                 if (go_left)
2690                         hint = zoneno * sbi->secs_per_zone - 1;
2691                 else if (zoneno + 1 >= total_zones)
2692                         hint = 0;
2693                 else
2694                         hint = (zoneno + 1) * sbi->secs_per_zone;
2695                 init = false;
2696                 goto find_other_zone;
2697         }
2698 got_it:
2699         /* set it as in-use in the free segmap */
2700         f2fs_bug_on(sbi, test_bit(segno, free_i->free_segmap));
2701         __set_inuse(sbi, segno);
2702         *newseg = segno;
2703         spin_unlock(&free_i->segmap_lock);
2704 }
2705
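/* Switch curseg to its next_segno and reset the summary block footer. */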
2706 static void reset_curseg(struct f2fs_sb_info *sbi, int type, int modified)
2707 {
2708         struct curseg_info *curseg = CURSEG_I(sbi, type);
2709         struct summary_footer *sum_footer;
2710         unsigned short seg_type = curseg->seg_type;
2711
2712         curseg->inited = true;
2713         curseg->segno = curseg->next_segno;
2714         curseg->zone = GET_ZONE_FROM_SEG(sbi, curseg->segno);
2715         curseg->next_blkoff = 0;
2716         curseg->next_segno = NULL_SEGNO;
2717
2718         sum_footer = &(curseg->sum_blk->footer);
2719         memset(sum_footer, 0, sizeof(struct summary_footer));
2720
2721         sanity_check_seg_type(sbi, seg_type);
2722
2723         if (IS_DATASEG(seg_type))
2724                 SET_SUM_TYPE(sum_footer, SUM_TYPE_DATA);
2725         if (IS_NODESEG(seg_type))
2726                 SET_SUM_TYPE(sum_footer, SUM_TYPE_NODE);
2727         __set_sit_entry_type(sbi, seg_type, curseg->segno, modified);
2728 }
2729
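/* Choose the hint segment from which get_new_segment() starts searching. */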
2730 static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type)
2731 {
2732         struct curseg_info *curseg = CURSEG_I(sbi, type);
2733         unsigned short seg_type = curseg->seg_type;
2734
2735         sanity_check_seg_type(sbi, seg_type);
2736         if (f2fs_need_rand_seg(sbi))
2737                 return get_random_u32_below(MAIN_SECS(sbi) * sbi->segs_per_sec);
2738
2739         /* if segs_per_sec is larger than 1, we need to keep the original policy. */
2740         if (__is_large_section(sbi))
2741                 return curseg->segno;
2742
2743         /* the in-memory log may not be located on any segment after mount */
2744         if (!curseg->inited)
2745                 return 0;
2746
2747         if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
2748                 return 0;
2749
2750         if (test_opt(sbi, NOHEAP) &&
2751                 (seg_type == CURSEG_HOT_DATA || IS_NODESEG(seg_type)))
2752                 return 0;
2753
2754         if (SIT_I(sbi)->last_victim[ALLOC_NEXT])
2755                 return SIT_I(sbi)->last_victim[ALLOC_NEXT];
2756
2757         /* find segments from 0 to reuse freed segments */
2758         if (F2FS_OPTION(sbi).alloc_mode == ALLOC_MODE_REUSE)
2759                 return 0;
2760
2761         return curseg->segno;
2762 }
2763
2764 /*
2765  * Allocate a current working segment.
2766  * This function always allocates a free segment in LFS manner.
2767  */
2768 static void new_curseg(struct f2fs_sb_info *sbi, int type, bool new_sec)
2769 {
2770         struct curseg_info *curseg = CURSEG_I(sbi, type);
2771         unsigned short seg_type = curseg->seg_type;
2772         unsigned int segno = curseg->segno;
2773         int dir = ALLOC_LEFT;
2774
2775         if (curseg->inited)
2776                 write_sum_page(sbi, curseg->sum_blk,
2777                                 GET_SUM_BLOCK(sbi, segno));
2778         if (seg_type == CURSEG_WARM_DATA || seg_type == CURSEG_COLD_DATA)
2779                 dir = ALLOC_RIGHT;
2780
2781         if (test_opt(sbi, NOHEAP))
2782                 dir = ALLOC_RIGHT;
2783
2784         segno = __get_next_segno(sbi, type);
2785         get_new_segment(sbi, &segno, new_sec, dir);
2786         curseg->next_segno = segno;
2787         reset_curseg(sbi, type, 1);
2788         curseg->alloc_type = LFS;
2789         if (F2FS_OPTION(sbi).fs_mode == FS_MODE_FRAGMENT_BLK)
2790                 curseg->fragment_remained_chunk =
2791                                 get_random_u32_inclusive(1, sbi->max_fragment_chunk);
2792 }
2793
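/*
 * Find the first block offset in @segno, at or after @start, that is free
 * in both the current and the checkpointed valid-block bitmaps (i.e. a
 * slot usable for SSR allocation).
 */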
2794 static int __next_free_blkoff(struct f2fs_sb_info *sbi,
2795                                         int segno, block_t start)
2796 {
2797         struct seg_entry *se = get_seg_entry(sbi, segno);
2798         int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long);
2799         unsigned long *target_map = SIT_I(sbi)->tmp_map;
2800         unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map;
2801         unsigned long *cur_map = (unsigned long *)se->cur_valid_map;
2802         int i;
2803
2804         for (i = 0; i < entries; i++)
2805                 target_map[i] = ckpt_map[i] | cur_map[i];
2806
2807         return __find_rev_next_zero_bit(target_map, sbi->blocks_per_seg, start);
2808 }
2809
2810 static int f2fs_find_next_ssr_block(struct f2fs_sb_info *sbi,
2811                 struct curseg_info *seg)
2812 {
2813         return __next_free_blkoff(sbi, seg->segno, seg->next_blkoff + 1);
2814 }
2815
2816 bool f2fs_segment_has_free_slot(struct f2fs_sb_info *sbi, int segno)
2817 {
2818         return __next_free_blkoff(sbi, segno, 0) < sbi->blocks_per_seg;
2819 }
2820
2821 /*
2822  * This function always allocates a used segment (from the dirty seglist) in
2823  * SSR manner, so it needs to recover the existing segment information of valid blocks.
2824  */
2825 static void change_curseg(struct f2fs_sb_info *sbi, int type)
2826 {
2827         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
2828         struct curseg_info *curseg = CURSEG_I(sbi, type);
2829         unsigned int new_segno = curseg->next_segno;
2830         struct f2fs_summary_block *sum_node;
2831         struct page *sum_page;
2832
2833         write_sum_page(sbi, curseg->sum_blk, GET_SUM_BLOCK(sbi, curseg->segno));
2834
2835         __set_test_and_inuse(sbi, new_segno);
2836
2837         mutex_lock(&dirty_i->seglist_lock);
2838         __remove_dirty_segment(sbi, new_segno, PRE);
2839         __remove_dirty_segment(sbi, new_segno, DIRTY);
2840         mutex_unlock(&dirty_i->seglist_lock);
2841
2842         reset_curseg(sbi, type, 1);
2843         curseg->alloc_type = SSR;
2844         curseg->next_blkoff = __next_free_blkoff(sbi, curseg->segno, 0);
2845
2846         sum_page = f2fs_get_sum_page(sbi, new_segno);
2847         if (IS_ERR(sum_page)) {
2848                 /* GC won't be able to use stale summary pages by cp_error */
2849                 memset(curseg->sum_blk, 0, SUM_ENTRY_SIZE);
2850                 return;
2851         }
2852         sum_node = (struct f2fs_summary_block *)page_address(sum_page);
2853         memcpy(curseg->sum_blk, sum_node, SUM_ENTRY_SIZE);
2854         f2fs_put_page(sum_page, 1);
2855 }
2856
2857 static int get_ssr_segment(struct f2fs_sb_info *sbi, int type,
2858                                 int alloc_mode, unsigned long long age);
2859
2860 static void get_atssr_segment(struct f2fs_sb_info *sbi, int type,
2861                                         int target_type, int alloc_mode,
2862                                         unsigned long long age)
2863 {
2864         struct curseg_info *curseg = CURSEG_I(sbi, type);
2865
2866         curseg->seg_type = target_type;
2867
2868         if (get_ssr_segment(sbi, type, alloc_mode, age)) {
2869                 struct seg_entry *se = get_seg_entry(sbi, curseg->next_segno);
2870
2871                 curseg->seg_type = se->type;
2872                 change_curseg(sbi, type);
2873         } else {
2874                 /* allocate cold segment by default */
2875                 curseg->seg_type = CURSEG_COLD_DATA;
2876                 new_curseg(sbi, type, true);
2877         }
2878         stat_inc_seg_type(sbi, curseg);
2879 }
2880
2881 static void __f2fs_init_atgc_curseg(struct f2fs_sb_info *sbi)
2882 {
2883         struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_ALL_DATA_ATGC);
2884
2885         if (!sbi->am.atgc_enabled)
2886                 return;
2887
2888         f2fs_down_read(&SM_I(sbi)->curseg_lock);
2889
2890         mutex_lock(&curseg->curseg_mutex);
2891         down_write(&SIT_I(sbi)->sentry_lock);
2892
2893         get_atssr_segment(sbi, CURSEG_ALL_DATA_ATGC, CURSEG_COLD_DATA, SSR, 0);
2894
2895         up_write(&SIT_I(sbi)->sentry_lock);
2896         mutex_unlock(&curseg->curseg_mutex);
2897
2898         f2fs_up_read(&SM_I(sbi)->curseg_lock);
2899 }
2900
2901 void f2fs_init_inmem_curseg(struct f2fs_sb_info *sbi)
2902 {
2903         __f2fs_init_atgc_curseg(sbi);
2904 }
2905
2906 static void __f2fs_save_inmem_curseg(struct f2fs_sb_info *sbi, int type)
2907 {
2908         struct curseg_info *curseg = CURSEG_I(sbi, type);
2909
2910         mutex_lock(&curseg->curseg_mutex);
2911         if (!curseg->inited)
2912                 goto out;
2913
2914         if (get_valid_blocks(sbi, curseg->segno, false)) {
2915                 write_sum_page(sbi, curseg->sum_blk,
2916                                 GET_SUM_BLOCK(sbi, curseg->segno));
2917         } else {
2918                 mutex_lock(&DIRTY_I(sbi)->seglist_lock);
2919                 __set_test_and_free(sbi, curseg->segno, true);
2920                 mutex_unlock(&DIRTY_I(sbi)->seglist_lock);
2921         }
2922 out:
2923         mutex_unlock(&curseg->curseg_mutex);
2924 }
2925
2926 void f2fs_save_inmem_curseg(struct f2fs_sb_info *sbi)
2927 {
2928         __f2fs_save_inmem_curseg(sbi, CURSEG_COLD_DATA_PINNED);
2929
2930         if (sbi->am.atgc_enabled)
2931                 __f2fs_save_inmem_curseg(sbi, CURSEG_ALL_DATA_ATGC);
2932 }
2933
2934 static void __f2fs_restore_inmem_curseg(struct f2fs_sb_info *sbi, int type)
2935 {
2936         struct curseg_info *curseg = CURSEG_I(sbi, type);
2937
2938         mutex_lock(&curseg->curseg_mutex);
2939         if (!curseg->inited)
2940                 goto out;
2941         if (get_valid_blocks(sbi, curseg->segno, false))
2942                 goto out;
2943
2944         mutex_lock(&DIRTY_I(sbi)->seglist_lock);
2945         __set_test_and_inuse(sbi, curseg->segno);
2946         mutex_unlock(&DIRTY_I(sbi)->seglist_lock);
2947 out:
2948         mutex_unlock(&curseg->curseg_mutex);
2949 }
2950
2951 void f2fs_restore_inmem_curseg(struct f2fs_sb_info *sbi)
2952 {
2953         __f2fs_restore_inmem_curseg(sbi, CURSEG_COLD_DATA_PINNED);
2954
2955         if (sbi->am.atgc_enabled)
2956                 __f2fs_restore_inmem_curseg(sbi, CURSEG_ALL_DATA_ATGC);
2957 }
2958
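/*
 * Pick a victim segment for SSR allocation. Try a victim of the current log
 * type first; if that fails, scan the other node/data log types (from cold to
 * hot when the current type is warm or colder, otherwise from hot to cold).
 * With checkpoint disabled, fall back to a free segment from the dirty list.
 * Returns 1 and sets curseg->next_segno on success, 0 otherwise.
 */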
2959 static int get_ssr_segment(struct f2fs_sb_info *sbi, int type,
2960                                 int alloc_mode, unsigned long long age)
2961 {
2962         struct curseg_info *curseg = CURSEG_I(sbi, type);
2963         unsigned segno = NULL_SEGNO;
2964         unsigned short seg_type = curseg->seg_type;
2965         int i, cnt;
2966         bool reversed = false;
2967
2968         sanity_check_seg_type(sbi, seg_type);
2969
2970         /* f2fs_need_SSR() already forces us to do this */
2971         if (!f2fs_get_victim(sbi, &segno, BG_GC, seg_type, alloc_mode, age)) {
2972                 curseg->next_segno = segno;
2973                 return 1;
2974         }
2975
2976         /* For node segments, let's do SSR more intensively */
2977         if (IS_NODESEG(seg_type)) {
2978                 if (seg_type >= CURSEG_WARM_NODE) {
2979                         reversed = true;
2980                         i = CURSEG_COLD_NODE;
2981                 } else {
2982                         i = CURSEG_HOT_NODE;
2983                 }
2984                 cnt = NR_CURSEG_NODE_TYPE;
2985         } else {
2986                 if (seg_type >= CURSEG_WARM_DATA) {
2987                         reversed = true;
2988                         i = CURSEG_COLD_DATA;
2989                 } else {
2990                         i = CURSEG_HOT_DATA;
2991                 }
2992                 cnt = NR_CURSEG_DATA_TYPE;
2993         }
2994
2995         for (; cnt-- > 0; reversed ? i-- : i++) {
2996                 if (i == seg_type)
2997                         continue;
2998                 if (!f2fs_get_victim(sbi, &segno, BG_GC, i, alloc_mode, age)) {
2999                         curseg->next_segno = segno;
3000                         return 1;
3001                 }
3002         }
3003
3004         /* find valid_blocks=0 in dirty list */
3005         if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) {
3006                 segno = get_free_segment(sbi);
3007                 if (segno != NULL_SEGNO) {
3008                         curseg->next_segno = segno;
3009                         return 1;
3010                 }
3011         }
3012         return 0;
3013 }
3014
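/*
 * Decide whether the log of @type should switch to a brand new segment rather
 * than reuse a partially valid one: returns false only when SSR is needed and
 * get_ssr_segment() found a victim.
 */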
3015 static bool need_new_seg(struct f2fs_sb_info *sbi, int type)
3016 {
3017         struct curseg_info *curseg = CURSEG_I(sbi, type);
3018
3019         if (!is_set_ckpt_flags(sbi, CP_CRC_RECOVERY_FLAG) &&
3020             curseg->seg_type == CURSEG_WARM_NODE)
3021                 return true;
3022         if (curseg->alloc_type == LFS &&
3023             is_next_segment_free(sbi, curseg, type) &&
3024             likely(!is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
3025                 return true;
3026         if (!f2fs_need_SSR(sbi) || !get_ssr_segment(sbi, type, SSR, 0))
3027                 return true;
3028         return false;
3029 }
3030
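/*
 * Used during resize: if the curseg of @type currently sits inside
 * [@start, @end], move it out of that range (via SSR if possible, otherwise
 * by opening a fresh segment) so the range can be reclaimed.
 */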
3031 void f2fs_allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type,
3032                                         unsigned int start, unsigned int end)
3033 {
3034         struct curseg_info *curseg = CURSEG_I(sbi, type);
3035         unsigned int segno;
3036
3037         f2fs_down_read(&SM_I(sbi)->curseg_lock);
3038         mutex_lock(&curseg->curseg_mutex);
3039         down_write(&SIT_I(sbi)->sentry_lock);
3040
3041         segno = CURSEG_I(sbi, type)->segno;
3042         if (segno < start || segno > end)
3043                 goto unlock;
3044
3045         if (f2fs_need_SSR(sbi) && get_ssr_segment(sbi, type, SSR, 0))
3046                 change_curseg(sbi, type);
3047         else
3048                 new_curseg(sbi, type, true);
3049
3050         stat_inc_seg_type(sbi, curseg);
3051
3052         locate_dirty_segment(sbi, segno);
3053 unlock:
3054         up_write(&SIT_I(sbi)->sentry_lock);
3055
3056         if (segno != curseg->segno)
3057                 f2fs_notice(sbi, "For resize: curseg of type %d: %u ==> %u",
3058                             type, segno, curseg->segno);
3059
3060         mutex_unlock(&curseg->curseg_mutex);
3061         f2fs_up_read(&SM_I(sbi)->curseg_lock);
3062 }
3063
3064 static void __allocate_new_segment(struct f2fs_sb_info *sbi, int type,
3065                                                 bool new_sec, bool force)
3066 {
3067         struct curseg_info *curseg = CURSEG_I(sbi, type);
3068         unsigned int old_segno;
3069
3070         if (!force && curseg->inited &&
3071             !curseg->next_blkoff &&
3072             !get_valid_blocks(sbi, curseg->segno, new_sec) &&
3073             !get_ckpt_valid_blocks(sbi, curseg->segno, new_sec))
3074                 return;
3075
3076         old_segno = curseg->segno;
3077         new_curseg(sbi, type, true);
3078         stat_inc_seg_type(sbi, curseg);
3079         locate_dirty_segment(sbi, old_segno);
3080 }
3081
3082 void f2fs_allocate_new_section(struct f2fs_sb_info *sbi, int type, bool force)
3083 {
3084         f2fs_down_read(&SM_I(sbi)->curseg_lock);
3085         down_write(&SIT_I(sbi)->sentry_lock);
3086         __allocate_new_segment(sbi, type, true, force);
3087         up_write(&SIT_I(sbi)->sentry_lock);
3088         f2fs_up_read(&SM_I(sbi)->curseg_lock);
3089 }
3090
3091 void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi)
3092 {
3093         int i;
3094
3095         f2fs_down_read(&SM_I(sbi)->curseg_lock);
3096         down_write(&SIT_I(sbi)->sentry_lock);
3097         for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++)
3098                 __allocate_new_segment(sbi, i, false, false);
3099         up_write(&SIT_I(sbi)->sentry_lock);
3100         f2fs_up_read(&SM_I(sbi)->curseg_lock);
3101 }
3102
3103 bool f2fs_exist_trim_candidates(struct f2fs_sb_info *sbi,
3104                                                 struct cp_control *cpc)
3105 {
3106         __u64 trim_start = cpc->trim_start;
3107         bool has_candidate = false;
3108
3109         down_write(&SIT_I(sbi)->sentry_lock);
3110         for (; cpc->trim_start <= cpc->trim_end; cpc->trim_start++) {
3111                 if (add_discard_addrs(sbi, cpc, true)) {
3112                         has_candidate = true;
3113                         break;
3114                 }
3115         }
3116         up_write(&SIT_I(sbi)->sentry_lock);
3117
3118         cpc->trim_start = trim_start;
3119         return has_candidate;
3120 }
3121
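/*
 * Issue the prepared discard commands whose start addresses fall in
 * [@start, @end], batching up to dpolicy->max_requests per pass and waiting
 * for outstanding discards between passes. Returns the number of blocks
 * whose discards were waited for.
 */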
3122 static unsigned int __issue_discard_cmd_range(struct f2fs_sb_info *sbi,
3123                                         struct discard_policy *dpolicy,
3124                                         unsigned int start, unsigned int end)
3125 {
3126         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
3127         struct discard_cmd *prev_dc = NULL, *next_dc = NULL;
3128         struct rb_node **insert_p = NULL, *insert_parent = NULL;
3129         struct discard_cmd *dc;
3130         struct blk_plug plug;
3131         int issued;
3132         unsigned int trimmed = 0;
3133
3134 next:
3135         issued = 0;
3136
3137         mutex_lock(&dcc->cmd_lock);
3138         if (unlikely(dcc->rbtree_check))
3139                 f2fs_bug_on(sbi, !f2fs_check_discard_tree(sbi));
3140
3141         dc = __lookup_discard_cmd_ret(&dcc->root, start,
3142                                 &prev_dc, &next_dc, &insert_p, &insert_parent);
3143         if (!dc)
3144                 dc = next_dc;
3145
3146         blk_start_plug(&plug);
3147
3148         while (dc && dc->di.lstart <= end) {
3149                 struct rb_node *node;
3150                 int err = 0;
3151
3152                 if (dc->di.len < dpolicy->granularity)
3153                         goto skip;
3154
3155                 if (dc->state != D_PREP) {
3156                         list_move_tail(&dc->list, &dcc->fstrim_list);
3157                         goto skip;
3158                 }
3159
3160                 err = __submit_discard_cmd(sbi, dpolicy, dc, &issued);
3161
3162                 if (issued >= dpolicy->max_requests) {
3163                         start = dc->di.lstart + dc->di.len;
3164
3165                         if (err)
3166                                 __remove_discard_cmd(sbi, dc);
3167
3168                         blk_finish_plug(&plug);
3169                         mutex_unlock(&dcc->cmd_lock);
3170                         trimmed += __wait_all_discard_cmd(sbi, NULL);
3171                         f2fs_io_schedule_timeout(DEFAULT_IO_TIMEOUT);
3172                         goto next;
3173                 }
3174 skip:
3175                 node = rb_next(&dc->rb_node);
3176                 if (err)
3177                         __remove_discard_cmd(sbi, dc);
3178                 dc = rb_entry_safe(node, struct discard_cmd, rb_node);
3179
3180                 if (fatal_signal_pending(current))
3181                         break;
3182         }
3183
3184         blk_finish_plug(&plug);
3185         mutex_unlock(&dcc->cmd_lock);
3186
3187         return trimmed;
3188 }
3189
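/*
 * FITRIM entry point: write a checkpoint with CP_DISCARD to collect discard
 * candidates for the requested range, then, unless runtime discard is
 * enabled, issue and wait for the discard commands covering that range.
 */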
3190 int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range)
3191 {
3192         __u64 start = F2FS_BYTES_TO_BLK(range->start);
3193         __u64 end = start + F2FS_BYTES_TO_BLK(range->len) - 1;
3194         unsigned int start_segno, end_segno;
3195         block_t start_block, end_block;
3196         struct cp_control cpc;
3197         struct discard_policy dpolicy;
3198         unsigned long long trimmed = 0;
3199         int err = 0;
3200         bool need_align = f2fs_lfs_mode(sbi) && __is_large_section(sbi);
3201
3202         if (start >= MAX_BLKADDR(sbi) || range->len < sbi->blocksize)
3203                 return -EINVAL;
3204
3205         if (end < MAIN_BLKADDR(sbi))
3206                 goto out;
3207
3208         if (is_sbi_flag_set(sbi, SBI_NEED_FSCK)) {
3209                 f2fs_warn(sbi, "Found FS corruption, run fsck to fix.");
3210                 return -EFSCORRUPTED;
3211         }
3212
3213         /* start/end segment number in main_area */
3214         start_segno = (start <= MAIN_BLKADDR(sbi)) ? 0 : GET_SEGNO(sbi, start);
3215         end_segno = (end >= MAX_BLKADDR(sbi)) ? MAIN_SEGS(sbi) - 1 :
3216                                                 GET_SEGNO(sbi, end);
3217         if (need_align) {
3218                 start_segno = rounddown(start_segno, sbi->segs_per_sec);
3219                 end_segno = roundup(end_segno + 1, sbi->segs_per_sec) - 1;
3220         }
3221
3222         cpc.reason = CP_DISCARD;
3223         cpc.trim_minlen = max_t(__u64, 1, F2FS_BYTES_TO_BLK(range->minlen));
3224         cpc.trim_start = start_segno;
3225         cpc.trim_end = end_segno;
3226
3227         if (sbi->discard_blks == 0)
3228                 goto out;
3229
3230         f2fs_down_write(&sbi->gc_lock);
3231         err = f2fs_write_checkpoint(sbi, &cpc);
3232         f2fs_up_write(&sbi->gc_lock);
3233         if (err)
3234                 goto out;
3235
3236         /*
3237          * We filed discard candidates, but we don't actually need to wait for
3238          * all of them, since they'll be issued during idle time along with the
3239          * runtime discard option. Such a configuration relies on runtime discard
3240          * or periodic fstrim rather than waiting here.
3241          */
3242         if (f2fs_realtime_discard_enable(sbi))
3243                 goto out;
3244
3245         start_block = START_BLOCK(sbi, start_segno);
3246         end_block = START_BLOCK(sbi, end_segno + 1);
3247
3248         __init_discard_policy(sbi, &dpolicy, DPOLICY_FSTRIM, cpc.trim_minlen);
3249         trimmed = __issue_discard_cmd_range(sbi, &dpolicy,
3250                                         start_block, end_block);
3251
3252         trimmed += __wait_discard_cmd_range(sbi, &dpolicy,
3253                                         start_block, end_block);
3254 out:
3255         if (!err)
3256                 range->len = F2FS_BLK_TO_BYTES(trimmed);
3257         return err;
3258 }
3259
3260 int f2fs_rw_hint_to_seg_type(enum rw_hint hint)
3261 {
3262         switch (hint) {
3263         case WRITE_LIFE_SHORT:
3264                 return CURSEG_HOT_DATA;
3265         case WRITE_LIFE_EXTREME:
3266                 return CURSEG_COLD_DATA;
3267         default:
3268                 return CURSEG_WARM_DATA;
3269         }
3270 }
3271
3272 static int __get_segment_type_2(struct f2fs_io_info *fio)
3273 {
3274         if (fio->type == DATA)
3275                 return CURSEG_HOT_DATA;
3276         else
3277                 return CURSEG_HOT_NODE;
3278 }
3279
3280 static int __get_segment_type_4(struct f2fs_io_info *fio)
3281 {
3282         if (fio->type == DATA) {
3283                 struct inode *inode = fio->page->mapping->host;
3284
3285                 if (S_ISDIR(inode->i_mode))
3286                         return CURSEG_HOT_DATA;
3287                 else
3288                         return CURSEG_COLD_DATA;
3289         } else {
3290                 if (IS_DNODE(fio->page) && is_cold_node(fio->page))
3291                         return CURSEG_WARM_NODE;
3292                 else
3293                         return CURSEG_COLD_NODE;
3294         }
3295 }
3296
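/*
 * Classify a data block as hot/warm/cold based on the age recorded in the
 * block age extent cache; returns NO_CHECK_TYPE when no age information is
 * available.
 */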
3297 static int __get_age_segment_type(struct inode *inode, pgoff_t pgofs)
3298 {
3299         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
3300         struct extent_info ei = {};
3301
3302         if (f2fs_lookup_age_extent_cache(inode, pgofs, &ei)) {
3303                 if (!ei.age)
3304                         return NO_CHECK_TYPE;
3305                 if (ei.age <= sbi->hot_data_age_threshold)
3306                         return CURSEG_HOT_DATA;
3307                 if (ei.age <= sbi->warm_data_age_threshold)
3308                         return CURSEG_WARM_DATA;
3309                 return CURSEG_COLD_DATA;
3310         }
3311         return NO_CHECK_TYPE;
3312 }
3313
3314 static int __get_segment_type_6(struct f2fs_io_info *fio)
3315 {
3316         if (fio->type == DATA) {
3317                 struct inode *inode = fio->page->mapping->host;
3318                 int type;
3319
3320                 if (is_inode_flag_set(inode, FI_ALIGNED_WRITE))
3321                         return CURSEG_COLD_DATA_PINNED;
3322
3323                 if (page_private_gcing(fio->page)) {
3324                         if (fio->sbi->am.atgc_enabled &&
3325                                 (fio->io_type == FS_DATA_IO) &&
3326                                 (fio->sbi->gc_mode != GC_URGENT_HIGH))
3327                                 return CURSEG_ALL_DATA_ATGC;
3328                         else
3329                                 return CURSEG_COLD_DATA;
3330                 }
3331                 if (file_is_cold(inode) || f2fs_need_compress_data(inode))
3332                         return CURSEG_COLD_DATA;
3333
3334                 type = __get_age_segment_type(inode, fio->page->index);
3335                 if (type != NO_CHECK_TYPE)
3336                         return type;
3337
3338                 if (file_is_hot(inode) ||
3339                                 is_inode_flag_set(inode, FI_HOT_DATA) ||
3340                                 f2fs_is_cow_file(inode))
3341                         return CURSEG_HOT_DATA;
3342                 return f2fs_rw_hint_to_seg_type(inode->i_write_hint);
3343         } else {
3344                 if (IS_DNODE(fio->page))
3345                         return is_cold_node(fio->page) ? CURSEG_WARM_NODE :
3346                                                 CURSEG_HOT_NODE;
3347                 return CURSEG_COLD_NODE;
3348         }
3349 }
3350
3351 static int __get_segment_type(struct f2fs_io_info *fio)
3352 {
3353         int type = 0;
3354
3355         switch (F2FS_OPTION(fio->sbi).active_logs) {
3356         case 2:
3357                 type = __get_segment_type_2(fio);
3358                 break;
3359         case 4:
3360                 type = __get_segment_type_4(fio);
3361                 break;
3362         case 6:
3363                 type = __get_segment_type_6(fio);
3364                 break;
3365         default:
3366                 f2fs_bug_on(fio->sbi, true);
3367         }
3368
3369         if (IS_HOT(type))
3370                 fio->temp = HOT;
3371         else if (IS_WARM(type))
3372                 fio->temp = WARM;
3373         else
3374                 fio->temp = COLD;
3375         return type;
3376 }
3377
3378 static void f2fs_randomize_chunk(struct f2fs_sb_info *sbi,
3379                 struct curseg_info *seg)
3380 {
3381         /* To allocate block chunks of different sizes, use random numbers */
3382         if (--seg->fragment_remained_chunk > 0)
3383                 return;
3384
3385         seg->fragment_remained_chunk =
3386                 get_random_u32_inclusive(1, sbi->max_fragment_chunk);
3387         seg->next_blkoff +=
3388                 get_random_u32_inclusive(1, sbi->max_fragment_hole);
3389 }
3390
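/*
 * Allocate one block from the current segment of @type: record @sum in the
 * in-memory summary block, advance next_blkoff, update segment mtime and SIT
 * entries for the old and new addresses, and open the next segment (LFS, SSR
 * or ATSSR) once the current one is full. If @fio is given, the write is
 * queued on the per-temperature io_list for in-order submission.
 */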
3391 void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
3392                 block_t old_blkaddr, block_t *new_blkaddr,
3393                 struct f2fs_summary *sum, int type,
3394                 struct f2fs_io_info *fio)
3395 {
3396         struct sit_info *sit_i = SIT_I(sbi);
3397         struct curseg_info *curseg = CURSEG_I(sbi, type);
3398         unsigned long long old_mtime;
3399         bool from_gc = (type == CURSEG_ALL_DATA_ATGC);
3400         struct seg_entry *se = NULL;
3401         bool segment_full = false;
3402
3403         f2fs_down_read(&SM_I(sbi)->curseg_lock);
3404
3405         mutex_lock(&curseg->curseg_mutex);
3406         down_write(&sit_i->sentry_lock);
3407
3408         if (from_gc) {
3409                 f2fs_bug_on(sbi, GET_SEGNO(sbi, old_blkaddr) == NULL_SEGNO);
3410                 se = get_seg_entry(sbi, GET_SEGNO(sbi, old_blkaddr));
3411                 sanity_check_seg_type(sbi, se->type);
3412                 f2fs_bug_on(sbi, IS_NODESEG(se->type));
3413         }
3414         *new_blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
3415
3416         f2fs_bug_on(sbi, curseg->next_blkoff >= sbi->blocks_per_seg);
3417
3418         f2fs_wait_discard_bio(sbi, *new_blkaddr);
3419
3420         curseg->sum_blk->entries[curseg->next_blkoff] = *sum;
3421         if (curseg->alloc_type == SSR) {
3422                 curseg->next_blkoff = f2fs_find_next_ssr_block(sbi, curseg);
3423         } else {
3424                 curseg->next_blkoff++;
3425                 if (F2FS_OPTION(sbi).fs_mode == FS_MODE_FRAGMENT_BLK)
3426                         f2fs_randomize_chunk(sbi, curseg);
3427         }
3428         if (curseg->next_blkoff >= f2fs_usable_blks_in_seg(sbi, curseg->segno))
3429                 segment_full = true;
3430         stat_inc_block_count(sbi, curseg);
3431
3432         if (from_gc) {
3433                 old_mtime = get_segment_mtime(sbi, old_blkaddr);
3434         } else {
3435                 update_segment_mtime(sbi, old_blkaddr, 0);
3436                 old_mtime = 0;
3437         }
3438         update_segment_mtime(sbi, *new_blkaddr, old_mtime);
3439
3440         /*
3441          * SIT information should be updated before segment allocation,
3442          * since SSR needs the latest valid block information.
3443          */
3444         update_sit_entry(sbi, *new_blkaddr, 1);
3445         if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO)
3446                 update_sit_entry(sbi, old_blkaddr, -1);
3447
3448         /*
3449          * If the current segment is full, flush it out and replace it with a
3450          * new segment.
3451          */
3452         if (segment_full) {
3453                 if (from_gc) {
3454                         get_atssr_segment(sbi, type, se->type,
3455                                                 AT_SSR, se->mtime);
3456                 } else {
3457                         if (need_new_seg(sbi, type))
3458                                 new_curseg(sbi, type, false);
3459                         else
3460                                 change_curseg(sbi, type);
3461                         stat_inc_seg_type(sbi, curseg);
3462                 }
3463         }
3464         /*
3465          * segment dirty status should be updated after segment allocation,
3466          * so we just need to update the status once, after the previous
3467          * segment has been closed.
3468          */
3469         locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr));
3470         locate_dirty_segment(sbi, GET_SEGNO(sbi, *new_blkaddr));
3471
3472         if (IS_DATASEG(type))
3473                 atomic64_inc(&sbi->allocated_data_blocks);
3474
3475         up_write(&sit_i->sentry_lock);
3476
3477         if (page && IS_NODESEG(type)) {
3478                 fill_node_footer_blkaddr(page, NEXT_FREE_BLKADDR(sbi, curseg));
3479
3480                 f2fs_inode_chksum_set(sbi, page);
3481         }
3482
3483         if (fio) {
3484                 struct f2fs_bio_info *io;
3485
3486                 if (F2FS_IO_ALIGNED(sbi))
3487                         fio->retry = 0;
3488
3489                 INIT_LIST_HEAD(&fio->list);
3490                 fio->in_list = 1;
3491                 io = sbi->write_io[fio->type] + fio->temp;
3492                 spin_lock(&io->io_lock);
3493                 list_add_tail(&fio->list, &io->io_list);
3494                 spin_unlock(&io->io_lock);
3495         }
3496
3497         mutex_unlock(&curseg->curseg_mutex);
3498
3499         f2fs_up_read(&SM_I(sbi)->curseg_lock);
3500 }
3501
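/*
 * On multi-device setups, mark every device covered by @blkcnt blocks from
 * @blkaddr as dirty, both for fsync (FLUSH_INO) and for the next checkpoint.
 */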
3502 void f2fs_update_device_state(struct f2fs_sb_info *sbi, nid_t ino,
3503                                         block_t blkaddr, unsigned int blkcnt)
3504 {
3505         if (!f2fs_is_multi_device(sbi))
3506                 return;
3507
3508         while (1) {
3509                 unsigned int devidx = f2fs_target_device_index(sbi, blkaddr);
3510                 unsigned int blks = FDEV(devidx).end_blk - blkaddr + 1;
3511
3512                 /* update device state for fsync */
3513                 f2fs_set_dirty_device(sbi, ino, devidx, FLUSH_INO);
3514
3515                 /* update device state for checkpoint */
3516                 if (!f2fs_test_bit(devidx, (char *)&sbi->dirty_device)) {
3517                         spin_lock(&sbi->dev_lock);
3518                         f2fs_set_bit(devidx, (char *)&sbi->dirty_device);
3519                         spin_unlock(&sbi->dev_lock);
3520                 }
3521
3522                 if (blkcnt <= blks)
3523                         break;
3524                 blkcnt -= blks;
3525                 blkaddr += blks;
3526         }
3527 }
3528
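/*
 * Allocate a new block for @fio and submit the page write, retrying the
 * allocation when submission requests it; cold data writes are serialized
 * under io_order_lock in LFS mode to keep their order.
 */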
3529 static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio)
3530 {
3531         int type = __get_segment_type(fio);
3532         bool keep_order = (f2fs_lfs_mode(fio->sbi) && type == CURSEG_COLD_DATA);
3533
3534         if (keep_order)
3535                 f2fs_down_read(&fio->sbi->io_order_lock);
3536 reallocate:
3537         f2fs_allocate_data_block(fio->sbi, fio->page, fio->old_blkaddr,
3538                         &fio->new_blkaddr, sum, type, fio);
3539         if (GET_SEGNO(fio->sbi, fio->old_blkaddr) != NULL_SEGNO) {
3540                 invalidate_mapping_pages(META_MAPPING(fio->sbi),
3541                                         fio->old_blkaddr, fio->old_blkaddr);
3542                 f2fs_invalidate_compress_page(fio->sbi, fio->old_blkaddr);
3543         }
3544
3545         /* write out the dirty page to the bdev */
3546         f2fs_submit_page_write(fio);
3547         if (fio->retry) {
3548                 fio->old_blkaddr = fio->new_blkaddr;
3549                 goto reallocate;
3550         }
3551
3552         f2fs_update_device_state(fio->sbi, fio->ino, fio->new_blkaddr, 1);
3553
3554         if (keep_order)
3555                 f2fs_up_read(&fio->sbi->io_order_lock);
3556 }
3557
3558 void f2fs_do_write_meta_page(struct f2fs_sb_info *sbi, struct page *page,
3559                                         enum iostat_type io_type)
3560 {
3561         struct f2fs_io_info fio = {
3562                 .sbi = sbi,
3563                 .type = META,
3564                 .temp = HOT,
3565                 .op = REQ_OP_WRITE,
3566                 .op_flags = REQ_SYNC | REQ_META | REQ_PRIO,
3567                 .old_blkaddr = page->index,
3568                 .new_blkaddr = page->index,
3569                 .page = page,
3570                 .encrypted_page = NULL,
3571                 .in_list = 0,
3572         };
3573
3574         if (unlikely(page->index >= MAIN_BLKADDR(sbi)))
3575                 fio.op_flags &= ~REQ_META;
3576
3577         set_page_writeback(page);
3578         f2fs_submit_page_write(&fio);
3579
3580         stat_inc_meta_count(sbi, page->index);
3581         f2fs_update_iostat(sbi, NULL, io_type, F2FS_BLKSIZE);
3582 }
3583
3584 void f2fs_do_write_node_page(unsigned int nid, struct f2fs_io_info *fio)
3585 {
3586         struct f2fs_summary sum;
3587
3588         set_summary(&sum, nid, 0, 0);
3589         do_write_page(&sum, fio);
3590
3591         f2fs_update_iostat(fio->sbi, NULL, fio->io_type, F2FS_BLKSIZE);
3592 }
3593
3594 void f2fs_outplace_write_data(struct dnode_of_data *dn,
3595                                         struct f2fs_io_info *fio)
3596 {
3597         struct f2fs_sb_info *sbi = fio->sbi;
3598         struct f2fs_summary sum;
3599
3600         f2fs_bug_on(sbi, dn->data_blkaddr == NULL_ADDR);
3601         if (fio->io_type == FS_DATA_IO || fio->io_type == FS_CP_DATA_IO)
3602                 f2fs_update_age_extent_cache(dn);
3603         set_summary(&sum, dn->nid, dn->ofs_in_node, fio->version);
3604         do_write_page(&sum, fio);
3605         f2fs_update_data_blkaddr(dn, fio->new_blkaddr);
3606
3607         f2fs_update_iostat(sbi, dn->inode, fio->io_type, F2FS_BLKSIZE);
3608 }
3609
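/*
 * In-place update path: keep the old block address, verify that the target
 * segment really holds data, and submit the page either merged into the
 * caller's pending bio or on its own.
 */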
3610 int f2fs_inplace_write_data(struct f2fs_io_info *fio)
3611 {
3612         int err;
3613         struct f2fs_sb_info *sbi = fio->sbi;
3614         unsigned int segno;
3615
3616         fio->new_blkaddr = fio->old_blkaddr;
3617         /* i/o temperature is needed for passing down write hints */
3618         __get_segment_type(fio);
3619
3620         segno = GET_SEGNO(sbi, fio->new_blkaddr);
3621
3622         if (!IS_DATASEG(get_seg_entry(sbi, segno)->type)) {
3623                 set_sbi_flag(sbi, SBI_NEED_FSCK);
3624                 f2fs_warn(sbi, "%s: incorrect segment(%u) type, run fsck to fix.",
3625                           __func__, segno);
3626                 err = -EFSCORRUPTED;
3627                 f2fs_handle_error(sbi, ERROR_INCONSISTENT_SUM_TYPE);
3628                 goto drop_bio;
3629         }
3630
3631         if (f2fs_cp_error(sbi)) {
3632                 err = -EIO;
3633                 goto drop_bio;
3634         }
3635
3636         if (fio->post_read)
3637                 invalidate_mapping_pages(META_MAPPING(sbi),
3638                                 fio->new_blkaddr, fio->new_blkaddr);
3639
3640         stat_inc_inplace_blocks(fio->sbi);
3641
3642         if (fio->bio && !IS_F2FS_IPU_NOCACHE(sbi))
3643                 err = f2fs_merge_page_bio(fio);
3644         else
3645                 err = f2fs_submit_page_bio(fio);
3646         if (!err) {
3647                 f2fs_update_device_state(fio->sbi, fio->ino,
3648                                                 fio->new_blkaddr, 1);
3649                 f2fs_update_iostat(fio->sbi, fio->page->mapping->host,
3650                                                 fio->io_type, F2FS_BLKSIZE);
3651         }
3652
3653         return err;
3654 drop_bio:
3655         if (fio->bio && *(fio->bio)) {
3656                 struct bio *bio = *(fio->bio);
3657
3658                 bio->bi_status = BLK_STS_IOERR;
3659                 bio_endio(bio);
3660                 *(fio->bio) = NULL;
3661         }
3662         return err;
3663 }
3664
3665 static inline int __f2fs_get_curseg(struct f2fs_sb_info *sbi,
3666                                                 unsigned int segno)
3667 {
3668         int i;
3669
3670         for (i = CURSEG_HOT_DATA; i < NO_CHECK_TYPE; i++) {
3671                 if (CURSEG_I(sbi, i)->segno == segno)
3672                         break;
3673         }
3674         return i;
3675 }
3676
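/*
 * Rewrite @sum at @new_blkaddr on behalf of recovery or GC: temporarily point
 * the matching data curseg at the target segment, update SIT entries for the
 * old and new addresses, and optionally restore the previous curseg position
 * afterwards when @recover_curseg is set.
 */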
3677 void f2fs_do_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
3678                                 block_t old_blkaddr, block_t new_blkaddr,
3679                                 bool recover_curseg, bool recover_newaddr,
3680                                 bool from_gc)
3681 {
3682         struct sit_info *sit_i = SIT_I(sbi);
3683         struct curseg_info *curseg;
3684         unsigned int segno, old_cursegno;
3685         struct seg_entry *se;
3686         int type;
3687         unsigned short old_blkoff;
3688         unsigned char old_alloc_type;
3689
3690         segno = GET_SEGNO(sbi, new_blkaddr);
3691         se = get_seg_entry(sbi, segno);
3692         type = se->type;
3693
3694         f2fs_down_write(&SM_I(sbi)->curseg_lock);
3695
3696         if (!recover_curseg) {
3697                 /* for recovery flow */
3698                 if (se->valid_blocks == 0 && !IS_CURSEG(sbi, segno)) {
3699                         if (old_blkaddr == NULL_ADDR)
3700                                 type = CURSEG_COLD_DATA;
3701                         else
3702                                 type = CURSEG_WARM_DATA;
3703                 }
3704         } else {
3705                 if (IS_CURSEG(sbi, segno)) {
3706                         /* se->type is volatile due to SSR allocation */
3707                         type = __f2fs_get_curseg(sbi, segno);
3708                         f2fs_bug_on(sbi, type == NO_CHECK_TYPE);
3709                 } else {
3710                         type = CURSEG_WARM_DATA;
3711                 }
3712         }
3713
3714         f2fs_bug_on(sbi, !IS_DATASEG(type));
3715         curseg = CURSEG_I(sbi, type);
3716
3717         mutex_lock(&curseg->curseg_mutex);
3718         down_write(&sit_i->sentry_lock);
3719
3720         old_cursegno = curseg->segno;
3721         old_blkoff = curseg->next_blkoff;
3722         old_alloc_type = curseg->alloc_type;
3723
3724         /* change the current segment */
3725         if (segno != curseg->segno) {
3726                 curseg->next_segno = segno;
3727                 change_curseg(sbi, type);
3728         }
3729
3730         curseg->next_blkoff = GET_BLKOFF_FROM_SEG0(sbi, new_blkaddr);
3731         curseg->sum_blk->entries[curseg->next_blkoff] = *sum;
3732
3733         if (!recover_curseg || recover_newaddr) {
3734                 if (!from_gc)
3735                         update_segment_mtime(sbi, new_blkaddr, 0);
3736                 update_sit_entry(sbi, new_blkaddr, 1);
3737         }
3738         if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO) {
3739                 invalidate_mapping_pages(META_MAPPING(sbi),
3740                                         old_blkaddr, old_blkaddr);
3741                 f2fs_invalidate_compress_page(sbi, old_blkaddr);
3742                 if (!from_gc)
3743                         update_segment_mtime(sbi, old_blkaddr, 0);
3744                 update_sit_entry(sbi, old_blkaddr, -1);
3745         }
3746
3747         locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr));
3748         locate_dirty_segment(sbi, GET_SEGNO(sbi, new_blkaddr));
3749
3750         locate_dirty_segment(sbi, old_cursegno);
3751
3752         if (recover_curseg) {
3753                 if (old_cursegno != curseg->segno) {
3754                         curseg->next_segno = old_cursegno;
3755                         change_curseg(sbi, type);
3756                 }
3757                 curseg->next_blkoff = old_blkoff;
3758                 curseg->alloc_type = old_alloc_type;
3759         }
3760
3761         up_write(&sit_i->sentry_lock);
3762         mutex_unlock(&curseg->curseg_mutex);
3763         f2fs_up_write(&SM_I(sbi)->curseg_lock);
3764 }
3765
3766 void f2fs_replace_block(struct f2fs_sb_info *sbi, struct dnode_of_data *dn,
3767                                 block_t old_addr, block_t new_addr,
3768                                 unsigned char version, bool recover_curseg,
3769                                 bool recover_newaddr)
3770 {
3771         struct f2fs_summary sum;
3772
3773         set_summary(&sum, dn->nid, dn->ofs_in_node, version);
3774
3775         f2fs_do_replace_block(sbi, &sum, old_addr, new_addr,
3776                                         recover_curseg, recover_newaddr, false);
3777
3778         f2fs_update_data_blkaddr(dn, new_addr);
3779 }
3780
3781 void f2fs_wait_on_page_writeback(struct page *page,
3782                                 enum page_type type, bool ordered, bool locked)
3783 {
3784         if (PageWriteback(page)) {
3785                 struct f2fs_sb_info *sbi = F2FS_P_SB(page);
3786
3787                 /* submit cached LFS IO */
3788                 f2fs_submit_merged_write_cond(sbi, NULL, page, 0, type);
3789                 /* submit cached IPU IO */
3790                 f2fs_submit_merged_ipu_write(sbi, NULL, page);
3791                 if (ordered) {
3792                         wait_on_page_writeback(page);
3793                         f2fs_bug_on(sbi, locked && PageWriteback(page));
3794                 } else {
3795                         wait_for_stable_page(page);
3796                 }
3797         }
3798 }
3799
3800 void f2fs_wait_on_block_writeback(struct inode *inode, block_t blkaddr)
3801 {
3802         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
3803         struct page *cpage;
3804
3805         if (!f2fs_post_read_required(inode))
3806                 return;
3807
3808         if (!__is_valid_data_blkaddr(blkaddr))
3809                 return;
3810
3811         cpage = find_lock_page(META_MAPPING(sbi), blkaddr);
3812         if (cpage) {
3813                 f2fs_wait_on_page_writeback(cpage, DATA, true, true);
3814                 f2fs_put_page(cpage, 1);
3815         }
3816 }
3817
3818 void f2fs_wait_on_block_writeback_range(struct inode *inode, block_t blkaddr,
3819                                                                 block_t len)
3820 {
3821         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
3822         block_t i;
3823
3824         if (!f2fs_post_read_required(inode))
3825                 return;
3826
3827         for (i = 0; i < len; i++)
3828                 f2fs_wait_on_block_writeback(inode, blkaddr + i);
3829
3830         invalidate_mapping_pages(META_MAPPING(sbi), blkaddr, blkaddr + len - 1);
3831 }
3832
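/*
 * Read the compacted summary area written at checkpoint time: the NAT and SIT
 * journals first, then the packed summary entries of the three data logs,
 * crossing meta pages as needed.
 */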
3833 static int read_compacted_summaries(struct f2fs_sb_info *sbi)
3834 {
3835         struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
3836         struct curseg_info *seg_i;
3837         unsigned char *kaddr;
3838         struct page *page;
3839         block_t start;
3840         int i, j, offset;
3841
3842         start = start_sum_block(sbi);
3843
3844         page = f2fs_get_meta_page(sbi, start++);
3845         if (IS_ERR(page))
3846                 return PTR_ERR(page);
3847         kaddr = (unsigned char *)page_address(page);
3848
3849         /* Step 1: restore nat cache */
3850         seg_i = CURSEG_I(sbi, CURSEG_HOT_DATA);
3851         memcpy(seg_i->journal, kaddr, SUM_JOURNAL_SIZE);
3852
3853         /* Step 2: restore sit cache */
3854         seg_i = CURSEG_I(sbi, CURSEG_COLD_DATA);
3855         memcpy(seg_i->journal, kaddr + SUM_JOURNAL_SIZE, SUM_JOURNAL_SIZE);
3856         offset = 2 * SUM_JOURNAL_SIZE;
3857
3858         /* Step 3: restore summary entries */
3859         for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
3860                 unsigned short blk_off;
3861                 unsigned int segno;
3862
3863                 seg_i = CURSEG_I(sbi, i);
3864                 segno = le32_to_cpu(ckpt->cur_data_segno[i]);
3865                 blk_off = le16_to_cpu(ckpt->cur_data_blkoff[i]);
3866                 seg_i->next_segno = segno;
3867                 reset_curseg(sbi, i, 0);
3868                 seg_i->alloc_type = ckpt->alloc_type[i];
3869                 seg_i->next_blkoff = blk_off;
3870
3871                 if (seg_i->alloc_type == SSR)
3872                         blk_off = sbi->blocks_per_seg;
3873
3874                 for (j = 0; j < blk_off; j++) {
3875                         struct f2fs_summary *s;
3876
3877                         s = (struct f2fs_summary *)(kaddr + offset);
3878                         seg_i->sum_blk->entries[j] = *s;
3879                         offset += SUMMARY_SIZE;
3880                         if (offset + SUMMARY_SIZE <= PAGE_SIZE -
3881                                                 SUM_FOOTER_SIZE)
3882                                 continue;
3883
3884                         f2fs_put_page(page, 1);
3885                         page = NULL;
3886
3887                         page = f2fs_get_meta_page(sbi, start++);
3888                         if (IS_ERR(page))
3889                                 return PTR_ERR(page);
3890                         kaddr = (unsigned char *)page_address(page);
3891                         offset = 0;
3892                 }
3893         }
3894         f2fs_put_page(page, 1);
3895         return 0;
3896 }
3897
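/*
 * Load the full summary block of one curseg from its checkpointed location
 * (rebuilding node summaries from node blocks when they were not persisted)
 * and install it as the current segment of @type.
 */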
3898 static int read_normal_summaries(struct f2fs_sb_info *sbi, int type)
3899 {
3900         struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
3901         struct f2fs_summary_block *sum;
3902         struct curseg_info *curseg;
3903         struct page *new;
3904         unsigned short blk_off;
3905         unsigned int segno = 0;
3906         block_t blk_addr = 0;
3907         int err = 0;
3908
3909         /* get segment number and block addr */
3910         if (IS_DATASEG(type)) {
3911                 segno = le32_to_cpu(ckpt->cur_data_segno[type]);
3912                 blk_off = le16_to_cpu(ckpt->cur_data_blkoff[type -
3913                                                         CURSEG_HOT_DATA]);
3914                 if (__exist_node_summaries(sbi))
3915                         blk_addr = sum_blk_addr(sbi, NR_CURSEG_PERSIST_TYPE, type);
3916                 else
3917                         blk_addr = sum_blk_addr(sbi, NR_CURSEG_DATA_TYPE, type);
3918         } else {
3919                 segno = le32_to_cpu(ckpt->cur_node_segno[type -
3920                                                         CURSEG_HOT_NODE]);
3921                 blk_off = le16_to_cpu(ckpt->cur_node_blkoff[type -
3922                                                         CURSEG_HOT_NODE]);
3923                 if (__exist_node_summaries(sbi))
3924                         blk_addr = sum_blk_addr(sbi, NR_CURSEG_NODE_TYPE,
3925                                                         type - CURSEG_HOT_NODE);
3926                 else
3927                         blk_addr = GET_SUM_BLOCK(sbi, segno);
3928         }
3929
3930         new = f2fs_get_meta_page(sbi, blk_addr);
3931         if (IS_ERR(new))
3932                 return PTR_ERR(new);
3933         sum = (struct f2fs_summary_block *)page_address(new);
3934
3935         if (IS_NODESEG(type)) {
3936                 if (__exist_node_summaries(sbi)) {
3937                         struct f2fs_summary *ns = &sum->entries[0];
3938                         int i;
3939
3940                         for (i = 0; i < sbi->blocks_per_seg; i++, ns++) {
3941                                 ns->version = 0;
3942                                 ns->ofs_in_node = 0;
3943                         }
3944                 } else {
3945                         err = f2fs_restore_node_summary(sbi, segno, sum);
3946                         if (err)
3947                                 goto out;
3948                 }
3949         }
3950
3951         /* set uncompleted segment to curseg */
3952         curseg = CURSEG_I(sbi, type);
3953         mutex_lock(&curseg->curseg_mutex);
3954
3955         /* update journal info */
3956         down_write(&curseg->journal_rwsem);
3957         memcpy(curseg->journal, &sum->journal, SUM_JOURNAL_SIZE);
3958         up_write(&curseg->journal_rwsem);
3959
3960         memcpy(curseg->sum_blk->entries, sum->entries, SUM_ENTRY_SIZE);
3961         memcpy(&curseg->sum_blk->footer, &sum->footer, SUM_FOOTER_SIZE);
3962         curseg->next_segno = segno;
3963         reset_curseg(sbi, type, 0);
3964         curseg->alloc_type = ckpt->alloc_type[type];
3965         curseg->next_blkoff = blk_off;
3966         mutex_unlock(&curseg->curseg_mutex);
3967 out:
3968         f2fs_put_page(new, 1);
3969         return err;
3970 }
3971
3972 static int restore_curseg_summaries(struct f2fs_sb_info *sbi)
3973 {
3974         struct f2fs_journal *sit_j = CURSEG_I(sbi, CURSEG_COLD_DATA)->journal;
3975         struct f2fs_journal *nat_j = CURSEG_I(sbi, CURSEG_HOT_DATA)->journal;
3976         int type = CURSEG_HOT_DATA;
3977         int err;
3978
3979         if (is_set_ckpt_flags(sbi, CP_COMPACT_SUM_FLAG)) {
3980                 int npages = f2fs_npages_for_summary_flush(sbi, true);
3981
3982                 if (npages >= 2)
3983                         f2fs_ra_meta_pages(sbi, start_sum_block(sbi), npages,
3984                                                         META_CP, true);
3985
3986                 /* restore for compacted data summary */
3987                 err = read_compacted_summaries(sbi);
3988                 if (err)
3989                         return err;
3990                 type = CURSEG_HOT_NODE;
3991         }
3992
3993         if (__exist_node_summaries(sbi))
3994                 f2fs_ra_meta_pages(sbi,
3995                                 sum_blk_addr(sbi, NR_CURSEG_PERSIST_TYPE, type),
3996                                 NR_CURSEG_PERSIST_TYPE - type, META_CP, true);
3997
3998         for (; type <= CURSEG_COLD_NODE; type++) {
3999                 err = read_normal_summaries(sbi, type);
4000                 if (err)
4001                         return err;
4002         }
4003
4004         /* sanity check for summary blocks */
4005         if (nats_in_cursum(nat_j) > NAT_JOURNAL_ENTRIES ||
4006                         sits_in_cursum(sit_j) > SIT_JOURNAL_ENTRIES) {
4007                 f2fs_err(sbi, "invalid journal entries nats %u sits %u",
4008                          nats_in_cursum(nat_j), sits_in_cursum(sit_j));
4009                 return -EINVAL;
4010         }
4011
4012         return 0;
4013 }
4014
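/*
 * Pack the NAT/SIT journals and the valid summary entries of the three data
 * logs into as few meta pages as possible, starting at @blkaddr.
 */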
4015 static void write_compacted_summaries(struct f2fs_sb_info *sbi, block_t blkaddr)
4016 {
4017         struct page *page;
4018         unsigned char *kaddr;
4019         struct f2fs_summary *summary;
4020         struct curseg_info *seg_i;
4021         int written_size = 0;
4022         int i, j;
4023
4024         page = f2fs_grab_meta_page(sbi, blkaddr++);
4025         kaddr = (unsigned char *)page_address(page);
4026         memset(kaddr, 0, PAGE_SIZE);
4027
4028         /* Step 1: write nat cache */
4029         seg_i = CURSEG_I(sbi, CURSEG_HOT_DATA);
4030         memcpy(kaddr, seg_i->journal, SUM_JOURNAL_SIZE);
4031         written_size += SUM_JOURNAL_SIZE;
4032
4033         /* Step 2: write sit cache */
4034         seg_i = CURSEG_I(sbi, CURSEG_COLD_DATA);
4035         memcpy(kaddr + written_size, seg_i->journal, SUM_JOURNAL_SIZE);
4036         written_size += SUM_JOURNAL_SIZE;
4037
4038         /* Step 3: write summary entries */
4039         for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
4040                 seg_i = CURSEG_I(sbi, i);
4041                 for (j = 0; j < f2fs_curseg_valid_blocks(sbi, i); j++) {
4042                         if (!page) {
4043                                 page = f2fs_grab_meta_page(sbi, blkaddr++);
4044                                 kaddr = (unsigned char *)page_address(page);
4045                                 memset(kaddr, 0, PAGE_SIZE);
4046                                 written_size = 0;
4047                         }
4048                         summary = (struct f2fs_summary *)(kaddr + written_size);
4049                         *summary = seg_i->sum_blk->entries[j];
4050                         written_size += SUMMARY_SIZE;
4051
4052                         if (written_size + SUMMARY_SIZE <= PAGE_SIZE -
4053                                                         SUM_FOOTER_SIZE)
4054                                 continue;
4055
4056                         set_page_dirty(page);
4057                         f2fs_put_page(page, 1);
4058                         page = NULL;
4059                 }
4060         }
4061         if (page) {
4062                 set_page_dirty(page);
4063                 f2fs_put_page(page, 1);
4064         }
4065 }
4066
4067 static void write_normal_summaries(struct f2fs_sb_info *sbi,
4068                                         block_t blkaddr, int type)
4069 {
4070         int i, end;
4071
4072         if (IS_DATASEG(type))
4073                 end = type + NR_CURSEG_DATA_TYPE;
4074         else
4075                 end = type + NR_CURSEG_NODE_TYPE;
4076
4077         for (i = type; i < end; i++)
4078                 write_current_sum_page(sbi, i, blkaddr + (i - type));
4079 }
4080
4081 void f2fs_write_data_summaries(struct f2fs_sb_info *sbi, block_t start_blk)
4082 {
4083         if (is_set_ckpt_flags(sbi, CP_COMPACT_SUM_FLAG))
4084                 write_compacted_summaries(sbi, start_blk);
4085         else
4086                 write_normal_summaries(sbi, start_blk, CURSEG_HOT_DATA);
4087 }
4088
4089 void f2fs_write_node_summaries(struct f2fs_sb_info *sbi, block_t start_blk)
4090 {
4091         write_normal_summaries(sbi, start_blk, CURSEG_HOT_NODE);
4092 }
4093
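/*
 * Find the journal slot holding @val (a nid for NAT_JOURNAL, a segno for
 * SIT_JOURNAL). If it is absent and @alloc is set, reserve a new slot when
 * space remains. Returns the slot index, or -1.
 */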
4094 int f2fs_lookup_journal_in_cursum(struct f2fs_journal *journal, int type,
4095                                         unsigned int val, int alloc)
4096 {
4097         int i;
4098
4099         if (type == NAT_JOURNAL) {
4100                 for (i = 0; i < nats_in_cursum(journal); i++) {
4101                         if (le32_to_cpu(nid_in_journal(journal, i)) == val)
4102                                 return i;
4103                 }
4104                 if (alloc && __has_cursum_space(journal, 1, NAT_JOURNAL))
4105                         return update_nats_in_cursum(journal, 1);
4106         } else if (type == SIT_JOURNAL) {
4107                 for (i = 0; i < sits_in_cursum(journal); i++)
4108                         if (le32_to_cpu(segno_in_journal(journal, i)) == val)
4109                                 return i;
4110                 if (alloc && __has_cursum_space(journal, 1, SIT_JOURNAL))
4111                         return update_sits_in_cursum(journal, 1);
4112         }
4113         return -1;
4114 }
4115
4116 static struct page *get_current_sit_page(struct f2fs_sb_info *sbi,
4117                                         unsigned int segno)
4118 {
4119         return f2fs_get_meta_page(sbi, current_sit_addr(sbi, segno));
4120 }
4121
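/*
 * Grab the SIT page at the "next" location for @start, fill it from the
 * in-memory segment entries and flip its bit so this copy becomes the current
 * one from the next checkpoint on.
 */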
4122 static struct page *get_next_sit_page(struct f2fs_sb_info *sbi,
4123                                         unsigned int start)
4124 {
4125         struct sit_info *sit_i = SIT_I(sbi);
4126         struct page *page;
4127         pgoff_t src_off, dst_off;
4128
4129         src_off = current_sit_addr(sbi, start);
4130         dst_off = next_sit_addr(sbi, src_off);
4131
4132         page = f2fs_grab_meta_page(sbi, dst_off);
4133         seg_info_to_sit_page(sbi, page, start);
4134
4135         set_page_dirty(page);
4136         set_to_next_sit(sit_i, start);
4137
4138         return page;
4139 }
4140
4141 static struct sit_entry_set *grab_sit_entry_set(void)
4142 {
4143         struct sit_entry_set *ses =
4144                         f2fs_kmem_cache_alloc(sit_entry_set_slab,
4145                                                 GFP_NOFS, true, NULL);
4146
4147         ses->entry_cnt = 0;
4148         INIT_LIST_HEAD(&ses->set_list);
4149         return ses;
4150 }
4151
4152 static void release_sit_entry_set(struct sit_entry_set *ses)
4153 {
4154         list_del(&ses->set_list);
4155         kmem_cache_free(sit_entry_set_slab, ses);
4156 }
4157
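/*
 * Keep the sit_entry_set list ordered by entry_cnt after @ses has grown,
 * moving it toward the tail past sets with smaller counts.
 */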
4158 static void adjust_sit_entry_set(struct sit_entry_set *ses,
4159                                                 struct list_head *head)
4160 {
4161         struct sit_entry_set *next = ses;
4162
4163         if (list_is_last(&ses->set_list, head))
4164                 return;
4165
4166         list_for_each_entry_continue(next, head, set_list)
4167                 if (ses->entry_cnt <= next->entry_cnt) {
4168                         list_move_tail(&ses->set_list, &next->set_list);
4169                         return;
4170                 }
4171
4172         list_move_tail(&ses->set_list, head);
4173 }
4174
4175 static void add_sit_entry(unsigned int segno, struct list_head *head)
4176 {
4177         struct sit_entry_set *ses;
4178         unsigned int start_segno = START_SEGNO(segno);
4179
4180         list_for_each_entry(ses, head, set_list) {
4181                 if (ses->start_segno == start_segno) {
4182                         ses->entry_cnt++;
4183                         adjust_sit_entry_set(ses, head);
4184                         return;
4185                 }
4186         }
4187
4188         ses = grab_sit_entry_set();
4189
4190         ses->start_segno = start_segno;
4191         ses->entry_cnt++;
4192         list_add(&ses->set_list, head);
4193 }
4194
4195 static void add_sits_in_set(struct f2fs_sb_info *sbi)
4196 {
4197         struct f2fs_sm_info *sm_info = SM_I(sbi);
4198         struct list_head *set_list = &sm_info->sit_entry_set;
4199         unsigned long *bitmap = SIT_I(sbi)->dirty_sentries_bitmap;
4200         unsigned int segno;
4201
4202         for_each_set_bit(segno, bitmap, MAIN_SEGS(sbi))
4203                 add_sit_entry(segno, set_list);
4204 }
4205
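/*
 * Drop every SIT entry cached in the cold data journal, marking the affected
 * segments dirty (and accounting them in the sit entry set) so they are
 * flushed through SIT pages instead.
 */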
4206 static void remove_sits_in_journal(struct f2fs_sb_info *sbi)
4207 {
4208         struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
4209         struct f2fs_journal *journal = curseg->journal;
4210         int i;
4211
4212         down_write(&curseg->journal_rwsem);
4213         for (i = 0; i < sits_in_cursum(journal); i++) {
4214                 unsigned int segno;
4215                 bool dirtied;
4216
4217                 segno = le32_to_cpu(segno_in_journal(journal, i));
4218                 dirtied = __mark_sit_entry_dirty(sbi, segno);
4219
4220                 if (!dirtied)
4221                         add_sit_entry(segno, &SM_I(sbi)->sit_entry_set);
4222         }
4223         update_sits_in_cursum(journal, -i);
4224         up_write(&curseg->journal_rwsem);
4225 }
4226
4227 /*
4228  * CP calls this function, which flushes SIT entries including sit_journal,
4229  * and moves prefree segs to free segs.
4230  */
4231 void f2fs_flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
4232 {
4233         struct sit_info *sit_i = SIT_I(sbi);
4234         unsigned long *bitmap = sit_i->dirty_sentries_bitmap;
4235         struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
4236         struct f2fs_journal *journal = curseg->journal;
4237         struct sit_entry_set *ses, *tmp;
4238         struct list_head *head = &SM_I(sbi)->sit_entry_set;
4239         bool to_journal = !is_sbi_flag_set(sbi, SBI_IS_RESIZEFS);
4240         struct seg_entry *se;
4241
4242         down_write(&sit_i->sentry_lock);
4243
4244         if (!sit_i->dirty_sentries)
4245                 goto out;
4246
4247         /*
4248          * add and account the sit entries of the dirty bitmap in the sit
4249          * entry set temporarily
4250          */
4251         add_sits_in_set(sbi);
4252
4253         /*
4254          * if there is not enough space in the journal to store dirty sit
4255          * entries, remove all entries from the journal and add and account
4256          * them in the sit entry set.
4257          */
4258         if (!__has_cursum_space(journal, sit_i->dirty_sentries, SIT_JOURNAL) ||
4259                                                                 !to_journal)
4260                 remove_sits_in_journal(sbi);
4261
4262         /*
4263          * there are two steps to flush sit entries:
4264          * #1, flush sit entries to journal in current cold data summary block.
4265          * #2, flush sit entries to sit page.
4266          */
4267         list_for_each_entry_safe(ses, tmp, head, set_list) {
4268                 struct page *page = NULL;
4269                 struct f2fs_sit_block *raw_sit = NULL;
4270                 unsigned int start_segno = ses->start_segno;
4271                 unsigned int end = min(start_segno + SIT_ENTRY_PER_BLOCK,
4272                                                 (unsigned long)MAIN_SEGS(sbi));
4273                 unsigned int segno = start_segno;
4274
4275                 if (to_journal &&
4276                         !__has_cursum_space(journal, ses->entry_cnt, SIT_JOURNAL))
4277                         to_journal = false;
4278
4279                 if (to_journal) {
4280                         down_write(&curseg->journal_rwsem);
4281                 } else {
4282                         page = get_next_sit_page(sbi, start_segno);
4283                         raw_sit = page_address(page);
4284                 }
4285
4286                 /* flush dirty sit entries in region of current sit set */
4287                 for_each_set_bit_from(segno, bitmap, end) {
4288                         int offset, sit_offset;
4289
4290                         se = get_seg_entry(sbi, segno);
4291 #ifdef CONFIG_F2FS_CHECK_FS
4292                         if (memcmp(se->cur_valid_map, se->cur_valid_map_mir,
4293                                                 SIT_VBLOCK_MAP_SIZE))
4294                                 f2fs_bug_on(sbi, 1);
4295 #endif
4296
4297                         /* add discard candidates */
4298                         if (!(cpc->reason & CP_DISCARD)) {
4299                                 cpc->trim_start = segno;
4300                                 add_discard_addrs(sbi, cpc, false);
4301                         }
4302
4303                         if (to_journal) {
4304                                 offset = f2fs_lookup_journal_in_cursum(journal,
4305                                                         SIT_JOURNAL, segno, 1);
4306                                 f2fs_bug_on(sbi, offset < 0);
4307                                 segno_in_journal(journal, offset) =
4308                                                         cpu_to_le32(segno);
4309                                 seg_info_to_raw_sit(se,
4310                                         &sit_in_journal(journal, offset));
4311                                 check_block_count(sbi, segno,
4312                                         &sit_in_journal(journal, offset));
4313                         } else {
4314                                 sit_offset = SIT_ENTRY_OFFSET(sit_i, segno);
4315                                 seg_info_to_raw_sit(se,
4316                                                 &raw_sit->entries[sit_offset]);
4317                                 check_block_count(sbi, segno,
4318                                                 &raw_sit->entries[sit_offset]);
4319                         }
4320
4321                         __clear_bit(segno, bitmap);
4322                         sit_i->dirty_sentries--;
4323                         ses->entry_cnt--;
4324                 }
4325
4326                 if (to_journal)
4327                         up_write(&curseg->journal_rwsem);
4328                 else
4329                         f2fs_put_page(page, 1);
4330
4331                 f2fs_bug_on(sbi, ses->entry_cnt);
4332                 release_sit_entry_set(ses);
4333         }
4334
4335         f2fs_bug_on(sbi, !list_empty(head));
4336         f2fs_bug_on(sbi, sit_i->dirty_sentries);
4337 out:
4338         if (cpc->reason & CP_DISCARD) {
4339                 __u64 trim_start = cpc->trim_start;
4340
4341                 for (; cpc->trim_start <= cpc->trim_end; cpc->trim_start++)
4342                         add_discard_addrs(sbi, cpc, false);
4343
4344                 cpc->trim_start = trim_start;
4345         }
4346         up_write(&sit_i->sentry_lock);
4347
4348         set_prefree_as_free_segments(sbi);
4349 }
4350
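/*
 * Allocate the in-memory SIT structures: the per-segment seg_entry array
 * with its validity (and optional discard) bitmaps, the dirty sentries
 * bitmap, the per-section entries for large sections, and a copy of the
 * SIT bitmap taken from the checkpoint pack.
 */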
4351 static int build_sit_info(struct f2fs_sb_info *sbi)
4352 {
4353         struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
4354         struct sit_info *sit_i;
4355         unsigned int sit_segs, start;
4356         char *src_bitmap, *bitmap;
4357         unsigned int bitmap_size, main_bitmap_size, sit_bitmap_size;
4358         unsigned int discard_map = f2fs_block_unit_discard(sbi) ? 1 : 0;
4359
4360         /* allocate memory for SIT information */
4361         sit_i = f2fs_kzalloc(sbi, sizeof(struct sit_info), GFP_KERNEL);
4362         if (!sit_i)
4363                 return -ENOMEM;
4364
4365         SM_I(sbi)->sit_info = sit_i;
4366
4367         sit_i->sentries =
4368                 f2fs_kvzalloc(sbi, array_size(sizeof(struct seg_entry),
4369                                               MAIN_SEGS(sbi)),
4370                               GFP_KERNEL);
4371         if (!sit_i->sentries)
4372                 return -ENOMEM;
4373
4374         main_bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
4375         sit_i->dirty_sentries_bitmap = f2fs_kvzalloc(sbi, main_bitmap_size,
4376                                                                 GFP_KERNEL);
4377         if (!sit_i->dirty_sentries_bitmap)
4378                 return -ENOMEM;
4379
4380 #ifdef CONFIG_F2FS_CHECK_FS
4381         bitmap_size = MAIN_SEGS(sbi) * SIT_VBLOCK_MAP_SIZE * (3 + discard_map);
4382 #else
4383         bitmap_size = MAIN_SEGS(sbi) * SIT_VBLOCK_MAP_SIZE * (2 + discard_map);
4384 #endif
4385         sit_i->bitmap = f2fs_kvzalloc(sbi, bitmap_size, GFP_KERNEL);
4386         if (!sit_i->bitmap)
4387                 return -ENOMEM;
4388
4389         bitmap = sit_i->bitmap;
4390
4391         for (start = 0; start < MAIN_SEGS(sbi); start++) {
4392                 sit_i->sentries[start].cur_valid_map = bitmap;
4393                 bitmap += SIT_VBLOCK_MAP_SIZE;
4394
4395                 sit_i->sentries[start].ckpt_valid_map = bitmap;
4396                 bitmap += SIT_VBLOCK_MAP_SIZE;
4397
4398 #ifdef CONFIG_F2FS_CHECK_FS
4399                 sit_i->sentries[start].cur_valid_map_mir = bitmap;
4400                 bitmap += SIT_VBLOCK_MAP_SIZE;
4401 #endif
4402
4403                 if (discard_map) {
4404                         sit_i->sentries[start].discard_map = bitmap;
4405                         bitmap += SIT_VBLOCK_MAP_SIZE;
4406                 }
4407         }
4408
4409         sit_i->tmp_map = f2fs_kzalloc(sbi, SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
4410         if (!sit_i->tmp_map)
4411                 return -ENOMEM;
4412
4413         if (__is_large_section(sbi)) {
4414                 sit_i->sec_entries =
4415                         f2fs_kvzalloc(sbi, array_size(sizeof(struct sec_entry),
4416                                                       MAIN_SECS(sbi)),
4417                                       GFP_KERNEL);
4418                 if (!sit_i->sec_entries)
4419                         return -ENOMEM;
4420         }
4421
4422         /* get information related to SIT */
4423         sit_segs = le32_to_cpu(raw_super->segment_count_sit) >> 1;
4424
4425         /* set up the SIT bitmap from the checkpoint pack */
4426         sit_bitmap_size = __bitmap_size(sbi, SIT_BITMAP);
4427         src_bitmap = __bitmap_ptr(sbi, SIT_BITMAP);
4428
4429         sit_i->sit_bitmap = kmemdup(src_bitmap, sit_bitmap_size, GFP_KERNEL);
4430         if (!sit_i->sit_bitmap)
4431                 return -ENOMEM;
4432
4433 #ifdef CONFIG_F2FS_CHECK_FS
4434         sit_i->sit_bitmap_mir = kmemdup(src_bitmap,
4435                                         sit_bitmap_size, GFP_KERNEL);
4436         if (!sit_i->sit_bitmap_mir)
4437                 return -ENOMEM;
4438
4439         sit_i->invalid_segmap = f2fs_kvzalloc(sbi,
4440                                         main_bitmap_size, GFP_KERNEL);
4441         if (!sit_i->invalid_segmap)
4442                 return -ENOMEM;
4443 #endif
4444
4445         sit_i->sit_base_addr = le32_to_cpu(raw_super->sit_blkaddr);
4446         sit_i->sit_blocks = sit_segs << sbi->log_blocks_per_seg;
4447         sit_i->written_valid_blocks = 0;
4448         sit_i->bitmap_size = sit_bitmap_size;
4449         sit_i->dirty_sentries = 0;
4450         sit_i->sents_per_block = SIT_ENTRY_PER_BLOCK;
4451         sit_i->elapsed_time = le64_to_cpu(sbi->ckpt->elapsed_time);
4452         sit_i->mounted_time = ktime_get_boottime_seconds();
4453         init_rwsem(&sit_i->sentry_lock);
4454         return 0;
4455 }
4456
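/*
 * Allocate the free segment/section bitmaps. Every bit starts set (all
 * segments treated as in use); init_free_segmap() clears the bits of
 * truly free segments once the SIT entries have been loaded.
 */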
4457 static int build_free_segmap(struct f2fs_sb_info *sbi)
4458 {
4459         struct free_segmap_info *free_i;
4460         unsigned int bitmap_size, sec_bitmap_size;
4461
4462         /* allocate memory for free segmap information */
4463         free_i = f2fs_kzalloc(sbi, sizeof(struct free_segmap_info), GFP_KERNEL);
4464         if (!free_i)
4465                 return -ENOMEM;
4466
4467         SM_I(sbi)->free_info = free_i;
4468
4469         bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
4470         free_i->free_segmap = f2fs_kvmalloc(sbi, bitmap_size, GFP_KERNEL);
4471         if (!free_i->free_segmap)
4472                 return -ENOMEM;
4473
4474         sec_bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi));
4475         free_i->free_secmap = f2fs_kvmalloc(sbi, sec_bitmap_size, GFP_KERNEL);
4476         if (!free_i->free_secmap)
4477                 return -ENOMEM;
4478
4479         /* set all segments as dirty temporarily */
4480         memset(free_i->free_segmap, 0xff, bitmap_size);
4481         memset(free_i->free_secmap, 0xff, sec_bitmap_size);
4482
4483         /* init free segmap information */
4484         free_i->start_segno = GET_SEGNO_FROM_SEG0(sbi, MAIN_BLKADDR(sbi));
4485         free_i->free_segments = 0;
4486         free_i->free_sections = 0;
4487         spin_lock_init(&free_i->segmap_lock);
4488         return 0;
4489 }
4490
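/*
 * Allocate one curseg_info per log type, including its summary block and
 * in-memory journal, then restore the current segment summaries from disk
 * via restore_curseg_summaries().
 */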
4491 static int build_curseg(struct f2fs_sb_info *sbi)
4492 {
4493         struct curseg_info *array;
4494         int i;
4495
4496         array = f2fs_kzalloc(sbi, array_size(NR_CURSEG_TYPE,
4497                                         sizeof(*array)), GFP_KERNEL);
4498         if (!array)
4499                 return -ENOMEM;
4500
4501         SM_I(sbi)->curseg_array = array;
4502
4503         for (i = 0; i < NO_CHECK_TYPE; i++) {
4504                 mutex_init(&array[i].curseg_mutex);
4505                 array[i].sum_blk = f2fs_kzalloc(sbi, PAGE_SIZE, GFP_KERNEL);
4506                 if (!array[i].sum_blk)
4507                         return -ENOMEM;
4508                 init_rwsem(&array[i].journal_rwsem);
4509                 array[i].journal = f2fs_kzalloc(sbi,
4510                                 sizeof(struct f2fs_journal), GFP_KERNEL);
4511                 if (!array[i].journal)
4512                         return -ENOMEM;
4513                 if (i < NR_PERSISTENT_LOG)
4514                         array[i].seg_type = CURSEG_HOT_DATA + i;
4515                 else if (i == CURSEG_COLD_DATA_PINNED)
4516                         array[i].seg_type = CURSEG_COLD_DATA;
4517                 else if (i == CURSEG_ALL_DATA_ATGC)
4518                         array[i].seg_type = CURSEG_COLD_DATA;
4519                 array[i].segno = NULL_SEGNO;
4520                 array[i].next_blkoff = 0;
4521                 array[i].inited = false;
4522         }
4523         return restore_curseg_summaries(sbi);
4524 }
4525
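/*
 * Initialize the in-memory seg_entries from the on-disk SIT blocks, then
 * overlay the newer entries recorded in the SIT journal of the cold data
 * curseg. The accumulated per-type valid block counts are cross-checked
 * against the checkpoint to catch corruption early.
 */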
4526 static int build_sit_entries(struct f2fs_sb_info *sbi)
4527 {
4528         struct sit_info *sit_i = SIT_I(sbi);
4529         struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
4530         struct f2fs_journal *journal = curseg->journal;
4531         struct seg_entry *se;
4532         struct f2fs_sit_entry sit;
4533         int sit_blk_cnt = SIT_BLK_CNT(sbi);
4534         unsigned int i, start, end;
4535         unsigned int readed, start_blk = 0;
4536         int err = 0;
4537         block_t sit_valid_blocks[2] = {0, 0};
4538
4539         do {
4540                 readed = f2fs_ra_meta_pages(sbi, start_blk, BIO_MAX_VECS,
4541                                                         META_SIT, true);
4542
4543                 start = start_blk * sit_i->sents_per_block;
4544                 end = (start_blk + readed) * sit_i->sents_per_block;
4545
4546                 for (; start < end && start < MAIN_SEGS(sbi); start++) {
4547                         struct f2fs_sit_block *sit_blk;
4548                         struct page *page;
4549
4550                         se = &sit_i->sentries[start];
4551                         page = get_current_sit_page(sbi, start);
4552                         if (IS_ERR(page))
4553                                 return PTR_ERR(page);
4554                         sit_blk = (struct f2fs_sit_block *)page_address(page);
4555                         sit = sit_blk->entries[SIT_ENTRY_OFFSET(sit_i, start)];
4556                         f2fs_put_page(page, 1);
4557
4558                         err = check_block_count(sbi, start, &sit);
4559                         if (err)
4560                                 return err;
4561                         seg_info_from_raw_sit(se, &sit);
4562
4563                         if (se->type >= NR_PERSISTENT_LOG) {
4564                                 f2fs_err(sbi, "Invalid segment type: %u, segno: %u",
4565                                                         se->type, start);
4566                                 f2fs_handle_error(sbi,
4567                                                 ERROR_INCONSISTENT_SUM_TYPE);
4568                                 return -EFSCORRUPTED;
4569                         }
4570
4571                         sit_valid_blocks[SE_PAGETYPE(se)] += se->valid_blocks;
4572
4573                         if (f2fs_block_unit_discard(sbi)) {
4574                                 /* build discard map only one time */
4575                                 if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) {
4576                                         memset(se->discard_map, 0xff,
4577                                                 SIT_VBLOCK_MAP_SIZE);
4578                                 } else {
4579                                         memcpy(se->discard_map,
4580                                                 se->cur_valid_map,
4581                                                 SIT_VBLOCK_MAP_SIZE);
4582                                         sbi->discard_blks +=
4583                                                 sbi->blocks_per_seg -
4584                                                 se->valid_blocks;
4585                                 }
4586                         }
4587
4588                         if (__is_large_section(sbi))
4589                                 get_sec_entry(sbi, start)->valid_blocks +=
4590                                                         se->valid_blocks;
4591                 }
4592                 start_blk += readed;
4593         } while (start_blk < sit_blk_cnt);
4594
4595         down_read(&curseg->journal_rwsem);
4596         for (i = 0; i < sits_in_cursum(journal); i++) {
4597                 unsigned int old_valid_blocks;
4598
4599                 start = le32_to_cpu(segno_in_journal(journal, i));
4600                 if (start >= MAIN_SEGS(sbi)) {
4601                         f2fs_err(sbi, "Wrong journal entry on segno %u",
4602                                  start);
4603                         err = -EFSCORRUPTED;
4604                         f2fs_handle_error(sbi, ERROR_CORRUPTED_JOURNAL);
4605                         break;
4606                 }
4607
4608                 se = &sit_i->sentries[start];
4609                 sit = sit_in_journal(journal, i);
4610
4611                 old_valid_blocks = se->valid_blocks;
4612
4613                 sit_valid_blocks[SE_PAGETYPE(se)] -= old_valid_blocks;
4614
4615                 err = check_block_count(sbi, start, &sit);
4616                 if (err)
4617                         break;
4618                 seg_info_from_raw_sit(se, &sit);
4619
4620                 if (se->type >= NR_PERSISTENT_LOG) {
4621                         f2fs_err(sbi, "Invalid segment type: %u, segno: %u",
4622                                                         se->type, start);
4623                         err = -EFSCORRUPTED;
4624                         f2fs_handle_error(sbi, ERROR_INCONSISTENT_SUM_TYPE);
4625                         break;
4626                 }
4627
4628                 sit_valid_blocks[SE_PAGETYPE(se)] += se->valid_blocks;
4629
4630                 if (f2fs_block_unit_discard(sbi)) {
4631                         if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) {
4632                                 memset(se->discard_map, 0xff, SIT_VBLOCK_MAP_SIZE);
4633                         } else {
4634                                 memcpy(se->discard_map, se->cur_valid_map,
4635                                                         SIT_VBLOCK_MAP_SIZE);
4636                                 sbi->discard_blks += old_valid_blocks;
4637                                 sbi->discard_blks -= se->valid_blocks;
4638                         }
4639                 }
4640
4641                 if (__is_large_section(sbi)) {
4642                         get_sec_entry(sbi, start)->valid_blocks +=
4643                                                         se->valid_blocks;
4644                         get_sec_entry(sbi, start)->valid_blocks -=
4645                                                         old_valid_blocks;
4646                 }
4647         }
4648         up_read(&curseg->journal_rwsem);
4649
4650         if (err)
4651                 return err;
4652
4653         if (sit_valid_blocks[NODE] != valid_node_count(sbi)) {
4654                 f2fs_err(sbi, "SIT is corrupted node# %u vs %u",
4655                          sit_valid_blocks[NODE], valid_node_count(sbi));
4656                 f2fs_handle_error(sbi, ERROR_INCONSISTENT_NODE_COUNT);
4657                 return -EFSCORRUPTED;
4658         }
4659
4660         if (sit_valid_blocks[DATA] + sit_valid_blocks[NODE] >
4661                                 valid_user_blocks(sbi)) {
4662                 f2fs_err(sbi, "SIT is corrupted data# %u %u vs %u",
4663                          sit_valid_blocks[DATA], sit_valid_blocks[NODE],
4664                          valid_user_blocks(sbi));
4665                 f2fs_handle_error(sbi, ERROR_INCONSISTENT_BLOCK_COUNT);
4666                 return -EFSCORRUPTED;
4667         }
4668
4669         return 0;
4670 }
4671
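/*
 * Mark segments without any valid blocks as free, accumulate
 * written_valid_blocks for the rest, and flag the segments currently used
 * by the active logs as in use.
 */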
4672 static void init_free_segmap(struct f2fs_sb_info *sbi)
4673 {
4674         unsigned int start;
4675         int type;
4676         struct seg_entry *sentry;
4677
4678         for (start = 0; start < MAIN_SEGS(sbi); start++) {
4679                 if (f2fs_usable_blks_in_seg(sbi, start) == 0)
4680                         continue;
4681                 sentry = get_seg_entry(sbi, start);
4682                 if (!sentry->valid_blocks)
4683                         __set_free(sbi, start);
4684                 else
4685                         SIT_I(sbi)->written_valid_blocks +=
4686                                                 sentry->valid_blocks;
4687         }
4688
4689         /* mark the current segments as in use */
4690         for (type = CURSEG_HOT_DATA; type <= CURSEG_COLD_NODE; type++) {
4691                 struct curseg_info *curseg_t = CURSEG_I(sbi, type);
4692
4693                 __set_test_and_inuse(sbi, curseg_t->segno);
4694         }
4695 }
4696
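/*
 * Scan the in-use segments and mark partially valid ones as dirty; for
 * large sections, also record partially valid sections in dirty_secmap.
 */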
4697 static void init_dirty_segmap(struct f2fs_sb_info *sbi)
4698 {
4699         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
4700         struct free_segmap_info *free_i = FREE_I(sbi);
4701         unsigned int segno = 0, offset = 0, secno;
4702         block_t valid_blocks, usable_blks_in_seg;
4703
4704         while (1) {
4705                 /* find dirty segment based on free segmap */
4706                 segno = find_next_inuse(free_i, MAIN_SEGS(sbi), offset);
4707                 if (segno >= MAIN_SEGS(sbi))
4708                         break;
4709                 offset = segno + 1;
4710                 valid_blocks = get_valid_blocks(sbi, segno, false);
4711                 usable_blks_in_seg = f2fs_usable_blks_in_seg(sbi, segno);
4712                 if (valid_blocks == usable_blks_in_seg || !valid_blocks)
4713                         continue;
4714                 if (valid_blocks > usable_blks_in_seg) {
4715                         f2fs_bug_on(sbi, 1);
4716                         continue;
4717                 }
4718                 mutex_lock(&dirty_i->seglist_lock);
4719                 __locate_dirty_segment(sbi, segno, DIRTY);
4720                 mutex_unlock(&dirty_i->seglist_lock);
4721         }
4722
4723         if (!__is_large_section(sbi))
4724                 return;
4725
4726         mutex_lock(&dirty_i->seglist_lock);
4727         for (segno = 0; segno < MAIN_SEGS(sbi); segno += sbi->segs_per_sec) {
4728                 valid_blocks = get_valid_blocks(sbi, segno, true);
4729                 secno = GET_SEC_FROM_SEG(sbi, segno);
4730
4731                 if (!valid_blocks || valid_blocks == CAP_BLKS_PER_SEC(sbi))
4732                         continue;
4733                 if (IS_CURSEC(sbi, secno))
4734                         continue;
4735                 set_bit(secno, dirty_i->dirty_secmap);
4736         }
4737         mutex_unlock(&dirty_i->seglist_lock);
4738 }
4739
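/* Allocate the victim and pinned section bitmaps used during GC. */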
4740 static int init_victim_secmap(struct f2fs_sb_info *sbi)
4741 {
4742         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
4743         unsigned int bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi));
4744
4745         dirty_i->victim_secmap = f2fs_kvzalloc(sbi, bitmap_size, GFP_KERNEL);
4746         if (!dirty_i->victim_secmap)
4747                 return -ENOMEM;
4748
4749         dirty_i->pinned_secmap = f2fs_kvzalloc(sbi, bitmap_size, GFP_KERNEL);
4750         if (!dirty_i->pinned_secmap)
4751                 return -ENOMEM;
4752
4753         dirty_i->pinned_secmap_cnt = 0;
4754         dirty_i->enable_pin_section = true;
4755         return 0;
4756 }
4757
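/*
 * Allocate the per-type dirty segment bitmaps (plus the section-level
 * bitmap for large sections) and populate them from the current segment
 * state.
 */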
4758 static int build_dirty_segmap(struct f2fs_sb_info *sbi)
4759 {
4760         struct dirty_seglist_info *dirty_i;
4761         unsigned int bitmap_size, i;
4762
4763         /* allocate memory for dirty segments list information */
4764         dirty_i = f2fs_kzalloc(sbi, sizeof(struct dirty_seglist_info),
4765                                                                 GFP_KERNEL);
4766         if (!dirty_i)
4767                 return -ENOMEM;
4768
4769         SM_I(sbi)->dirty_info = dirty_i;
4770         mutex_init(&dirty_i->seglist_lock);
4771
4772         bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
4773
4774         for (i = 0; i < NR_DIRTY_TYPE; i++) {
4775                 dirty_i->dirty_segmap[i] = f2fs_kvzalloc(sbi, bitmap_size,
4776                                                                 GFP_KERNEL);
4777                 if (!dirty_i->dirty_segmap[i])
4778                         return -ENOMEM;
4779         }
4780
4781         if (__is_large_section(sbi)) {
4782                 bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi));
4783                 dirty_i->dirty_secmap = f2fs_kvzalloc(sbi,
4784                                                 bitmap_size, GFP_KERNEL);
4785                 if (!dirty_i->dirty_secmap)
4786                         return -ENOMEM;
4787         }
4788
4789         init_dirty_segmap(sbi);
4790         return init_victim_secmap(sbi);
4791 }
4792
4793 static int sanity_check_curseg(struct f2fs_sb_info *sbi)
4794 {
4795         int i;
4796
4797         /*
4798          * In an LFS/SSR curseg, .next_blkoff should point to an unused blkaddr;
4799          * in an LFS curseg, all blkaddrs after .next_blkoff should be unused.
4800          */
4801         for (i = 0; i < NR_PERSISTENT_LOG; i++) {
4802                 struct curseg_info *curseg = CURSEG_I(sbi, i);
4803                 struct seg_entry *se = get_seg_entry(sbi, curseg->segno);
4804                 unsigned int blkofs = curseg->next_blkoff;
4805
4806                 if (f2fs_sb_has_readonly(sbi) &&
4807                         i != CURSEG_HOT_DATA && i != CURSEG_HOT_NODE)
4808                         continue;
4809
4810                 sanity_check_seg_type(sbi, curseg->seg_type);
4811
4812                 if (curseg->alloc_type != LFS && curseg->alloc_type != SSR) {
4813                         f2fs_err(sbi,
4814                                  "Current segment has invalid alloc_type:%d",
4815                                  curseg->alloc_type);
4816                         f2fs_handle_error(sbi, ERROR_INVALID_CURSEG);
4817                         return -EFSCORRUPTED;
4818                 }
4819
4820                 if (f2fs_test_bit(blkofs, se->cur_valid_map))
4821                         goto out;
4822
4823                 if (curseg->alloc_type == SSR)
4824                         continue;
4825
4826                 for (blkofs += 1; blkofs < sbi->blocks_per_seg; blkofs++) {
4827                         if (!f2fs_test_bit(blkofs, se->cur_valid_map))
4828                                 continue;
4829 out:
4830                         f2fs_err(sbi,
4831                                  "Current segment's next free block offset is inconsistent with bitmap, logtype:%u, segno:%u, type:%u, next_blkoff:%u, blkofs:%u",
4832                                  i, curseg->segno, curseg->alloc_type,
4833                                  curseg->next_blkoff, blkofs);
4834                         f2fs_handle_error(sbi, ERROR_INVALID_CURSEG);
4835                         return -EFSCORRUPTED;
4836                 }
4837         }
4838         return 0;
4839 }
4840
4841 #ifdef CONFIG_BLK_DEV_ZONED
4842
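/*
 * Compare a sequential zone's write pointer with the last valid block
 * recorded in the SIT. On mismatch, either reset the zone (when it holds
 * no valid blocks) or zero-fill it up to the end so that it can be closed.
 */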
4843 static int check_zone_write_pointer(struct f2fs_sb_info *sbi,
4844                                     struct f2fs_dev_info *fdev,
4845                                     struct blk_zone *zone)
4846 {
4847         unsigned int wp_segno, wp_blkoff, zone_secno, zone_segno, segno;
4848         block_t zone_block, wp_block, last_valid_block;
4849         int i, s, b, ret;
4850         struct seg_entry *se;
4851
4852         if (zone->type != BLK_ZONE_TYPE_SEQWRITE_REQ)
4853                 return 0;
4854
4855         wp_block = fdev->start_blk + (zone->wp >> sbi->log_sectors_per_block);
4856         wp_segno = GET_SEGNO(sbi, wp_block);
4857         wp_blkoff = wp_block - START_BLOCK(sbi, wp_segno);
4858         zone_block = fdev->start_blk + (zone->start >>
4859                                                 sbi->log_sectors_per_block);
4860         zone_segno = GET_SEGNO(sbi, zone_block);
4861         zone_secno = GET_SEC_FROM_SEG(sbi, zone_segno);
4862
4863         if (zone_segno >= MAIN_SEGS(sbi))
4864                 return 0;
4865
4866         /*
4867          * Skip checking the zones that cursegs point to, since
4868          * fix_curseg_write_pointer() checks them.
4869          */
4870         for (i = 0; i < NO_CHECK_TYPE; i++)
4871                 if (zone_secno == GET_SEC_FROM_SEG(sbi,
4872                                                    CURSEG_I(sbi, i)->segno))
4873                         return 0;
4874
4875         /*
4876          * Get last valid block of the zone.
4877          */
4878         last_valid_block = zone_block - 1;
4879         for (s = sbi->segs_per_sec - 1; s >= 0; s--) {
4880                 segno = zone_segno + s;
4881                 se = get_seg_entry(sbi, segno);
4882                 for (b = sbi->blocks_per_seg - 1; b >= 0; b--)
4883                         if (f2fs_test_bit(b, se->cur_valid_map)) {
4884                                 last_valid_block = START_BLOCK(sbi, segno) + b;
4885                                 break;
4886                         }
4887                 if (last_valid_block >= zone_block)
4888                         break;
4889         }
4890
4891         /*
4892          * The write pointer matches the valid blocks or
4893          * already points to the end of the zone.
4894          */
4895         if ((last_valid_block + 1 == wp_block) ||
4896                         (zone->wp == zone->start + zone->len))
4897                 return 0;
4898
4899         if (last_valid_block + 1 == zone_block) {
4900                 /*
4901                  * If there is no valid block in the zone and the write pointer
4902                  * is not at the zone start, reset the write pointer.
4903                  */
4904                 f2fs_notice(sbi,
4905                             "Zone without valid block has non-zero write "
4906                             "pointer. Reset the write pointer: wp[0x%x,0x%x]",
4907                             wp_segno, wp_blkoff);
4908                 ret = __f2fs_issue_discard_zone(sbi, fdev->bdev, zone_block,
4909                                 zone->len >> sbi->log_sectors_per_block);
4910                 if (ret)
4911                         f2fs_err(sbi, "Discard zone failed: %s (errno=%d)",
4912                                  fdev->path, ret);
4913
4914                 return ret;
4915         }
4916
4917         /*
4918          * If there are valid blocks and the write pointer doesn't
4919          * match them, we need to report the inconsistency and fill
4920          * the zone up to the end to close it. This inconsistency does
4921          * not cause a write error because the zone will not be selected
4922          * for write operations until it gets discarded.
4923          */
4924         f2fs_notice(sbi, "Valid blocks are not aligned with write pointer: "
4925                     "valid block[0x%x,0x%x] wp[0x%x,0x%x]",
4926                     GET_SEGNO(sbi, last_valid_block),
4927                     GET_BLKOFF_FROM_SEG0(sbi, last_valid_block),
4928                     wp_segno, wp_blkoff);
4929
4930         ret = blkdev_issue_zeroout(fdev->bdev, zone->wp,
4931                                 zone->len - (zone->wp - zone->start),
4932                                 GFP_NOFS, 0);
4933         if (ret)
4934                 f2fs_err(sbi, "Fill up zone failed: %s (errno=%d)",
4935                          fdev->path, ret);
4936
4937         return ret;
4938 }
4939
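/* Return the zoned device whose block range contains zone_blkaddr. */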
4940 static struct f2fs_dev_info *get_target_zoned_dev(struct f2fs_sb_info *sbi,
4941                                                   block_t zone_blkaddr)
4942 {
4943         int i;
4944
4945         for (i = 0; i < sbi->s_ndevs; i++) {
4946                 if (!bdev_is_zoned(FDEV(i).bdev))
4947                         continue;
4948                 if (sbi->s_ndevs == 1 || (FDEV(i).start_blk <= zone_blkaddr &&
4949                                 zone_blkaddr <= FDEV(i).end_blk))
4950                         return &FDEV(i);
4951         }
4952
4953         return NULL;
4954 }
4955
4956 static int report_one_zone_cb(struct blk_zone *zone, unsigned int idx,
4957                               void *data)
4958 {
4959         memcpy(data, zone, sizeof(struct blk_zone));
4960         return 0;
4961 }
4962
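/*
 * If the curseg of the given type is not aligned with its zone's write
 * pointer, move the curseg to a newly allocated section and verify that
 * both the old and the new zone end up in a consistent state.
 */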
4963 static int fix_curseg_write_pointer(struct f2fs_sb_info *sbi, int type)
4964 {
4965         struct curseg_info *cs = CURSEG_I(sbi, type);
4966         struct f2fs_dev_info *zbd;
4967         struct blk_zone zone;
4968         unsigned int cs_section, wp_segno, wp_blkoff, wp_sector_off;
4969         block_t cs_zone_block, wp_block;
4970         sector_t zone_sector;
4971         int err;
4972
4973         cs_section = GET_SEC_FROM_SEG(sbi, cs->segno);
4974         cs_zone_block = START_BLOCK(sbi, GET_SEG_FROM_SEC(sbi, cs_section));
4975
4976         zbd = get_target_zoned_dev(sbi, cs_zone_block);
4977         if (!zbd)
4978                 return 0;
4979
4980         /* report zone for the sector the curseg points to */
4981         zone_sector = (sector_t)(cs_zone_block - zbd->start_blk) <<
4982                                                 sbi->log_sectors_per_block;
4983         err = blkdev_report_zones(zbd->bdev, zone_sector, 1,
4984                                   report_one_zone_cb, &zone);
4985         if (err != 1) {
4986                 f2fs_err(sbi, "Report zone failed: %s errno=(%d)",
4987                          zbd->path, err);
4988                 return err;
4989         }
4990
4991         if (zone.type != BLK_ZONE_TYPE_SEQWRITE_REQ)
4992                 return 0;
4993
4994         wp_block = zbd->start_blk + (zone.wp >> sbi->log_sectors_per_block);
4995         wp_segno = GET_SEGNO(sbi, wp_block);
4996         wp_blkoff = wp_block - START_BLOCK(sbi, wp_segno);
4997         wp_sector_off = zone.wp & GENMASK(sbi->log_sectors_per_block - 1, 0);
4998
4999         if (cs->segno == wp_segno && cs->next_blkoff == wp_blkoff &&
5000                 wp_sector_off == 0)
5001                 return 0;
5002
5003         f2fs_notice(sbi, "Unaligned curseg[%d] with write pointer: "
5004                     "curseg[0x%x,0x%x] wp[0x%x,0x%x]",
5005                     type, cs->segno, cs->next_blkoff, wp_segno, wp_blkoff);
5006
5007         f2fs_notice(sbi, "Assign new section to curseg[%d]: "
5008                     "curseg[0x%x,0x%x]", type, cs->segno, cs->next_blkoff);
5009
5010         f2fs_allocate_new_section(sbi, type, true);
5011
5012         /* check consistency of the zone the curseg pointed to */
5013         if (check_zone_write_pointer(sbi, zbd, &zone))
5014                 return -EIO;
5015
5016         /* check newly assigned zone */
5017         cs_section = GET_SEC_FROM_SEG(sbi, cs->segno);
5018         cs_zone_block = START_BLOCK(sbi, GET_SEG_FROM_SEC(sbi, cs_section));
5019
5020         zbd = get_target_zoned_dev(sbi, cs_zone_block);
5021         if (!zbd)
5022                 return 0;
5023
5024         zone_sector = (sector_t)(cs_zone_block - zbd->start_blk) <<
5025                                                 sbi->log_sectors_per_block;
5026         err = blkdev_report_zones(zbd->bdev, zone_sector, 1,
5027                                   report_one_zone_cb, &zone);
5028         if (err != 1) {
5029                 f2fs_err(sbi, "Report zone failed: %s errno=(%d)",
5030                          zbd->path, err);
5031                 return err;
5032         }
5033
5034         if (zone.type != BLK_ZONE_TYPE_SEQWRITE_REQ)
5035                 return 0;
5036
5037         if (zone.wp != zone.start) {
5038                 f2fs_notice(sbi,
5039                             "New zone for curseg[%d] is not yet discarded. "
5040                             "Reset the zone: curseg[0x%x,0x%x]",
5041                             type, cs->segno, cs->next_blkoff);
5042                 err = __f2fs_issue_discard_zone(sbi, zbd->bdev, cs_zone_block,
5043                                         zone.len >> sbi->log_sectors_per_block);
5044                 if (err) {
5045                         f2fs_err(sbi, "Discard zone failed: %s (errno=%d)",
5046                                  zbd->path, err);
5047                         return err;
5048                 }
5049         }
5050
5051         return 0;
5052 }
5053
5054 int f2fs_fix_curseg_write_pointer(struct f2fs_sb_info *sbi)
5055 {
5056         int i, ret;
5057
5058         for (i = 0; i < NR_PERSISTENT_LOG; i++) {
5059                 ret = fix_curseg_write_pointer(sbi, i);
5060                 if (ret)
5061                         return ret;
5062         }
5063
5064         return 0;
5065 }
5066
5067 struct check_zone_write_pointer_args {
5068         struct f2fs_sb_info *sbi;
5069         struct f2fs_dev_info *fdev;
5070 };
5071
5072 static int check_zone_write_pointer_cb(struct blk_zone *zone, unsigned int idx,
5073                                       void *data)
5074 {
5075         struct check_zone_write_pointer_args *args;
5076
5077         args = (struct check_zone_write_pointer_args *)data;
5078
5079         return check_zone_write_pointer(args->sbi, args->fdev, zone);
5080 }
5081
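/*
 * Walk every zoned device and verify each zone's write pointer against
 * the f2fs metadata via check_zone_write_pointer().
 */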
5082 int f2fs_check_write_pointer(struct f2fs_sb_info *sbi)
5083 {
5084         int i, ret;
5085         struct check_zone_write_pointer_args args;
5086
5087         for (i = 0; i < sbi->s_ndevs; i++) {
5088                 if (!bdev_is_zoned(FDEV(i).bdev))
5089                         continue;
5090
5091                 args.sbi = sbi;
5092                 args.fdev = &FDEV(i);
5093                 ret = blkdev_report_zones(FDEV(i).bdev, 0, BLK_ALL_ZONES,
5094                                           check_zone_write_pointer_cb, &args);
5095                 if (ret < 0)
5096                         return ret;
5097         }
5098
5099         return 0;
5100 }
5101
5102 /*
5103  * Return the number of usable blocks in a segment. The number of blocks
5104  * returned is always equal to the number of blocks in a segment for
5105  * segments fully contained within a sequential zone capacity or a
5106  * conventional zone. For segments partially contained in a sequential
5107  * zone capacity, the number of usable blocks up to the zone capacity
5108  * is returned. 0 is returned in all other cases.
5109  */
5110 static inline unsigned int f2fs_usable_zone_blks_in_seg(
5111                         struct f2fs_sb_info *sbi, unsigned int segno)
5112 {
5113         block_t seg_start, sec_start_blkaddr, sec_cap_blkaddr;
5114         unsigned int secno;
5115
5116         if (!sbi->unusable_blocks_per_sec)
5117                 return sbi->blocks_per_seg;
5118
5119         secno = GET_SEC_FROM_SEG(sbi, segno);
5120         seg_start = START_BLOCK(sbi, segno);
5121         sec_start_blkaddr = START_BLOCK(sbi, GET_SEG_FROM_SEC(sbi, secno));
5122         sec_cap_blkaddr = sec_start_blkaddr + CAP_BLKS_PER_SEC(sbi);
5123
5124         /*
5125          * If the segment starts before the zone capacity and spans beyond
5126          * it, the usable blocks run from the segment start to the zone
5127          * capacity. If the segment starts at or after the zone capacity,
5128          * there are no usable blocks.
5129          */
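        /*
         * Worked example (hypothetical numbers): with 512 blocks per
         * segment and a usable capacity of 1664 blocks per section,
         * segments 0-2 of the section are fully usable (512 blocks each),
         * segment 3 is capped at 1664 - 3 * 512 = 128 usable blocks, and
         * any later segment starts beyond the capacity and gets 0.
         */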
5130         if (seg_start >= sec_cap_blkaddr)
5131                 return 0;
5132         if (seg_start + sbi->blocks_per_seg > sec_cap_blkaddr)
5133                 return sec_cap_blkaddr - seg_start;
5134
5135         return sbi->blocks_per_seg;
5136 }
5137 #else
5138 int f2fs_fix_curseg_write_pointer(struct f2fs_sb_info *sbi)
5139 {
5140         return 0;
5141 }
5142
5143 int f2fs_check_write_pointer(struct f2fs_sb_info *sbi)
5144 {
5145         return 0;
5146 }
5147
5148 static inline unsigned int f2fs_usable_zone_blks_in_seg(struct f2fs_sb_info *sbi,
5149                                                         unsigned int segno)
5150 {
5151         return 0;
5152 }
5153
5154 #endif
5155 unsigned int f2fs_usable_blks_in_seg(struct f2fs_sb_info *sbi,
5156                                         unsigned int segno)
5157 {
5158         if (f2fs_sb_has_blkzoned(sbi))
5159                 return f2fs_usable_zone_blks_in_seg(sbi, segno);
5160
5161         return sbi->blocks_per_seg;
5162 }
5163
5164 unsigned int f2fs_usable_segs_in_sec(struct f2fs_sb_info *sbi,
5165                                         unsigned int segno)
5166 {
5167         if (f2fs_sb_has_blkzoned(sbi))
5168                 return CAP_SEGS_PER_SEC(sbi);
5169
5170         return sbi->segs_per_sec;
5171 }
5172
5173 /*
5174  * Update min, max modified time for cost-benefit GC algorithm
5175  */
5176 static void init_min_max_mtime(struct f2fs_sb_info *sbi)
5177 {
5178         struct sit_info *sit_i = SIT_I(sbi);
5179         unsigned int segno;
5180
5181         down_write(&sit_i->sentry_lock);
5182
5183         sit_i->min_mtime = ULLONG_MAX;
5184
5185         for (segno = 0; segno < MAIN_SEGS(sbi); segno += sbi->segs_per_sec) {
5186                 unsigned int i;
5187                 unsigned long long mtime = 0;
5188
5189                 for (i = 0; i < sbi->segs_per_sec; i++)
5190                         mtime += get_seg_entry(sbi, segno + i)->mtime;
5191
5192                 mtime = div_u64(mtime, sbi->segs_per_sec);
5193
5194                 if (sit_i->min_mtime > mtime)
5195                         sit_i->min_mtime = mtime;
5196         }
5197         sit_i->max_mtime = get_mtime(sbi, false);
5198         sit_i->dirty_max_mtime = 0;
5199         up_write(&sit_i->sentry_lock);
5200 }
5201
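/*
 * Build the whole segment manager: allocate the sm_info, start the flush
 * and discard controls, then construct the SIT, free/dirty segment maps
 * and current segments in dependency order.
 */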
5202 int f2fs_build_segment_manager(struct f2fs_sb_info *sbi)
5203 {
5204         struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
5205         struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
5206         struct f2fs_sm_info *sm_info;
5207         int err;
5208
5209         sm_info = f2fs_kzalloc(sbi, sizeof(struct f2fs_sm_info), GFP_KERNEL);
5210         if (!sm_info)
5211                 return -ENOMEM;
5212
5213         /* init sm info */
5214         sbi->sm_info = sm_info;
5215         sm_info->seg0_blkaddr = le32_to_cpu(raw_super->segment0_blkaddr);
5216         sm_info->main_blkaddr = le32_to_cpu(raw_super->main_blkaddr);
5217         sm_info->segment_count = le32_to_cpu(raw_super->segment_count);
5218         sm_info->reserved_segments = le32_to_cpu(ckpt->rsvd_segment_count);
5219         sm_info->ovp_segments = le32_to_cpu(ckpt->overprov_segment_count);
5220         sm_info->main_segments = le32_to_cpu(raw_super->segment_count_main);
5221         sm_info->ssa_blkaddr = le32_to_cpu(raw_super->ssa_blkaddr);
5222         sm_info->rec_prefree_segments = sm_info->main_segments *
5223                                         DEF_RECLAIM_PREFREE_SEGMENTS / 100;
5224         if (sm_info->rec_prefree_segments > DEF_MAX_RECLAIM_PREFREE_SEGMENTS)
5225                 sm_info->rec_prefree_segments = DEF_MAX_RECLAIM_PREFREE_SEGMENTS;
5226
5227         if (!f2fs_lfs_mode(sbi))
5228                 sm_info->ipu_policy = BIT(F2FS_IPU_FSYNC);
5229         sm_info->min_ipu_util = DEF_MIN_IPU_UTIL;
5230         sm_info->min_fsync_blocks = DEF_MIN_FSYNC_BLOCKS;
5231         sm_info->min_seq_blocks = sbi->blocks_per_seg;
5232         sm_info->min_hot_blocks = DEF_MIN_HOT_BLOCKS;
5233         sm_info->min_ssr_sections = reserved_sections(sbi);
5234
5235         INIT_LIST_HEAD(&sm_info->sit_entry_set);
5236
5237         init_f2fs_rwsem(&sm_info->curseg_lock);
5238
5239         err = f2fs_create_flush_cmd_control(sbi);
5240         if (err)
5241                 return err;
5242
5243         err = create_discard_cmd_control(sbi);
5244         if (err)
5245                 return err;
5246
5247         err = build_sit_info(sbi);
5248         if (err)
5249                 return err;
5250         err = build_free_segmap(sbi);
5251         if (err)
5252                 return err;
5253         err = build_curseg(sbi);
5254         if (err)
5255                 return err;
5256
5257         /* reinit free segmap based on SIT */
5258         err = build_sit_entries(sbi);
5259         if (err)
5260                 return err;
5261
5262         init_free_segmap(sbi);
5263         err = build_dirty_segmap(sbi);
5264         if (err)
5265                 return err;
5266
5267         err = sanity_check_curseg(sbi);
5268         if (err)
5269                 return err;
5270
5271         init_min_max_mtime(sbi);
5272         return 0;
5273 }
5274
5275 static void discard_dirty_segmap(struct f2fs_sb_info *sbi,
5276                 enum dirty_type dirty_type)
5277 {
5278         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
5279
5280         mutex_lock(&dirty_i->seglist_lock);
5281         kvfree(dirty_i->dirty_segmap[dirty_type]);
5282         dirty_i->nr_dirty[dirty_type] = 0;
5283         mutex_unlock(&dirty_i->seglist_lock);
5284 }
5285
5286 static void destroy_victim_secmap(struct f2fs_sb_info *sbi)
5287 {
5288         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
5289
5290         kvfree(dirty_i->pinned_secmap);
5291         kvfree(dirty_i->victim_secmap);
5292 }
5293
5294 static void destroy_dirty_segmap(struct f2fs_sb_info *sbi)
5295 {
5296         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
5297         int i;
5298
5299         if (!dirty_i)
5300                 return;
5301
5302         /* discard pre-free/dirty segments list */
5303         for (i = 0; i < NR_DIRTY_TYPE; i++)
5304                 discard_dirty_segmap(sbi, i);
5305
5306         if (__is_large_section(sbi)) {
5307                 mutex_lock(&dirty_i->seglist_lock);
5308                 kvfree(dirty_i->dirty_secmap);
5309                 mutex_unlock(&dirty_i->seglist_lock);
5310         }
5311
5312         destroy_victim_secmap(sbi);
5313         SM_I(sbi)->dirty_info = NULL;
5314         kfree(dirty_i);
5315 }
5316
5317 static void destroy_curseg(struct f2fs_sb_info *sbi)
5318 {
5319         struct curseg_info *array = SM_I(sbi)->curseg_array;
5320         int i;
5321
5322         if (!array)
5323                 return;
5324         SM_I(sbi)->curseg_array = NULL;
5325         for (i = 0; i < NR_CURSEG_TYPE; i++) {
5326                 kfree(array[i].sum_blk);
5327                 kfree(array[i].journal);
5328         }
5329         kfree(array);
5330 }
5331
5332 static void destroy_free_segmap(struct f2fs_sb_info *sbi)
5333 {
5334         struct free_segmap_info *free_i = SM_I(sbi)->free_info;
5335
5336         if (!free_i)
5337                 return;
5338         SM_I(sbi)->free_info = NULL;
5339         kvfree(free_i->free_segmap);
5340         kvfree(free_i->free_secmap);
5341         kfree(free_i);
5342 }
5343
5344 static void destroy_sit_info(struct f2fs_sb_info *sbi)
5345 {
5346         struct sit_info *sit_i = SIT_I(sbi);
5347
5348         if (!sit_i)
5349                 return;
5350
5351         if (sit_i->sentries)
5352                 kvfree(sit_i->bitmap);
5353         kfree(sit_i->tmp_map);
5354
5355         kvfree(sit_i->sentries);
5356         kvfree(sit_i->sec_entries);
5357         kvfree(sit_i->dirty_sentries_bitmap);
5358
5359         SM_I(sbi)->sit_info = NULL;
5360         kvfree(sit_i->sit_bitmap);
5361 #ifdef CONFIG_F2FS_CHECK_FS
5362         kvfree(sit_i->sit_bitmap_mir);
5363         kvfree(sit_i->invalid_segmap);
5364 #endif
5365         kfree(sit_i);
5366 }
5367
5368 void f2fs_destroy_segment_manager(struct f2fs_sb_info *sbi)
5369 {
5370         struct f2fs_sm_info *sm_info = SM_I(sbi);
5371
5372         if (!sm_info)
5373                 return;
5374         f2fs_destroy_flush_cmd_control(sbi, true);
5375         destroy_discard_cmd_control(sbi);
5376         destroy_dirty_segmap(sbi);
5377         destroy_curseg(sbi);
5378         destroy_free_segmap(sbi);
5379         destroy_sit_info(sbi);
5380         sbi->sm_info = NULL;
5381         kfree(sm_info);
5382 }
5383
5384 int __init f2fs_create_segment_manager_caches(void)
5385 {
5386         discard_entry_slab = f2fs_kmem_cache_create("f2fs_discard_entry",
5387                         sizeof(struct discard_entry));
5388         if (!discard_entry_slab)
5389                 goto fail;
5390
5391         discard_cmd_slab = f2fs_kmem_cache_create("f2fs_discard_cmd",
5392                         sizeof(struct discard_cmd));
5393         if (!discard_cmd_slab)
5394                 goto destroy_discard_entry;
5395
5396         sit_entry_set_slab = f2fs_kmem_cache_create("f2fs_sit_entry_set",
5397                         sizeof(struct sit_entry_set));
5398         if (!sit_entry_set_slab)
5399                 goto destroy_discard_cmd;
5400
5401         revoke_entry_slab = f2fs_kmem_cache_create("f2fs_revoke_entry",
5402                         sizeof(struct revoke_entry));
5403         if (!revoke_entry_slab)
5404                 goto destroy_sit_entry_set;
5405         return 0;
5406
5407 destroy_sit_entry_set:
5408         kmem_cache_destroy(sit_entry_set_slab);
5409 destroy_discard_cmd:
5410         kmem_cache_destroy(discard_cmd_slab);
5411 destroy_discard_entry:
5412         kmem_cache_destroy(discard_entry_slab);
5413 fail:
5414         return -ENOMEM;
5415 }
5416
5417 void f2fs_destroy_segment_manager_caches(void)
5418 {
5419         kmem_cache_destroy(sit_entry_set_slab);
5420         kmem_cache_destroy(discard_cmd_slab);
5421         kmem_cache_destroy(discard_entry_slab);
5422         kmem_cache_destroy(revoke_entry_slab);
5423 }