fs/f2fs/data.c

   1 // SPDX-License-Identifier: GPL-2.0
   2 /*
   3  * fs/f2fs/data.c
   4  *
   5  * Copyright (c) 2012 Samsung Electronics Co., Ltd.
   6  *             http://www.samsung.com/
   7  */
   8 #include <linux/fs.h>
   9 #include <linux/f2fs_fs.h>
  10 #include <linux/buffer_head.h>
  11 #include <linux/sched/mm.h>
  12 #include <linux/mpage.h>
  13 #include <linux/writeback.h>
  14 #include <linux/pagevec.h>
  15 #include <linux/blkdev.h>
  16 #include <linux/bio.h>
  17 #include <linux/blk-crypto.h>
  18 #include <linux/swap.h>
  19 #include <linux/prefetch.h>
  20 #include <linux/uio.h>
  21 #include <linux/sched/signal.h>
  22 #include <linux/fiemap.h>
  23 #include <linux/iomap.h>
  24
  25 #include "f2fs.h"
  26 #include "node.h"
  27 #include "segment.h"
  28 #include "iostat.h"
  29 #include <trace/events/f2fs.h>
  30
  31 #define NUM_PREALLOC_POST_READ_CTXS     128
  32
  33 static struct kmem_cache *bio_post_read_ctx_cache;
  34 static struct kmem_cache *bio_entry_slab;
  35 static mempool_t *bio_post_read_ctx_pool;
  36 static struct bio_set f2fs_bioset;
  37
  38 #define F2FS_BIO_POOL_SIZE      NR_CURSEG_TYPE
  39
  40 int __init f2fs_init_bioset(void)
  41 {
  42         if (bioset_init(&f2fs_bioset, F2FS_BIO_POOL_SIZE,
  43                                         0, BIOSET_NEED_BVECS))
  44                 return -ENOMEM;
  45         return 0;
  46 }
  47
/* Tear down the bio_set that f2fs_init_bioset() set up. */
void f2fs_destroy_bioset(void)
{
	bioset_exit(&f2fs_bioset);
}
  52
  53 static bool __is_cp_guaranteed(struct page *page)
  54 {
  55         struct address_space *mapping = page->mapping;
  56         struct inode *inode;
  57         struct f2fs_sb_info *sbi;
  58
  59         if (!mapping)
  60                 return false;
  61
  62         inode = mapping->host;
  63         sbi = F2FS_I_SB(inode);
  64
  65         if (inode->i_ino == F2FS_META_INO(sbi) ||
  66                         inode->i_ino == F2FS_NODE_INO(sbi) ||
  67                         S_ISDIR(inode->i_mode))
  68                 return true;
  69
  70         if (f2fs_is_compressed_page(page))
  71                 return false;
  72         if ((S_ISREG(inode->i_mode) && IS_NOQUOTA(inode)) ||
  73                         page_private_gcing(page))
  74                 return true;
  75         return false;
  76 }
  77
  78 static enum count_type __read_io_type(struct page *page)
  79 {
  80         struct address_space *mapping = page_file_mapping(page);
  81
  82         if (mapping) {
  83                 struct inode *inode = mapping->host;
  84                 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
  85
  86                 if (inode->i_ino == F2FS_META_INO(sbi))
  87                         return F2FS_RD_META;
  88
  89                 if (inode->i_ino == F2FS_NODE_INO(sbi))
  90                         return F2FS_RD_NODE;
  91         }
  92         return F2FS_RD_DATA;
  93 }
  94
  95 /* postprocessing steps for read bios */
  96 enum bio_post_read_step {
  97 #ifdef CONFIG_FS_ENCRYPTION
  98         STEP_DECRYPT    = 1 << 0,
  99 #else
 100         STEP_DECRYPT    = 0,    /* compile out the decryption-related code */
 101 #endif
 102 #ifdef CONFIG_F2FS_FS_COMPRESSION
 103         STEP_DECOMPRESS = 1 << 1,
 104 #else
 105         STEP_DECOMPRESS = 0,    /* compile out the decompression-related code */
 106 #endif
 107 #ifdef CONFIG_FS_VERITY
 108         STEP_VERITY     = 1 << 2,
 109 #else
 110         STEP_VERITY     = 0,    /* compile out the verity-related code */
 111 #endif
 112 };
 113
/*
 * Per-bio state for the post-read steps.  Read completion picks it up from
 * bio->bi_private, and it is returned to bio_post_read_ctx_pool when done.
 */
struct bio_post_read_ctx {
	struct bio *bio;		/* the read bio being post-processed */
	struct f2fs_sb_info *sbi;	/* supplies post_read_wq for deferred work */
	struct work_struct work;	/* runs f2fs_post_read_work() or f2fs_verify_bio() */
	unsigned int enabled_steps;	/* mask of enum bio_post_read_step values */
	block_t fs_blkaddr;		/* block address paired with the bio's first page */
};
 121
 122 static void f2fs_finish_read_bio(struct bio *bio, bool in_task)
 123 {
 124         struct bio_vec *bv;
 125         struct bvec_iter_all iter_all;
 126
 127         /*
 128          * Update and unlock the bio's pagecache pages, and put the
 129          * decompression context for any compressed pages.
 130          */
 131         bio_for_each_segment_all(bv, bio, iter_all) {
 132                 struct page *page = bv->bv_page;
 133
 134                 if (f2fs_is_compressed_page(page)) {
 135                         if (bio->bi_status)
 136                                 f2fs_end_read_compressed_page(page, true, 0,
 137                                                         in_task);
 138                         f2fs_put_page_dic(page, in_task);
 139                         continue;
 140                 }
 141
 142                 /* PG_error was set if verity failed. */
 143                 if (bio->bi_status || PageError(page)) {
 144                         ClearPageUptodate(page);
 145                         /* will re-read again later */
 146                         ClearPageError(page);
 147                 } else {
 148                         SetPageUptodate(page);
 149                 }
 150                 dec_page_count(F2FS_P_SB(page), __read_io_type(page));
 151                 unlock_page(page);
 152         }
 153
 154         if (bio->bi_private)
 155                 mempool_free(bio->bi_private, bio_post_read_ctx_pool);
 156         bio_put(bio);
 157 }
 158
 159 static void f2fs_verify_bio(struct work_struct *work)
 160 {
 161         struct bio_post_read_ctx *ctx =
 162                 container_of(work, struct bio_post_read_ctx, work);
 163         struct bio *bio = ctx->bio;
 164         bool may_have_compressed_pages = (ctx->enabled_steps & STEP_DECOMPRESS);
 165
 166         /*
 167          * fsverity_verify_bio() may call readahead() again, and while verity
 168          * will be disabled for this, decryption and/or decompression may still
 169          * be needed, resulting in another bio_post_read_ctx being allocated.
 170          * So to prevent deadlocks we need to release the current ctx to the
 171          * mempool first.  This assumes that verity is the last post-read step.
 172          */
 173         mempool_free(ctx, bio_post_read_ctx_pool);
 174         bio->bi_private = NULL;
 175
 176         /*
 177          * Verify the bio's pages with fs-verity.  Exclude compressed pages,
 178          * as those were handled separately by f2fs_end_read_compressed_page().
 179          */
 180         if (may_have_compressed_pages) {
 181                 struct bio_vec *bv;
 182                 struct bvec_iter_all iter_all;
 183
 184                 bio_for_each_segment_all(bv, bio, iter_all) {
 185                         struct page *page = bv->bv_page;
 186
 187                         if (!f2fs_is_compressed_page(page) &&
 188                             !fsverity_verify_page(page))
 189                                 SetPageError(page);
 190                 }
 191         } else {
 192                 fsverity_verify_bio(bio);
 193         }
 194
 195         f2fs_finish_read_bio(bio, true);
 196 }
 197
 198 /*
 199  * If the bio's data needs to be verified with fs-verity, then enqueue the
 200  * verity work for the bio.  Otherwise finish the bio now.
 201  *
 202  * Note that to avoid deadlocks, the verity work can't be done on the
 203  * decryption/decompression workqueue.  This is because verifying the data pages
 204  * can involve reading verity metadata pages from the file, and these verity
 205  * metadata pages may be encrypted and/or compressed.
 206  */
 207 static void f2fs_verify_and_finish_bio(struct bio *bio, bool in_task)
 208 {
 209         struct bio_post_read_ctx *ctx = bio->bi_private;
 210
 211         if (ctx && (ctx->enabled_steps & STEP_VERITY)) {
 212                 INIT_WORK(&ctx->work, f2fs_verify_bio);
 213                 fsverity_enqueue_verify_work(&ctx->work);
 214         } else {
 215                 f2fs_finish_read_bio(bio, in_task);
 216         }
 217 }
 218
 219 /*
 220  * Handle STEP_DECOMPRESS by decompressing any compressed clusters whose last
 221  * remaining page was read by @ctx->bio.
 222  *
 223  * Note that a bio may span clusters (even a mix of compressed and uncompressed
 224  * clusters) or be for just part of a cluster.  STEP_DECOMPRESS just indicates
 225  * that the bio includes at least one compressed page.  The actual decompression
 226  * is done on a per-cluster basis, not a per-bio basis.
 227  */
 228 static void f2fs_handle_step_decompress(struct bio_post_read_ctx *ctx,
 229                 bool in_task)
 230 {
 231         struct bio_vec *bv;
 232         struct bvec_iter_all iter_all;
 233         bool all_compressed = true;
 234         block_t blkaddr = ctx->fs_blkaddr;
 235
 236         bio_for_each_segment_all(bv, ctx->bio, iter_all) {
 237                 struct page *page = bv->bv_page;
 238
 239                 if (f2fs_is_compressed_page(page))
 240                         f2fs_end_read_compressed_page(page, false, blkaddr,
 241                                                       in_task);
 242                 else
 243                         all_compressed = false;
 244
 245                 blkaddr++;
 246         }
 247
 248         /*
 249          * Optimization: if all the bio's pages are compressed, then scheduling
 250          * the per-bio verity work is unnecessary, as verity will be fully
 251          * handled at the compression cluster level.
 252          */
 253         if (all_compressed)
 254                 ctx->enabled_steps &= ~STEP_VERITY;
 255 }
 256
 257 static void f2fs_post_read_work(struct work_struct *work)
 258 {
 259         struct bio_post_read_ctx *ctx =
 260                 container_of(work, struct bio_post_read_ctx, work);
 261         struct bio *bio = ctx->bio;
 262
 263         if ((ctx->enabled_steps & STEP_DECRYPT) && !fscrypt_decrypt_bio(bio)) {
 264                 f2fs_finish_read_bio(bio, true);
 265                 return;
 266         }
 267
 268         if (ctx->enabled_steps & STEP_DECOMPRESS)
 269                 f2fs_handle_step_decompress(ctx, true);
 270
 271         f2fs_verify_and_finish_bio(bio, true);
 272 }
 273
 274 static void f2fs_read_end_io(struct bio *bio)
 275 {
 276         struct f2fs_sb_info *sbi = F2FS_P_SB(bio_first_page_all(bio));
 277         struct bio_post_read_ctx *ctx;
 278         bool intask = in_task();
 279
 280         iostat_update_and_unbind_ctx(bio, 0);
 281         ctx = bio->bi_private;
 282
 283         if (time_to_inject(sbi, FAULT_READ_IO)) {
 284                 f2fs_show_injection_info(sbi, FAULT_READ_IO);
 285                 bio->bi_status = BLK_STS_IOERR;
 286         }
 287
 288         if (bio->bi_status) {
 289                 f2fs_finish_read_bio(bio, intask);
 290                 return;
 291         }
 292
 293         if (ctx) {
 294                 unsigned int enabled_steps = ctx->enabled_steps &
 295                                         (STEP_DECRYPT | STEP_DECOMPRESS);
 296
 297                 /*
 298                  * If we have only decompression step between decompression and
 299                  * decrypt, we don't need post processing for this.
 300                  */
 301                 if (enabled_steps == STEP_DECOMPRESS &&
 302                                 !f2fs_low_mem_mode(sbi)) {
 303                         f2fs_handle_step_decompress(ctx, intask);
 304                 } else if (enabled_steps) {
 305                         INIT_WORK(&ctx->work, f2fs_post_read_work);
 306                         queue_work(ctx->sbi->post_read_wq, &ctx->work);
 307                         return;
 308                 }
 309         }
 310
 311         f2fs_verify_and_finish_bio(bio, intask);
 312 }
 313
 314 static void f2fs_write_end_io(struct bio *bio)
 315 {
 316         struct f2fs_sb_info *sbi;
 317         struct bio_vec *bvec;
 318         struct bvec_iter_all iter_all;
 319
 320         iostat_update_and_unbind_ctx(bio, 1);
 321         sbi = bio->bi_private;
 322
 323         if (time_to_inject(sbi, FAULT_WRITE_IO)) {
 324                 f2fs_show_injection_info(sbi, FAULT_WRITE_IO);
 325                 bio->bi_status = BLK_STS_IOERR;
 326         }
 327
 328         bio_for_each_segment_all(bvec, bio, iter_all) {
 329                 struct page *page = bvec->bv_page;
 330                 enum count_type type = WB_DATA_TYPE(page);
 331
 332                 if (page_private_dummy(page)) {
 333                         clear_page_private_dummy(page);
 334                         unlock_page(page);
 335                         mempool_free(page, sbi->write_io_dummy);
 336
 337                         if (unlikely(bio->bi_status))
 338                                 f2fs_stop_checkpoint(sbi, true);
 339                         continue;
 340                 }
 341
 342                 fscrypt_finalize_bounce_page(&page);
 343
 344 #ifdef CONFIG_F2FS_FS_COMPRESSION
 345                 if (f2fs_is_compressed_page(page)) {
 346                         f2fs_compress_write_end_io(bio, page);
 347                         continue;
 348                 }
 349 #endif
 350
 351                 if (unlikely(bio->bi_status)) {
 352                         mapping_set_error(page->mapping, -EIO);
 353                         if (type == F2FS_WB_CP_DATA)
 354                                 f2fs_stop_checkpoint(sbi, true);
 355                 }
 356
 357                 f2fs_bug_on(sbi, page->mapping == NODE_MAPPING(sbi) &&
 358                                         page->index != nid_of_node(page));
 359
 360                 dec_page_count(sbi, type);
 361                 if (f2fs_in_warm_node_list(sbi, page))
 362                         f2fs_del_fsync_node_entry(sbi, page);
 363                 clear_page_private_gcing(page);
 364                 end_page_writeback(page);
 365         }
 366         if (!get_pages(sbi, F2FS_WB_CP_DATA) &&
 367                                 wq_has_sleeper(&sbi->cp_wait))
 368                 wake_up(&sbi->cp_wait);
 369
 370         bio_put(bio);
 371 }
 372
 373 struct block_device *f2fs_target_device(struct f2fs_sb_info *sbi,
 374                 block_t blk_addr, sector_t *sector)
 375 {
 376         struct block_device *bdev = sbi->sb->s_bdev;
 377         int i;
 378
 379         if (f2fs_is_multi_device(sbi)) {
 380                 for (i = 0; i < sbi->s_ndevs; i++) {
 381                         if (FDEV(i).start_blk <= blk_addr &&
 382                             FDEV(i).end_blk >= blk_addr) {
 383                                 blk_addr -= FDEV(i).start_blk;
 384                                 bdev = FDEV(i).bdev;
 385                                 break;
 386                         }
 387                 }
 388         }
 389
 390         if (sector)
 391                 *sector = SECTOR_FROM_BLOCK(blk_addr);
 392         return bdev;
 393 }
 394
 395 int f2fs_target_device_index(struct f2fs_sb_info *sbi, block_t blkaddr)
 396 {
 397         int i;
 398
 399         if (!f2fs_is_multi_device(sbi))
 400                 return 0;
 401
 402         for (i = 0; i < sbi->s_ndevs; i++)
 403                 if (FDEV(i).start_blk <= blkaddr && FDEV(i).end_blk >= blkaddr)
 404                         return i;
 405         return 0;
 406 }
 407
 408 static blk_opf_t f2fs_io_flags(struct f2fs_io_info *fio)
 409 {
 410         unsigned int temp_mask = (1 << NR_TEMP_TYPE) - 1;
 411         unsigned int fua_flag, meta_flag, io_flag;
 412         blk_opf_t op_flags = 0;
 413
 414         if (fio->op != REQ_OP_WRITE)
 415                 return 0;
 416         if (fio->type == DATA)
 417                 io_flag = fio->sbi->data_io_flag;
 418         else if (fio->type == NODE)
 419                 io_flag = fio->sbi->node_io_flag;
 420         else
 421                 return 0;
 422
 423         fua_flag = io_flag & temp_mask;
 424         meta_flag = (io_flag >> NR_TEMP_TYPE) & temp_mask;
 425
 426         /*
 427          * data/node io flag bits per temp:
 428          *      REQ_META     |      REQ_FUA      |
 429          *    5 |    4 |   3 |    2 |    1 |   0 |
 430          * Cold | Warm | Hot | Cold | Warm | Hot |
 431          */
 432         if ((1 << fio->temp) & meta_flag)
 433                 op_flags |= REQ_META;
 434         if ((1 << fio->temp) & fua_flag)
 435                 op_flags |= REQ_FUA;
 436         return op_flags;
 437 }
 438
 439 static struct bio *__bio_alloc(struct f2fs_io_info *fio, int npages)
 440 {
 441         struct f2fs_sb_info *sbi = fio->sbi;
 442         struct block_device *bdev;
 443         sector_t sector;
 444         struct bio *bio;
 445
 446         bdev = f2fs_target_device(sbi, fio->new_blkaddr, &sector);
 447         bio = bio_alloc_bioset(bdev, npages,
 448                                 fio->op | fio->op_flags | f2fs_io_flags(fio),
 449                                 GFP_NOIO, &f2fs_bioset);
 450         bio->bi_iter.bi_sector = sector;
 451         if (is_read_io(fio->op)) {
 452                 bio->bi_end_io = f2fs_read_end_io;
 453                 bio->bi_private = NULL;
 454         } else {
 455                 bio->bi_end_io = f2fs_write_end_io;
 456                 bio->bi_private = sbi;
 457         }
 458         iostat_alloc_and_bind_ctx(sbi, bio, NULL);
 459
 460         if (fio->io_wbc)
 461                 wbc_init_bio(fio->io_wbc, bio);
 462
 463         return bio;
 464 }
 465
 466 static void f2fs_set_bio_crypt_ctx(struct bio *bio, const struct inode *inode,
 467                                   pgoff_t first_idx,
 468                                   const struct f2fs_io_info *fio,
 469                                   gfp_t gfp_mask)
 470 {
 471         /*
 472          * The f2fs garbage collector sets ->encrypted_page when it wants to
 473          * read/write raw data without encryption.
 474          */
 475         if (!fio || !fio->encrypted_page)
 476                 fscrypt_set_bio_crypt_ctx(bio, inode, first_idx, gfp_mask);
 477 }
 478
 479 static bool f2fs_crypt_mergeable_bio(struct bio *bio, const struct inode *inode,
 480                                      pgoff_t next_idx,
 481                                      const struct f2fs_io_info *fio)
 482 {
 483         /*
 484          * The f2fs garbage collector sets ->encrypted_page when it wants to
 485          * read/write raw data without encryption.
 486          */
 487         if (fio && fio->encrypted_page)
 488                 return !bio_has_crypt_ctx(bio);
 489
 490         return fscrypt_mergeable_bio(bio, inode, next_idx);
 491 }
 492
 493 static inline void __submit_bio(struct f2fs_sb_info *sbi,
 494                                 struct bio *bio, enum page_type type)
 495 {
 496         if (!is_read_io(bio_op(bio))) {
 497                 unsigned int start;
 498
 499                 if (type != DATA && type != NODE)
 500                         goto submit_io;
 501
 502                 if (f2fs_lfs_mode(sbi) && current->plug)
 503                         blk_finish_plug(current->plug);
 504
 505                 if (!F2FS_IO_ALIGNED(sbi))
 506                         goto submit_io;
 507
 508                 start = bio->bi_iter.bi_size >> F2FS_BLKSIZE_BITS;
 509                 start %= F2FS_IO_SIZE(sbi);
 510
 511                 if (start == 0)
 512                         goto submit_io;
 513
 514                 /* fill dummy pages */
 515                 for (; start < F2FS_IO_SIZE(sbi); start++) {
 516                         struct page *page =
 517                                 mempool_alloc(sbi->write_io_dummy,
 518                                               GFP_NOIO | __GFP_NOFAIL);
 519                         f2fs_bug_on(sbi, !page);
 520
 521                         lock_page(page);
 522
 523                         zero_user_segment(page, 0, PAGE_SIZE);
 524                         set_page_private_dummy(page);
 525
 526                         if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE)
 527                                 f2fs_bug_on(sbi, 1);
 528                 }
 529                 /*
 530                  * In the NODE case, we lose next block address chain. So, we
 531                  * need to do checkpoint in f2fs_sync_file.
 532                  */
 533                 if (type == NODE)
 534                         set_sbi_flag(sbi, SBI_NEED_CP);
 535         }
 536 submit_io:
 537         if (is_read_io(bio_op(bio)))
 538                 trace_f2fs_submit_read_bio(sbi->sb, type, bio);
 539         else
 540                 trace_f2fs_submit_write_bio(sbi->sb, type, bio);
 541
 542         iostat_update_submit_ctx(bio, type);
 543         submit_bio(bio);
 544 }
 545
/* Non-static entry point that simply forwards to __submit_bio(). */
void f2fs_submit_bio(struct f2fs_sb_info *sbi,
				struct bio *bio, enum page_type type)
{
	__submit_bio(sbi, bio, type);
}
 551
 552 static void __submit_merged_bio(struct f2fs_bio_info *io)
 553 {
 554         struct f2fs_io_info *fio = &io->fio;
 555
 556         if (!io->bio)
 557                 return;
 558
 559         if (is_read_io(fio->op))
 560                 trace_f2fs_prepare_read_bio(io->sbi->sb, fio->type, io->bio);
 561         else
 562                 trace_f2fs_prepare_write_bio(io->sbi->sb, fio->type, io->bio);
 563
 564         __submit_bio(io->sbi, io->bio, fio->type);
 565         io->bio = NULL;
 566 }
 567
 568 static bool __has_merged_page(struct bio *bio, struct inode *inode,
 569                                                 struct page *page, nid_t ino)
 570 {
 571         struct bio_vec *bvec;
 572         struct bvec_iter_all iter_all;
 573
 574         if (!bio)
 575                 return false;
 576
 577         if (!inode && !page && !ino)
 578                 return true;
 579
 580         bio_for_each_segment_all(bvec, bio, iter_all) {
 581                 struct page *target = bvec->bv_page;
 582
 583                 if (fscrypt_is_bounce_page(target)) {
 584                         target = fscrypt_pagecache_page(target);
 585                         if (IS_ERR(target))
 586                                 continue;
 587                 }
 588                 if (f2fs_is_compressed_page(target)) {
 589                         target = f2fs_compress_control_page(target);
 590                         if (IS_ERR(target))
 591                                 continue;
 592                 }
 593
 594                 if (inode && inode == target->mapping->host)
 595                         return true;
 596                 if (page && page == target)
 597                         return true;
 598                 if (ino && ino == ino_of_node(target))
 599                         return true;
 600         }
 601
 602         return false;
 603 }
 604
 605 int f2fs_init_write_merge_io(struct f2fs_sb_info *sbi)
 606 {
 607         int i;
 608
 609         for (i = 0; i < NR_PAGE_TYPE; i++) {
 610                 int n = (i == META) ? 1 : NR_TEMP_TYPE;
 611                 int j;
 612
 613                 sbi->write_io[i] = f2fs_kmalloc(sbi,
 614                                 array_size(n, sizeof(struct f2fs_bio_info)),
 615                                 GFP_KERNEL);
 616                 if (!sbi->write_io[i])
 617                         return -ENOMEM;
 618
 619                 for (j = HOT; j < n; j++) {
 620                         init_f2fs_rwsem(&sbi->write_io[i][j].io_rwsem);
 621                         sbi->write_io[i][j].sbi = sbi;
 622                         sbi->write_io[i][j].bio = NULL;
 623                         spin_lock_init(&sbi->write_io[i][j].io_lock);
 624                         INIT_LIST_HEAD(&sbi->write_io[i][j].io_list);
 625                         INIT_LIST_HEAD(&sbi->write_io[i][j].bio_list);
 626                         init_f2fs_rwsem(&sbi->write_io[i][j].bio_list_lock);
 627                 }
 628         }
 629
 630         return 0;
 631 }
 632
 633 static void __f2fs_submit_merged_write(struct f2fs_sb_info *sbi,
 634                                 enum page_type type, enum temp_type temp)
 635 {
 636         enum page_type btype = PAGE_TYPE_OF_BIO(type);
 637         struct f2fs_bio_info *io = sbi->write_io[btype] + temp;
 638
 639         f2fs_down_write(&io->io_rwsem);
 640
 641         /* change META to META_FLUSH in the checkpoint procedure */
 642         if (type >= META_FLUSH) {
 643                 io->fio.type = META_FLUSH;
 644                 io->bio->bi_opf |= REQ_META | REQ_PRIO | REQ_SYNC;
 645                 if (!test_opt(sbi, NOBARRIER))
 646                         io->bio->bi_opf |= REQ_PREFLUSH | REQ_FUA;
 647         }
 648         __submit_merged_bio(io);
 649         f2fs_up_write(&io->io_rwsem);
 650 }
 651
 652 static void __submit_merged_write_cond(struct f2fs_sb_info *sbi,
 653                                 struct inode *inode, struct page *page,
 654                                 nid_t ino, enum page_type type, bool force)
 655 {
 656         enum temp_type temp;
 657         bool ret = true;
 658
 659         for (temp = HOT; temp < NR_TEMP_TYPE; temp++) {
 660                 if (!force)     {
 661                         enum page_type btype = PAGE_TYPE_OF_BIO(type);
 662                         struct f2fs_bio_info *io = sbi->write_io[btype] + temp;
 663
 664                         f2fs_down_read(&io->io_rwsem);
 665                         ret = __has_merged_page(io->bio, inode, page, ino);
 666                         f2fs_up_read(&io->io_rwsem);
 667                 }
 668                 if (ret)
 669                         __f2fs_submit_merged_write(sbi, type, temp);
 670
 671                 /* TODO: use HOT temp only for meta pages now. */
 672                 if (type >= META)
 673                         break;
 674         }
 675 }
 676
/* Unconditionally submit the pending merged writes of @type (force = true). */
void f2fs_submit_merged_write(struct f2fs_sb_info *sbi, enum page_type type)
{
	__submit_merged_write_cond(sbi, NULL, NULL, 0, type, true);
}
 681
/*
 * Submit the pending merged writes of @type only where the pending bio
 * matches @inode, @page or @ino (force = false).
 */
void f2fs_submit_merged_write_cond(struct f2fs_sb_info *sbi,
				struct inode *inode, struct page *page,
				nid_t ino, enum page_type type)
{
	__submit_merged_write_cond(sbi, inode, page, ino, type, false);
}
 688
/* Submit every pending merged write: DATA first, then NODE, then META. */
void f2fs_flush_merged_writes(struct f2fs_sb_info *sbi)
{
	f2fs_submit_merged_write(sbi, DATA);
	f2fs_submit_merged_write(sbi, NODE);
	f2fs_submit_merged_write(sbi, META);
}
 695
 696 /*
 697  * Fill the locked page with data located in the block address.
 698  * A caller needs to unlock the page on failure.
 699  */
 700 int f2fs_submit_page_bio(struct f2fs_io_info *fio)
 701 {
 702         struct bio *bio;
 703         struct page *page = fio->encrypted_page ?
 704                         fio->encrypted_page : fio->page;
 705
 706         if (!f2fs_is_valid_blkaddr(fio->sbi, fio->new_blkaddr,
 707                         fio->is_por ? META_POR : (__is_meta_io(fio) ?
 708                         META_GENERIC : DATA_GENERIC_ENHANCE)))
 709                 return -EFSCORRUPTED;
 710
 711         trace_f2fs_submit_page_bio(page, fio);
 712
 713         /* Allocate a new bio */
 714         bio = __bio_alloc(fio, 1);
 715
 716         f2fs_set_bio_crypt_ctx(bio, fio->page->mapping->host,
 717                                fio->page->index, fio, GFP_NOIO);
 718
 719         if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) {
 720                 bio_put(bio);
 721                 return -EFAULT;
 722         }
 723
 724         if (fio->io_wbc && !is_read_io(fio->op))
 725                 wbc_account_cgroup_owner(fio->io_wbc, page, PAGE_SIZE);
 726
 727         inc_page_count(fio->sbi, is_read_io(fio->op) ?
 728                         __read_io_type(page): WB_DATA_TYPE(fio->page));
 729
 730         __submit_bio(fio->sbi, bio, fio->type);
 731         return 0;
 732 }
 733
 734 static bool page_is_mergeable(struct f2fs_sb_info *sbi, struct bio *bio,
 735                                 block_t last_blkaddr, block_t cur_blkaddr)
 736 {
 737         if (unlikely(sbi->max_io_bytes &&
 738                         bio->bi_iter.bi_size >= sbi->max_io_bytes))
 739                 return false;
 740         if (last_blkaddr + 1 != cur_blkaddr)
 741                 return false;
 742         return bio->bi_bdev == f2fs_target_device(sbi, cur_blkaddr, NULL);
 743 }
 744
 745 static bool io_type_is_mergeable(struct f2fs_bio_info *io,
 746                                                 struct f2fs_io_info *fio)
 747 {
 748         if (io->fio.op != fio->op)
 749                 return false;
 750         return io->fio.op_flags == fio->op_flags;
 751 }
 752
 753 static bool io_is_mergeable(struct f2fs_sb_info *sbi, struct bio *bio,
 754                                         struct f2fs_bio_info *io,
 755                                         struct f2fs_io_info *fio,
 756                                         block_t last_blkaddr,
 757                                         block_t cur_blkaddr)
 758 {
 759         if (F2FS_IO_ALIGNED(sbi) && (fio->type == DATA || fio->type == NODE)) {
 760                 unsigned int filled_blocks =
 761                                 F2FS_BYTES_TO_BLK(bio->bi_iter.bi_size);
 762                 unsigned int io_size = F2FS_IO_SIZE(sbi);
 763                 unsigned int left_vecs = bio->bi_max_vecs - bio->bi_vcnt;
 764
 765                 /* IOs in bio is aligned and left space of vectors is not enough */
 766                 if (!(filled_blocks % io_size) && left_vecs < io_size)
 767                         return false;
 768         }
 769         if (!page_is_mergeable(sbi, bio, last_blkaddr, cur_blkaddr))
 770                 return false;
 771         return io_type_is_mergeable(io, fio);
 772 }
 773
 774 static void add_bio_entry(struct f2fs_sb_info *sbi, struct bio *bio,
 775                                 struct page *page, enum temp_type temp)
 776 {
 777         struct f2fs_bio_info *io = sbi->write_io[DATA] + temp;
 778         struct bio_entry *be;
 779
 780         be = f2fs_kmem_cache_alloc(bio_entry_slab, GFP_NOFS, true, NULL);
 781         be->bio = bio;
 782         bio_get(bio);
 783
 784         if (bio_add_page(bio, page, PAGE_SIZE, 0) != PAGE_SIZE)
 785                 f2fs_bug_on(sbi, 1);
 786
 787         f2fs_down_write(&io->bio_list_lock);
 788         list_add_tail(&be->list, &io->bio_list);
 789         f2fs_up_write(&io->bio_list_lock);
 790 }
 791
/*
 * Unlink @be from the list it is on and free it back to bio_entry_slab.
 * The bio it points to is not touched here.
 */
static void del_bio_entry(struct bio_entry *be)
{
	list_del(&be->list);
	kmem_cache_free(bio_entry_slab, be);
}
 797
 798 static int add_ipu_page(struct f2fs_io_info *fio, struct bio **bio,
 799                                                         struct page *page)
 800 {
 801         struct f2fs_sb_info *sbi = fio->sbi;
 802         enum temp_type temp;
 803         bool found = false;
 804         int ret = -EAGAIN;
 805
 806         for (temp = HOT; temp < NR_TEMP_TYPE && !found; temp++) {
 807                 struct f2fs_bio_info *io = sbi->write_io[DATA] + temp;
 808                 struct list_head *head = &io->bio_list;
 809                 struct bio_entry *be;
 810
 811                 f2fs_down_write(&io->bio_list_lock);
 812                 list_for_each_entry(be, head, list) {
 813                         if (be->bio != *bio)
 814                                 continue;
 815
 816                         found = true;
 817
 818                         f2fs_bug_on(sbi, !page_is_mergeable(sbi, *bio,
 819                                                             *fio->last_block,
 820                                                             fio->new_blkaddr));
 821                         if (f2fs_crypt_mergeable_bio(*bio,
 822                                         fio->page->mapping->host,
 823                                         fio->page->index, fio) &&
 824                             bio_add_page(*bio, page, PAGE_SIZE, 0) ==
 825                                         PAGE_SIZE) {
 826                                 ret = 0;
 827                                 break;
 828                         }
 829
 830                         /* page can't be merged into bio; submit the bio */
 831                         del_bio_entry(be);
 832                         __submit_bio(sbi, *bio, DATA);
 833                         break;
 834                 }
 835                 f2fs_up_write(&io->bio_list_lock);
 836         }
 837
 838         if (ret) {
 839                 bio_put(*bio);
 840                 *bio = NULL;
 841         }
 842
 843         return ret;
 844 }
 845
 846 void f2fs_submit_merged_ipu_write(struct f2fs_sb_info *sbi,
 847                                         struct bio **bio, struct page *page)
 848 {
 849         enum temp_type temp;
 850         bool found = false;
 851         struct bio *target = bio ? *bio : NULL;
 852
 853         for (temp = HOT; temp < NR_TEMP_TYPE && !found; temp++) {
 854                 struct f2fs_bio_info *io = sbi->write_io[DATA] + temp;
 855                 struct list_head *head = &io->bio_list;
 856                 struct bio_entry *be;
 857
 858                 if (list_empty(head))
 859                         continue;
 860
 861                 f2fs_down_read(&io->bio_list_lock);
 862                 list_for_each_entry(be, head, list) {
 863                         if (target)
 864                                 found = (target == be->bio);
 865                         else
 866                                 found = __has_merged_page(be->bio, NULL,
 867                                                                 page, 0);
 868                         if (found)
 869                                 break;
 870                 }
 871                 f2fs_up_read(&io->bio_list_lock);
 872
 873                 if (!found)
 874                         continue;
 875
 876                 found = false;
 877
 878                 f2fs_down_write(&io->bio_list_lock);
 879                 list_for_each_entry(be, head, list) {
 880                         if (target)
 881                                 found = (target == be->bio);
 882                         else
 883                                 found = __has_merged_page(be->bio, NULL,
 884                                                                 page, 0);
 885                         if (found) {
 886                                 target = be->bio;
 887                                 del_bio_entry(be);
 888                                 break;
 889                         }
 890                 }
 891                 f2fs_up_write(&io->bio_list_lock);
 892         }
 893
 894         if (found)
 895                 __submit_bio(sbi, target, DATA);
 896         if (bio && *bio) {
 897                 bio_put(*bio);
 898                 *bio = NULL;
 899         }
 900 }
 901
 902 int f2fs_merge_page_bio(struct f2fs_io_info *fio)
 903 {
 904         struct bio *bio = *fio->bio;
 905         struct page *page = fio->encrypted_page ?
 906                         fio->encrypted_page : fio->page;
 907
 908         if (!f2fs_is_valid_blkaddr(fio->sbi, fio->new_blkaddr,
 909                         __is_meta_io(fio) ? META_GENERIC : DATA_GENERIC))
 910                 return -EFSCORRUPTED;
 911
 912         trace_f2fs_submit_page_bio(page, fio);
 913
 914         if (bio && !page_is_mergeable(fio->sbi, bio, *fio->last_block,
 915                                                 fio->new_blkaddr))
 916                 f2fs_submit_merged_ipu_write(fio->sbi, &bio, NULL);
 917 alloc_new:
 918         if (!bio) {
 919                 bio = __bio_alloc(fio, BIO_MAX_VECS);
 920                 f2fs_set_bio_crypt_ctx(bio, fio->page->mapping->host,
 921                                        fio->page->index, fio, GFP_NOIO);
 922
 923                 add_bio_entry(fio->sbi, bio, page, fio->temp);
 924         } else {
 925                 if (add_ipu_page(fio, &bio, page))
 926                         goto alloc_new;
 927         }
 928
 929         if (fio->io_wbc)
 930                 wbc_account_cgroup_owner(fio->io_wbc, page, PAGE_SIZE);
 931
 932         inc_page_count(fio->sbi, WB_DATA_TYPE(page));
 933
 934         *fio->last_block = fio->new_blkaddr;
 935         *fio->bio = bio;
 936
 937         return 0;
 938 }
 939
 940 void f2fs_submit_page_write(struct f2fs_io_info *fio)
 941 {
 942         struct f2fs_sb_info *sbi = fio->sbi;
 943         enum page_type btype = PAGE_TYPE_OF_BIO(fio->type);
 944         struct f2fs_bio_info *io = sbi->write_io[btype] + fio->temp;
 945         struct page *bio_page;
 946
 947         f2fs_bug_on(sbi, is_read_io(fio->op));
 948
 949         f2fs_down_write(&io->io_rwsem);
 950 next:
 951         if (fio->in_list) {
 952                 spin_lock(&io->io_lock);
 953                 if (list_empty(&io->io_list)) {
 954                         spin_unlock(&io->io_lock);
 955                         goto out;
 956                 }
 957                 fio = list_first_entry(&io->io_list,
 958                                                 struct f2fs_io_info, list);
 959                 list_del(&fio->list);
 960                 spin_unlock(&io->io_lock);
 961         }
 962
 963         verify_fio_blkaddr(fio);
 964
 965         if (fio->encrypted_page)
 966                 bio_page = fio->encrypted_page;
 967         else if (fio->compressed_page)
 968                 bio_page = fio->compressed_page;
 969         else
 970                 bio_page = fio->page;
 971
 972         /* set submitted = true as a return value */
 973         fio->submitted = true;
 974
 975         inc_page_count(sbi, WB_DATA_TYPE(bio_page));
 976
 977         if (io->bio &&
 978             (!io_is_mergeable(sbi, io->bio, io, fio, io->last_block_in_bio,
 979                               fio->new_blkaddr) ||
 980              !f2fs_crypt_mergeable_bio(io->bio, fio->page->mapping->host,
 981                                        bio_page->index, fio)))
 982                 __submit_merged_bio(io);
 983 alloc_new:
 984         if (io->bio == NULL) {
 985                 if (F2FS_IO_ALIGNED(sbi) &&
 986                                 (fio->type == DATA || fio->type == NODE) &&
 987                                 fio->new_blkaddr & F2FS_IO_SIZE_MASK(sbi)) {
 988                         dec_page_count(sbi, WB_DATA_TYPE(bio_page));
 989                         fio->retry = true;
 990                         goto skip;
 991                 }
 992                 io->bio = __bio_alloc(fio, BIO_MAX_VECS);
 993                 f2fs_set_bio_crypt_ctx(io->bio, fio->page->mapping->host,
 994                                        bio_page->index, fio, GFP_NOIO);
 995                 io->fio = *fio;
 996         }
 997
 998         if (bio_add_page(io->bio, bio_page, PAGE_SIZE, 0) < PAGE_SIZE) {
 999                 __submit_merged_bio(io);
1000                 goto alloc_new;
1001         }
1002
1003         if (fio->io_wbc)
1004                 wbc_account_cgroup_owner(fio->io_wbc, bio_page, PAGE_SIZE);
1005
1006         io->last_block_in_bio = fio->new_blkaddr;
1007
1008         trace_f2fs_submit_page_write(fio->page, fio);
1009 skip:
1010         if (fio->in_list)
1011                 goto next;
1012 out:
1013         if (is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN) ||
1014                                 !f2fs_is_checkpoint_ready(sbi))
1015                 __submit_merged_bio(io);
1016         f2fs_up_write(&io->io_rwsem);
1017 }
1018
1019 static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr,
1020                                       unsigned nr_pages, blk_opf_t op_flag,
1021                                       pgoff_t first_idx, bool for_write)
1022 {
1023         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1024         struct bio *bio;
1025         struct bio_post_read_ctx *ctx = NULL;
1026         unsigned int post_read_steps = 0;
1027         sector_t sector;
1028         struct block_device *bdev = f2fs_target_device(sbi, blkaddr, &sector);
1029
1030         bio = bio_alloc_bioset(bdev, bio_max_segs(nr_pages),
1031                                REQ_OP_READ | op_flag,
1032                                for_write ? GFP_NOIO : GFP_KERNEL, &f2fs_bioset);
1033         if (!bio)
1034                 return ERR_PTR(-ENOMEM);
1035         bio->bi_iter.bi_sector = sector;
1036         f2fs_set_bio_crypt_ctx(bio, inode, first_idx, NULL, GFP_NOFS);
1037         bio->bi_end_io = f2fs_read_end_io;
1038
1039         if (fscrypt_inode_uses_fs_layer_crypto(inode))
1040                 post_read_steps |= STEP_DECRYPT;
1041
1042         if (f2fs_need_verity(inode, first_idx))
1043                 post_read_steps |= STEP_VERITY;
1044
1045         /*
1046          * STEP_DECOMPRESS is handled specially, since a compressed file might
1047          * contain both compressed and uncompressed clusters.  We'll allocate a
1048          * bio_post_read_ctx if the file is compressed, but the caller is
1049          * responsible for enabling STEP_DECOMPRESS if it's actually needed.
1050          */
1051
1052         if (post_read_steps || f2fs_compressed_file(inode)) {
1053                 /* Due to the mempool, this never fails. */
1054                 ctx = mempool_alloc(bio_post_read_ctx_pool, GFP_NOFS);
1055                 ctx->bio = bio;
1056                 ctx->sbi = sbi;
1057                 ctx->enabled_steps = post_read_steps;
1058                 ctx->fs_blkaddr = blkaddr;
1059                 bio->bi_private = ctx;
1060         }
1061         iostat_alloc_and_bind_ctx(sbi, bio, ctx);
1062
1063         return bio;
1064 }
1065
1066 /* This can handle encryption stuffs */
1067 static int f2fs_submit_page_read(struct inode *inode, struct page *page,
1068                                  block_t blkaddr, blk_opf_t op_flags,
1069                                  bool for_write)
1070 {
1071         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1072         struct bio *bio;
1073
1074         bio = f2fs_grab_read_bio(inode, blkaddr, 1, op_flags,
1075                                         page->index, for_write);
1076         if (IS_ERR(bio))
1077                 return PTR_ERR(bio);
1078
1079         /* wait for GCed page writeback via META_MAPPING */
1080         f2fs_wait_on_block_writeback(inode, blkaddr);
1081
1082         if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) {
1083                 bio_put(bio);
1084                 return -EFAULT;
1085         }
1086         ClearPageError(page);
1087         inc_page_count(sbi, F2FS_RD_DATA);
1088         f2fs_update_iostat(sbi, FS_DATA_READ_IO, F2FS_BLKSIZE);
1089         __submit_bio(sbi, bio, DATA);
1090         return 0;
1091 }
1092
1093 static void __set_data_blkaddr(struct dnode_of_data *dn)
1094 {
1095         struct f2fs_node *rn = F2FS_NODE(dn->node_page);
1096         __le32 *addr_array;
1097         int base = 0;
1098
1099         if (IS_INODE(dn->node_page) && f2fs_has_extra_attr(dn->inode))
1100                 base = get_extra_isize(dn->inode);
1101
1102         /* Get physical address of data block */
1103         addr_array = blkaddr_in_node(rn);
1104         addr_array[base + dn->ofs_in_node] = cpu_to_le32(dn->data_blkaddr);
1105 }
1106
1107 /*
1108  * Lock ordering for the change of data block address:
1109  * ->data_page
1110  *  ->node_page
1111  *    update block addresses in the node page
1112  */
1113 void f2fs_set_data_blkaddr(struct dnode_of_data *dn)
1114 {
1115         f2fs_wait_on_page_writeback(dn->node_page, NODE, true, true);
1116         __set_data_blkaddr(dn);
1117         if (set_page_dirty(dn->node_page))
1118                 dn->node_changed = true;
1119 }
1120
/*
 * Record @blkaddr in @dn, write it into the node page through
 * f2fs_set_data_blkaddr(), then update the extent cache for @dn.
 */
void f2fs_update_data_blkaddr(struct dnode_of_data *dn, block_t blkaddr)
{
        dn->data_blkaddr = blkaddr;
        f2fs_set_data_blkaddr(dn);
        f2fs_update_extent_cache(dn);
}
1127
1128 /* dn->ofs_in_node will be returned with up-to-date last block pointer */
1129 int f2fs_reserve_new_blocks(struct dnode_of_data *dn, blkcnt_t count)
1130 {
1131         struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
1132         int err;
1133
1134         if (!count)
1135                 return 0;
1136
1137         if (unlikely(is_inode_flag_set(dn->inode, FI_NO_ALLOC)))
1138                 return -EPERM;
1139         if (unlikely((err = inc_valid_block_count(sbi, dn->inode, &count))))
1140                 return err;
1141
1142         trace_f2fs_reserve_new_blocks(dn->inode, dn->nid,
1143                                                 dn->ofs_in_node, count);
1144
1145         f2fs_wait_on_page_writeback(dn->node_page, NODE, true, true);
1146
1147         for (; count > 0; dn->ofs_in_node++) {
1148                 block_t blkaddr = f2fs_data_blkaddr(dn);
1149
1150                 if (blkaddr == NULL_ADDR) {
1151                         dn->data_blkaddr = NEW_ADDR;
1152                         __set_data_blkaddr(dn);
1153                         count--;
1154                 }
1155         }
1156
1157         if (set_page_dirty(dn->node_page))
1158                 dn->node_changed = true;
1159         return 0;
1160 }
1161
1162 /* Should keep dn->ofs_in_node unchanged */
1163 int f2fs_reserve_new_block(struct dnode_of_data *dn)
1164 {
1165         unsigned int ofs_in_node = dn->ofs_in_node;
1166         int ret;
1167
1168         ret = f2fs_reserve_new_blocks(dn, 1);
1169         dn->ofs_in_node = ofs_in_node;
1170         return ret;
1171 }
1172
1173 int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index)
1174 {
1175         bool need_put = dn->inode_page ? false : true;
1176         int err;
1177
1178         err = f2fs_get_dnode_of_data(dn, index, ALLOC_NODE);
1179         if (err)
1180                 return err;
1181
1182         if (dn->data_blkaddr == NULL_ADDR)
1183                 err = f2fs_reserve_new_block(dn);
1184         if (err || need_put)
1185                 f2fs_put_dnode(dn);
1186         return err;
1187 }
1188
1189 int f2fs_get_block(struct dnode_of_data *dn, pgoff_t index)
1190 {
1191         struct extent_info ei = {0, };
1192         struct inode *inode = dn->inode;
1193
1194         if (f2fs_lookup_extent_cache(inode, index, &ei)) {
1195                 dn->data_blkaddr = ei.blk + index - ei.fofs;
1196                 return 0;
1197         }
1198
1199         return f2fs_reserve_block(dn, index);
1200 }
1201
1202 struct page *f2fs_get_read_data_page(struct inode *inode, pgoff_t index,
1203                                      blk_opf_t op_flags, bool for_write)
1204 {
1205         struct address_space *mapping = inode->i_mapping;
1206         struct dnode_of_data dn;
1207         struct page *page;
1208         struct extent_info ei = {0, };
1209         int err;
1210
1211         page = f2fs_grab_cache_page(mapping, index, for_write);
1212         if (!page)
1213                 return ERR_PTR(-ENOMEM);
1214
1215         if (f2fs_lookup_extent_cache(inode, index, &ei)) {
1216                 dn.data_blkaddr = ei.blk + index - ei.fofs;
1217                 if (!f2fs_is_valid_blkaddr(F2FS_I_SB(inode), dn.data_blkaddr,
1218                                                 DATA_GENERIC_ENHANCE_READ)) {
1219                         err = -EFSCORRUPTED;
1220                         goto put_err;
1221                 }
1222                 goto got_it;
1223         }
1224
1225         set_new_dnode(&dn, inode, NULL, NULL, 0);
1226         err = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE);
1227         if (err)
1228                 goto put_err;
1229         f2fs_put_dnode(&dn);
1230
1231         if (unlikely(dn.data_blkaddr == NULL_ADDR)) {
1232                 err = -ENOENT;
1233                 goto put_err;
1234         }
1235         if (dn.data_blkaddr != NEW_ADDR &&
1236                         !f2fs_is_valid_blkaddr(F2FS_I_SB(inode),
1237                                                 dn.data_blkaddr,
1238                                                 DATA_GENERIC_ENHANCE)) {
1239                 err = -EFSCORRUPTED;
1240                 goto put_err;
1241         }
1242 got_it:
1243         if (PageUptodate(page)) {
1244                 unlock_page(page);
1245                 return page;
1246         }
1247
1248         /*
1249          * A new dentry page is allocated but not able to be written, since its
1250          * new inode page couldn't be allocated due to -ENOSPC.
1251          * In such the case, its blkaddr can be remained as NEW_ADDR.
1252          * see, f2fs_add_link -> f2fs_get_new_data_page ->
1253          * f2fs_init_inode_metadata.
1254          */
1255         if (dn.data_blkaddr == NEW_ADDR) {
1256                 zero_user_segment(page, 0, PAGE_SIZE);
1257                 if (!PageUptodate(page))
1258                         SetPageUptodate(page);
1259                 unlock_page(page);
1260                 return page;
1261         }
1262
1263         err = f2fs_submit_page_read(inode, page, dn.data_blkaddr,
1264                                                 op_flags, for_write);
1265         if (err)
1266                 goto put_err;
1267         return page;
1268
1269 put_err:
1270         f2fs_put_page(page, 1);
1271         return ERR_PTR(err);
1272 }
1273
1274 struct page *f2fs_find_data_page(struct inode *inode, pgoff_t index)
1275 {
1276         struct address_space *mapping = inode->i_mapping;
1277         struct page *page;
1278
1279         page = find_get_page(mapping, index);
1280         if (page && PageUptodate(page))
1281                 return page;
1282         f2fs_put_page(page, 0);
1283
1284         page = f2fs_get_read_data_page(inode, index, 0, false);
1285         if (IS_ERR(page))
1286                 return page;
1287
1288         if (PageUptodate(page))
1289                 return page;
1290
1291         wait_on_page_locked(page);
1292         if (unlikely(!PageUptodate(page))) {
1293                 f2fs_put_page(page, 0);
1294                 return ERR_PTR(-EIO);
1295         }
1296         return page;
1297 }
1298
1299 /*
1300  * If it tries to access a hole, return an error.
1301  * Because, the callers, functions in dir.c and GC, should be able to know
1302  * whether this page exists or not.
1303  */
1304 struct page *f2fs_get_lock_data_page(struct inode *inode, pgoff_t index,
1305                                                         bool for_write)
1306 {
1307         struct address_space *mapping = inode->i_mapping;
1308         struct page *page;
1309 repeat:
1310         page = f2fs_get_read_data_page(inode, index, 0, for_write);
1311         if (IS_ERR(page))
1312                 return page;
1313
1314         /* wait for read completion */
1315         lock_page(page);
1316         if (unlikely(page->mapping != mapping)) {
1317                 f2fs_put_page(page, 1);
1318                 goto repeat;
1319         }
1320         if (unlikely(!PageUptodate(page))) {
1321                 f2fs_put_page(page, 1);
1322                 return ERR_PTR(-EIO);
1323         }
1324         return page;
1325 }
1326
1327 /*
1328  * Caller ensures that this data page is never allocated.
1329  * A new zero-filled data page is allocated in the page cache.
1330  *
1331  * Also, caller should grab and release a rwsem by calling f2fs_lock_op() and
1332  * f2fs_unlock_op().
1333  * Note that, ipage is set only by make_empty_dir, and if any error occur,
1334  * ipage should be released by this function.
1335  */
1336 struct page *f2fs_get_new_data_page(struct inode *inode,
1337                 struct page *ipage, pgoff_t index, bool new_i_size)
1338 {
1339         struct address_space *mapping = inode->i_mapping;
1340         struct page *page;
1341         struct dnode_of_data dn;
1342         int err;
1343
1344         page = f2fs_grab_cache_page(mapping, index, true);
1345         if (!page) {
1346                 /*
1347                  * before exiting, we should make sure ipage will be released
1348                  * if any error occur.
1349                  */
1350                 f2fs_put_page(ipage, 1);
1351                 return ERR_PTR(-ENOMEM);
1352         }
1353
1354         set_new_dnode(&dn, inode, ipage, NULL, 0);
1355         err = f2fs_reserve_block(&dn, index);
1356         if (err) {
1357                 f2fs_put_page(page, 1);
1358                 return ERR_PTR(err);
1359         }
1360         if (!ipage)
1361                 f2fs_put_dnode(&dn);
1362
1363         if (PageUptodate(page))
1364                 goto got_it;
1365
1366         if (dn.data_blkaddr == NEW_ADDR) {
1367                 zero_user_segment(page, 0, PAGE_SIZE);
1368                 if (!PageUptodate(page))
1369                         SetPageUptodate(page);
1370         } else {
1371                 f2fs_put_page(page, 1);
1372
1373                 /* if ipage exists, blkaddr should be NEW_ADDR */
1374                 f2fs_bug_on(F2FS_I_SB(inode), ipage);
1375                 page = f2fs_get_lock_data_page(inode, index, true);
1376                 if (IS_ERR(page))
1377                         return page;
1378         }
1379 got_it:
1380         if (new_i_size && i_size_read(inode) <
1381                                 ((loff_t)(index + 1) << PAGE_SHIFT))
1382                 f2fs_i_size_write(inode, ((loff_t)(index + 1) << PAGE_SHIFT));
1383         return page;
1384 }
1385
1386 static int __allocate_data_block(struct dnode_of_data *dn, int seg_type)
1387 {
1388         struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
1389         struct f2fs_summary sum;
1390         struct node_info ni;
1391         block_t old_blkaddr;
1392         blkcnt_t count = 1;
1393         int err;
1394
1395         if (unlikely(is_inode_flag_set(dn->inode, FI_NO_ALLOC)))
1396                 return -EPERM;
1397
1398         err = f2fs_get_node_info(sbi, dn->nid, &ni, false);
1399         if (err)
1400                 return err;
1401
1402         dn->data_blkaddr = f2fs_data_blkaddr(dn);
1403         if (dn->data_blkaddr != NULL_ADDR)
1404                 goto alloc;
1405
1406         if (unlikely((err = inc_valid_block_count(sbi, dn->inode, &count))))
1407                 return err;
1408
1409 alloc:
1410         set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version);
1411         old_blkaddr = dn->data_blkaddr;
1412         f2fs_allocate_data_block(sbi, NULL, old_blkaddr, &dn->data_blkaddr,
1413                                 &sum, seg_type, NULL);
1414         if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO) {
1415                 invalidate_mapping_pages(META_MAPPING(sbi),
1416                                         old_blkaddr, old_blkaddr);
1417                 f2fs_invalidate_compress_page(sbi, old_blkaddr);
1418         }
1419         f2fs_update_data_blkaddr(dn, dn->data_blkaddr);
1420         return 0;
1421 }
1422
1423 void f2fs_do_map_lock(struct f2fs_sb_info *sbi, int flag, bool lock)
1424 {
1425         if (flag == F2FS_GET_BLOCK_PRE_AIO) {
1426                 if (lock)
1427                         f2fs_down_read(&sbi->node_change);
1428                 else
1429                         f2fs_up_read(&sbi->node_change);
1430         } else {
1431                 if (lock)
1432                         f2fs_lock_op(sbi);
1433                 else
1434                         f2fs_unlock_op(sbi);
1435         }
1436 }
1437
1438 /*
1439  * f2fs_map_blocks() tries to find or build mapping relationship which
1440  * maps continuous logical blocks to physical blocks, and return such
1441  * info via f2fs_map_blocks structure.
1442  */
1443 int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
1444                                                 int create, int flag)
1445 {
1446         unsigned int maxblocks = map->m_len;
1447         struct dnode_of_data dn;
1448         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1449         int mode = map->m_may_create ? ALLOC_NODE : LOOKUP_NODE;
1450         pgoff_t pgofs, end_offset, end;
1451         int err = 0, ofs = 1;
1452         unsigned int ofs_in_node, last_ofs_in_node;
1453         blkcnt_t prealloc;
1454         struct extent_info ei = {0, };
1455         block_t blkaddr;
1456         unsigned int start_pgofs;
1457         int bidx = 0;
1458
1459         if (!maxblocks)
1460                 return 0;
1461
1462         map->m_bdev = inode->i_sb->s_bdev;
1463         map->m_multidev_dio =
1464                 f2fs_allow_multi_device_dio(F2FS_I_SB(inode), flag);
1465
1466         map->m_len = 0;
1467         map->m_flags = 0;
1468
1469         /* it only supports block size == page size */
1470         pgofs = (pgoff_t)map->m_lblk;
1471         end = pgofs + maxblocks;
1472
1473         if (!create && f2fs_lookup_extent_cache(inode, pgofs, &ei)) {
1474                 if (f2fs_lfs_mode(sbi) && flag == F2FS_GET_BLOCK_DIO &&
1475                                                         map->m_may_create)
1476                         goto next_dnode;
1477
1478                 map->m_pblk = ei.blk + pgofs - ei.fofs;
1479                 map->m_len = min((pgoff_t)maxblocks, ei.fofs + ei.len - pgofs);
1480                 map->m_flags = F2FS_MAP_MAPPED;
1481                 if (map->m_next_extent)
1482                         *map->m_next_extent = pgofs + map->m_len;
1483
1484                 /* for hardware encryption, but to avoid potential issue in future */
1485                 if (flag == F2FS_GET_BLOCK_DIO)
1486                         f2fs_wait_on_block_writeback_range(inode,
1487                                                 map->m_pblk, map->m_len);
1488
1489                 if (map->m_multidev_dio) {
1490                         block_t blk_addr = map->m_pblk;
1491
1492                         bidx = f2fs_target_device_index(sbi, map->m_pblk);
1493
1494                         map->m_bdev = FDEV(bidx).bdev;
1495                         map->m_pblk -= FDEV(bidx).start_blk;
1496                         map->m_len = min(map->m_len,
1497                                 FDEV(bidx).end_blk + 1 - map->m_pblk);
1498
1499                         if (map->m_may_create)
1500                                 f2fs_update_device_state(sbi, inode->i_ino,
1501                                                         blk_addr, map->m_len);
1502                 }
1503                 goto out;
1504         }
1505
1506 next_dnode:
1507         if (map->m_may_create)
1508                 f2fs_do_map_lock(sbi, flag, true);
1509
1510         /* When reading holes, we need its node page */
1511         set_new_dnode(&dn, inode, NULL, NULL, 0);
1512         err = f2fs_get_dnode_of_data(&dn, pgofs, mode);
1513         if (err) {
1514                 if (flag == F2FS_GET_BLOCK_BMAP)
1515                         map->m_pblk = 0;
1516
1517                 if (err == -ENOENT) {
1518                         /*
1519                          * There is one exceptional case that read_node_page()
1520                          * may return -ENOENT due to filesystem has been
1521                          * shutdown or cp_error, so force to convert error
1522                          * number to EIO for such case.
1523                          */
1524                         if (map->m_may_create &&
1525                                 (is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN) ||
1526                                 f2fs_cp_error(sbi))) {
1527                                 err = -EIO;
1528                                 goto unlock_out;
1529                         }
1530
1531                         err = 0;
1532                         if (map->m_next_pgofs)
1533                                 *map->m_next_pgofs =
1534                                         f2fs_get_next_page_offset(&dn, pgofs);
1535                         if (map->m_next_extent)
1536                                 *map->m_next_extent =
1537                                         f2fs_get_next_page_offset(&dn, pgofs);
1538                 }
1539                 goto unlock_out;
1540         }
1541
1542         start_pgofs = pgofs;
1543         prealloc = 0;
1544         last_ofs_in_node = ofs_in_node = dn.ofs_in_node;
1545         end_offset = ADDRS_PER_PAGE(dn.node_page, inode);
1546
1547 next_block:
1548         blkaddr = f2fs_data_blkaddr(&dn);
1549
1550         if (__is_valid_data_blkaddr(blkaddr) &&
1551                 !f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC_ENHANCE)) {
1552                 err = -EFSCORRUPTED;
1553                 goto sync_out;
1554         }
1555
1556         if (__is_valid_data_blkaddr(blkaddr)) {
 1557                 /* use out-place-update for direct IO under LFS mode */
1558                 if (f2fs_lfs_mode(sbi) && flag == F2FS_GET_BLOCK_DIO &&
1559                                                         map->m_may_create) {
1560                         err = __allocate_data_block(&dn, map->m_seg_type);
1561                         if (err)
1562                                 goto sync_out;
1563                         blkaddr = dn.data_blkaddr;
1564                         set_inode_flag(inode, FI_APPEND_WRITE);
1565                 }
1566         } else {
1567                 if (create) {
1568                         if (unlikely(f2fs_cp_error(sbi))) {
1569                                 err = -EIO;
1570                                 goto sync_out;
1571                         }
1572                         if (flag == F2FS_GET_BLOCK_PRE_AIO) {
1573                                 if (blkaddr == NULL_ADDR) {
1574                                         prealloc++;
1575                                         last_ofs_in_node = dn.ofs_in_node;
1576                                 }
1577                         } else {
1578                                 WARN_ON(flag != F2FS_GET_BLOCK_PRE_DIO &&
1579                                         flag != F2FS_GET_BLOCK_DIO);
1580                                 err = __allocate_data_block(&dn,
1581                                                         map->m_seg_type);
1582                                 if (!err) {
1583                                         if (flag == F2FS_GET_BLOCK_PRE_DIO)
1584                                                 file_need_truncate(inode);
1585                                         set_inode_flag(inode, FI_APPEND_WRITE);
1586                                 }
1587                         }
1588                         if (err)
1589                                 goto sync_out;
1590                         map->m_flags |= F2FS_MAP_NEW;
1591                         blkaddr = dn.data_blkaddr;
1592                 } else {
1593                         if (f2fs_compressed_file(inode) &&
1594                                         f2fs_sanity_check_cluster(&dn) &&
1595                                         (flag != F2FS_GET_BLOCK_FIEMAP ||
1596                                         IS_ENABLED(CONFIG_F2FS_CHECK_FS))) {
1597                                 err = -EFSCORRUPTED;
1598                                 goto sync_out;
1599                         }
1600                         if (flag == F2FS_GET_BLOCK_BMAP) {
1601                                 map->m_pblk = 0;
1602                                 goto sync_out;
1603                         }
1604                         if (flag == F2FS_GET_BLOCK_PRECACHE)
1605                                 goto sync_out;
1606                         if (flag == F2FS_GET_BLOCK_FIEMAP &&
1607                                                 blkaddr == NULL_ADDR) {
1608                                 if (map->m_next_pgofs)
1609                                         *map->m_next_pgofs = pgofs + 1;
1610                                 goto sync_out;
1611                         }
1612                         if (flag != F2FS_GET_BLOCK_FIEMAP) {
1613                                 /* for defragment case */
1614                                 if (map->m_next_pgofs)
1615                                         *map->m_next_pgofs = pgofs + 1;
1616                                 goto sync_out;
1617                         }
1618                 }
1619         }
1620
1621         if (flag == F2FS_GET_BLOCK_PRE_AIO)
1622                 goto skip;
1623
1624         if (map->m_multidev_dio)
1625                 bidx = f2fs_target_device_index(sbi, blkaddr);
1626
1627         if (map->m_len == 0) {
1628                 /* preallocated unwritten block should be mapped for fiemap. */
1629                 if (blkaddr == NEW_ADDR)
1630                         map->m_flags |= F2FS_MAP_UNWRITTEN;
1631                 map->m_flags |= F2FS_MAP_MAPPED;
1632
1633                 map->m_pblk = blkaddr;
1634                 map->m_len = 1;
1635
1636                 if (map->m_multidev_dio)
1637                         map->m_bdev = FDEV(bidx).bdev;
1638         } else if ((map->m_pblk != NEW_ADDR &&
1639                         blkaddr == (map->m_pblk + ofs)) ||
1640                         (map->m_pblk == NEW_ADDR && blkaddr == NEW_ADDR) ||
1641                         flag == F2FS_GET_BLOCK_PRE_DIO) {
1642                 if (map->m_multidev_dio && map->m_bdev != FDEV(bidx).bdev)
1643                         goto sync_out;
1644                 ofs++;
1645                 map->m_len++;
1646         } else {
1647                 goto sync_out;
1648         }
1649
1650 skip:
1651         dn.ofs_in_node++;
1652         pgofs++;
1653
1654         /* preallocate blocks in batch for one dnode page */
1655         if (flag == F2FS_GET_BLOCK_PRE_AIO &&
1656                         (pgofs == end || dn.ofs_in_node == end_offset)) {
1657
1658                 dn.ofs_in_node = ofs_in_node;
1659                 err = f2fs_reserve_new_blocks(&dn, prealloc);
1660                 if (err)
1661                         goto sync_out;
1662
1663                 map->m_len += dn.ofs_in_node - ofs_in_node;
1664                 if (prealloc && dn.ofs_in_node != last_ofs_in_node + 1) {
1665                         err = -ENOSPC;
1666                         goto sync_out;
1667                 }
1668                 dn.ofs_in_node = end_offset;
1669         }
1670
1671         if (pgofs >= end)
1672                 goto sync_out;
1673         else if (dn.ofs_in_node < end_offset)
1674                 goto next_block;
1675
1676         if (flag == F2FS_GET_BLOCK_PRECACHE) {
1677                 if (map->m_flags & F2FS_MAP_MAPPED) {
1678                         unsigned int ofs = start_pgofs - map->m_lblk;
1679
1680                         f2fs_update_extent_cache_range(&dn,
1681                                 start_pgofs, map->m_pblk + ofs,
1682                                 map->m_len - ofs);
1683                 }
1684         }
1685
1686         f2fs_put_dnode(&dn);
1687
1688         if (map->m_may_create) {
1689                 f2fs_do_map_lock(sbi, flag, false);
1690                 f2fs_balance_fs(sbi, dn.node_changed);
1691         }
1692         goto next_dnode;
1693
1694 sync_out:
1695
1696         if (flag == F2FS_GET_BLOCK_DIO && map->m_flags & F2FS_MAP_MAPPED) {
1697                 /*
1698                  * for hardware encryption, but to avoid potential issue
1699                  * in future
1700                  */
1701                 f2fs_wait_on_block_writeback_range(inode,
1702                                                 map->m_pblk, map->m_len);
1703
1704                 if (map->m_multidev_dio) {
1705                         block_t blk_addr = map->m_pblk;
1706
1707                         bidx = f2fs_target_device_index(sbi, map->m_pblk);
1708
1709                         map->m_bdev = FDEV(bidx).bdev;
1710                         map->m_pblk -= FDEV(bidx).start_blk;
1711
1712                         if (map->m_may_create)
1713                                 f2fs_update_device_state(sbi, inode->i_ino,
1714                                                         blk_addr, map->m_len);
1715
1716                         f2fs_bug_on(sbi, blk_addr + map->m_len >
1717                                                 FDEV(bidx).end_blk + 1);
1718                 }
1719         }
1720
1721         if (flag == F2FS_GET_BLOCK_PRECACHE) {
1722                 if (map->m_flags & F2FS_MAP_MAPPED) {
1723                         unsigned int ofs = start_pgofs - map->m_lblk;
1724
1725                         f2fs_update_extent_cache_range(&dn,
1726                                 start_pgofs, map->m_pblk + ofs,
1727                                 map->m_len - ofs);
1728                 }
1729                 if (map->m_next_extent)
1730                         *map->m_next_extent = pgofs + 1;
1731         }
1732         f2fs_put_dnode(&dn);
1733 unlock_out:
1734         if (map->m_may_create) {
1735                 f2fs_do_map_lock(sbi, flag, false);
1736                 f2fs_balance_fs(sbi, dn.node_changed);
1737         }
1738 out:
1739         trace_f2fs_map_blocks(inode, map, create, flag, err);
1740         return err;
1741 }
1742
/*
 * Tell whether writing @len bytes at @pos would only touch blocks that
 * f2fs_map_blocks() already reports for @inode.
 *
 * Returns false when the range reaches past i_size, when a lookup fails, or
 * when a lookup comes back with m_len == 0 (presumably a hole - confirm
 * against f2fs_map_blocks()).  Returns true once every block below
 * F2FS_BLK_ALIGN(pos + len) has been covered.  Lookups never allocate:
 * create is 0 and m_may_create is false.
 */
1743 bool f2fs_overwrite_io(struct inode *inode, loff_t pos, size_t len)
1744 {
1745         struct f2fs_map_blocks map;
1746         block_t end_blk;
1747
1748         if (pos + len > i_size_read(inode))
1749                 return false;
1750
1751         map.m_lblk = F2FS_BYTES_TO_BLK(pos);
1752         map.m_next_pgofs = NULL;
1753         map.m_next_extent = NULL;
1754         map.m_seg_type = NO_CHECK_TYPE;
1755         map.m_may_create = false;
1756         end_blk = F2FS_BLK_ALIGN(pos + len);
1757
             /* ask for the rest of the range, then step over what was mapped */
1758         for (; map.m_lblk < end_blk; map.m_lblk += map.m_len) {
1759                 map.m_len = end_blk - map.m_lblk;
1760                 if (f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_DEFAULT))
1761                         return false;
1762                 if (!map.m_len)
1763                         return false;
1764         }
1765         return true;
1766 }
1768
/*
 * Convert a byte count to a block count by shifting right by the inode's
 * i_blkbits; a trailing partial block is dropped.
 */
1769 static inline u64 bytes_to_blks(struct inode *inode, u64 bytes)
1770 {
1771         return bytes >> inode->i_blkbits;
1772 }
1773
/*
 * Convert a block count to a byte count by shifting left by the inode's
 * i_blkbits.
 */
1774 static inline u64 blks_to_bytes(struct inode *inode, u64 blks)
1775 {
1776         return blks << inode->i_blkbits;
1777 }
1778
/*
 * Report where an inode's extended attributes are stored (FIEMAP_FLAG_XATTR).
 *
 * Up to two extents are produced, both at logical offset 0:
 *  - the inline xattr area inside the inode block, when the inode has inline
 *    xattrs (flagged DATA_INLINE | NOT_ALIGNED, plus LAST if there is no
 *    separate xattr node);
 *  - the xattr node block, when i_xattr_nid is non-zero (flagged LAST).
 * Physical addresses come from f2fs_get_node_info(); the node page is grabbed
 * before that lookup and released once the extent geometry is computed.
 *
 * Returns -ENOMEM if a node page cannot be grabbed, or the error from
 * f2fs_get_node_info().  A non-zero result from filling the inline extent is
 * returned unchanged (it may be positive).  Otherwise returns the negative
 * error of the final fill, or 0.
 */
1779 static int f2fs_xattr_fiemap(struct inode *inode,
1780                                 struct fiemap_extent_info *fieinfo)
1781 {
1782         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1783         nid_t xnid = F2FS_I(inode)->i_xattr_nid;
1784         struct node_info ninfo;
1785         struct page *npage;
1786         __u64 phys = 0, len;
1787         __u32 flags;
1788         int err = 0;
1789
1790         if (f2fs_has_inline_xattr(inode)) {
1791                 int offset;
1792
1793                 npage = f2fs_grab_cache_page(NODE_MAPPING(sbi),
1794                                                 inode->i_ino, false);
1795                 if (!npage)
1796                         return -ENOMEM;
1797
1798                 err = f2fs_get_node_info(sbi, inode->i_ino, &ninfo, false);
1799                 if (err) {
1800                         f2fs_put_page(npage, 1);
1801                         return err;
1802                 }
1803
                     /* byte offset of the inline xattr area within the inode block */
1804                 offset = offsetof(struct f2fs_inode, i_addr) +
1805                                 sizeof(__le32) * (DEF_ADDRS_PER_INODE -
1806                                 get_inline_xattr_addrs(inode));
1807                 phys = blks_to_bytes(inode, ninfo.blk_addr) + offset;
1808                 len = inline_xattr_size(inode);
1809
1810                 f2fs_put_page(npage, 1);
1811
1812                 flags = FIEMAP_EXTENT_DATA_INLINE | FIEMAP_EXTENT_NOT_ALIGNED;
1813                 if (!xnid)
1814                         flags |= FIEMAP_EXTENT_LAST;
1815
1816                 err = fiemap_fill_next_extent(fieinfo, 0, phys, len, flags);
1817                 trace_f2fs_fiemap(inode, 0, phys, len, flags, err);
1818                 if (err)
1819                         return err;
1820         }
1821
1822         if (xnid) {
1823                 npage = f2fs_grab_cache_page(NODE_MAPPING(sbi), xnid, false);
1824                 if (!npage)
1825                         return -ENOMEM;
1826
1827                 err = f2fs_get_node_info(sbi, xnid, &ninfo, false);
1828                 if (err) {
1829                         f2fs_put_page(npage, 1);
1830                         return err;
1831                 }
1832
1833                 phys = blks_to_bytes(inode, ninfo.blk_addr);
1834                 len = inode->i_sb->s_blocksize;
1835
1836                 f2fs_put_page(npage, 1);
1837
1838                 flags = FIEMAP_EXTENT_LAST;
1839         }
1840
1841         if (phys) {
1842                 err = fiemap_fill_next_extent(fieinfo, 0, phys, len, flags);
1843                 trace_f2fs_fiemap(inode, 0, phys, len, flags, err);
1844         }
1845
1846         if (err < 0)
1847                 return err;
1848         return 0;
1849 }
1851
/*
 * Number of data-block addresses reachable from one inode: the addresses held
 * in the inode itself plus those reachable through two direct, two indirect
 * and one double-indirect node block.
 */
1852 static loff_t max_inode_blocks(struct inode *inode)
1853 {
1854         loff_t total = ADDRS_PER_INODE(inode);
1855         const loff_t per_direct = ADDRS_PER_BLOCK(inode);
1856         const loff_t per_indirect = per_direct * NIDS_PER_BLOCK;
1857         const loff_t per_dindirect = per_indirect * NIDS_PER_BLOCK;
1858
1859         /* two direct node blocks */
1860         total += 2 * per_direct;
1861
1862         /* two indirect node blocks */
1863         total += 2 * per_indirect;
1864
1865         /* one double indirect node block */
1866         total += per_dindirect;
1867
1868         return total;
1869 }
1870
/*
 * f2fs_fiemap - report the extent layout of @inode for a FIEMAP request.
 * @inode:   inode being queried
 * @fieinfo: request flags and extent sink
 * @start:   first byte of the requested range
 * @len:     length of the requested range in bytes
 *
 * Handled in order: optional extent precaching (FIEMAP_FLAG_CACHE), the xattr
 * map (FIEMAP_FLAG_XATTR), inline data/dentry inodes, and finally a walk over
 * the file driven by f2fs_map_blocks(F2FS_GET_BLOCK_FIEMAP).
 *
 * The walk keeps one pending extent in logical/phys/size/flags.  It is emitted
 * at the start of a later iteration, so blocks belonging to a compressed
 * cluster can still be appended to it first.
 *
 * The inode lock is held from after fiemap_prep() until return.
 *
 * Returns 0 on success (a result of 1 from the helpers is folded into 0),
 * -EFBIG if @start lies beyond the maximum file size, -EINTR if a fatal signal
 * is pending, or a negative error from one of the helpers.
 */
1871 int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
1872                 u64 start, u64 len)
1873 {
1874         struct f2fs_map_blocks map;
1875         sector_t start_blk, last_blk;
1876         pgoff_t next_pgofs;
1877         u64 logical = 0, phys = 0, size = 0;
1878         u32 flags = 0;
1879         int ret = 0;
1880         bool compr_cluster = false, compr_appended;
1881         unsigned int cluster_size = F2FS_I(inode)->i_cluster_size;
1882         unsigned int count_in_cluster = 0;
1883         loff_t maxbytes;
1884
1885         if (fieinfo->fi_flags & FIEMAP_FLAG_CACHE) {
1886                 ret = f2fs_precache_extents(inode);
1887                 if (ret)
1888                         return ret;
1889         }
1890
1891         ret = fiemap_prep(inode, fieinfo, start, &len, FIEMAP_FLAG_XATTR);
1892         if (ret)
1893                 return ret;
1894
1895         inode_lock(inode);
1896
1897         maxbytes = max_file_blocks(inode) << F2FS_BLKSIZE_BITS;
1898         if (start > maxbytes) {
1899                 ret = -EFBIG;
1900                 goto out;
1901         }
1902
             /* clamp the range so that start + len does not exceed maxbytes */
1903         if (len > maxbytes || (maxbytes - len) < start)
1904                 len = maxbytes - start;
1905
1906         if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR) {
1907                 ret = f2fs_xattr_fiemap(inode, fieinfo);
1908                 goto out;
1909         }
1910
1911         if (f2fs_has_inline_data(inode) || f2fs_has_inline_dentry(inode)) {
1912                 ret = f2fs_inline_data_fiemap(inode, fieinfo, start, len);
                     /* -EAGAIN falls through to the block-mapped walk below */
1913                 if (ret != -EAGAIN)
1914                         goto out;
1915         }
1916
             /* a request shorter than one block is widened to one block */
1917         if (bytes_to_blks(inode, len) == 0)
1918                 len = blks_to_bytes(inode, 1);
1919
1920         start_blk = bytes_to_blks(inode, start);
1921         last_blk = bytes_to_blks(inode, start + len - 1);
1922
1923 next:
1924         memset(&map, 0, sizeof(map));
1925         map.m_lblk = start_blk;
1926         map.m_len = bytes_to_blks(inode, len);
1927         map.m_next_pgofs = &next_pgofs;
1928         map.m_seg_type = NO_CHECK_TYPE;
1929
             /*
              * Inside a compressed cluster start_blk trails the next unprobed
              * block by one, so probe from start_blk + 1 and only for the blocks
              * of the cluster that have not been counted yet.
              */
1930         if (compr_cluster) {
1931                 map.m_lblk += 1;
1932                 map.m_len = cluster_size - count_in_cluster;
1933         }
1934
1935         ret = f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_FIEMAP);
1936         if (ret)
1937                 goto out;
1938
1939         /* HOLE */
1940         if (!compr_cluster && !(map.m_flags & F2FS_MAP_FLAGS)) {
                     /* resume at the offset f2fs_map_blocks() stored in next_pgofs */
1941                 start_blk = next_pgofs;
1942
1943                 if (blks_to_bytes(inode, start_blk) < blks_to_bytes(inode,
1944                                                 max_inode_blocks(inode)))
1945                         goto prep_next;
1946
                     /* nothing can follow: the pending extent is the last one */
1947                 flags |= FIEMAP_EXTENT_LAST;
1948         }
1949
1950         compr_appended = false;
1951         /* In a case of compressed cluster, append this to the last extent */
1952         if (compr_cluster && ((map.m_flags & F2FS_MAP_UNWRITTEN) ||
1953                         !(map.m_flags & F2FS_MAP_FLAGS))) {
1954                 compr_appended = true;
1955                 goto skip_fill;
1956         }
1957
             /* emit the extent recorded by an earlier iteration, if any */
1958         if (size) {
1959                 flags |= FIEMAP_EXTENT_MERGED;
1960                 if (IS_ENCRYPTED(inode))
1961                         flags |= FIEMAP_EXTENT_DATA_ENCRYPTED;
1962
1963                 ret = fiemap_fill_next_extent(fieinfo, logical,
1964                                 phys, size, flags);
1965                 trace_f2fs_fiemap(inode, logical, phys, size, flags, ret);
1966                 if (ret)
1967                         goto out;
1968                 size = 0;
1969         }
1970
             /* the walk has moved past the requested range */
1971         if (start_blk > last_blk)
1972                 goto out;
1973
1974 skip_fill:
1975         if (map.m_pblk == COMPRESS_ADDR) {
                     /* first block of a compressed cluster: start counting its blocks */
1976                 compr_cluster = true;
1977                 count_in_cluster = 1;
1978         } else if (compr_appended) {
                     /* extend the pending extent over the remainder of the cluster */
1979                 unsigned int appended_blks = cluster_size -
1980                                                 count_in_cluster + 1;
1981                 size += blks_to_bytes(inode, appended_blks);
1982                 start_blk += appended_blks;
1983                 compr_cluster = false;
1984         } else {
                     /* record a new pending extent from this mapping */
1985                 logical = blks_to_bytes(inode, start_blk);
1986                 phys = __is_valid_data_blkaddr(map.m_pblk) ?
1987                         blks_to_bytes(inode, map.m_pblk) : 0;
1988                 size = blks_to_bytes(inode, map.m_len);
1989                 flags = 0;
1990
1991                 if (compr_cluster) {
1992                         flags = FIEMAP_EXTENT_ENCODED;
1993                         count_in_cluster += map.m_len;
                             /* whole cluster seen: add one more block to the extent size */
1994                         if (count_in_cluster == cluster_size) {
1995                                 compr_cluster = false;
1996                                 size += blks_to_bytes(inode, 1);
1997                         }
1998                 } else if (map.m_flags & F2FS_MAP_UNWRITTEN) {
1999                         flags = FIEMAP_EXTENT_UNWRITTEN;
2000                 }
2001
2002                 start_blk += bytes_to_blks(inode, size);
2003         }
2004
2005 prep_next:
             /* yield between iterations; give up on a pending fatal signal */
2006         cond_resched();
2007         if (fatal_signal_pending(current))
2008                 ret = -EINTR;
2009         else
2010                 goto next;
2011 out:
             /* a helper result of 1 is not an error for the caller */
2012         if (ret == 1)
2013                 ret = 0;
2014
2015         inode_unlock(inode);
2016         return ret;
2017 }
2018
/*
 * Upper byte bound for page reads on @inode: i_size normally, or the
 * superblock's s_maxbytes when CONFIG_FS_VERITY is enabled and the inode is a
 * verity file or has verity setup in progress.
 */
2019 static inline loff_t f2fs_readpage_limit(struct inode *inode)
2020 {
2021         if (!IS_ENABLED(CONFIG_FS_VERITY))
2022                 return i_size_read(inode);
2023         if (!IS_VERITY(inode) && !f2fs_verity_in_progress(inode))
2024                 return i_size_read(inode);
2025         return inode->i_sb->s_maxbytes;
2026 }
2027
/*
 * f2fs_read_single_page - map one page and queue it on the current read bio.
 * @inode:             file being read
 * @page:              page to fill
 * @nr_pages:          bounds the mapping request; also passed to
 *                     f2fs_grab_read_bio() when a new bio is needed
 * @map:               mapping result kept across calls; reused when it
 *                     already covers this page
 * @bio_ret:           in/out: current read bio (may be submitted and replaced)
 * @last_block_in_bio: in/out: block address of the page last added to the bio
 * @is_readahead:      selects REQ_RAHEAD for a newly allocated bio
 *
 * A page at or beyond the read limit, or one that is not mapped, is zeroed,
 * marked uptodate and unlocked here.  A mapped page is added to the bio.  The
 * bio is submitted first when the page cannot be merged into it or when
 * bio_add_page() cannot take the page.
 *
 * Returns 0 on success.  On failure returns the error from f2fs_map_blocks()
 * or f2fs_grab_read_bio(), -EFSCORRUPTED for an invalid block address, or -EIO
 * when verity rejects a zero-filled page.  The page is not unlocked on failure;
 * the caller visible in this file, f2fs_mpage_readpages(), does that.
 */
2028 static int f2fs_read_single_page(struct inode *inode, struct page *page,
2029                                         unsigned nr_pages,
2030                                         struct f2fs_map_blocks *map,
2031                                         struct bio **bio_ret,
2032                                         sector_t *last_block_in_bio,
2033                                         bool is_readahead)
2034 {
2035         struct bio *bio = *bio_ret;
2036         const unsigned blocksize = blks_to_bytes(inode, 1);
2037         sector_t block_in_file;
2038         sector_t last_block;
2039         sector_t last_block_in_file;
2040         sector_t block_nr;
2041         int ret = 0;
2042
2043         block_in_file = (sector_t)page_index(page);
2044         last_block = block_in_file + nr_pages;
             /* read limit rounded up to whole blocks */
2045         last_block_in_file = bytes_to_blks(inode,
2046                         f2fs_readpage_limit(inode) + blocksize - 1);
2047         if (last_block > last_block_in_file)
2048                 last_block = last_block_in_file;
2049
2050         /* just zeroing out page which is beyond EOF */
2051         if (block_in_file >= last_block)
2052                 goto zero_out;
2053         /*
2054          * Map blocks using the previous result first.
2055          */
2056         if ((map->m_flags & F2FS_MAP_MAPPED) &&
2057                         block_in_file > map->m_lblk &&
2058                         block_in_file < (map->m_lblk + map->m_len))
2059                 goto got_it;
2060
2061         /*
2062          * Then do more f2fs_map_blocks() calls until we are
2063          * done with this page.
2064          */
2065         map->m_lblk = block_in_file;
2066         map->m_len = last_block - block_in_file;
2067
2068         ret = f2fs_map_blocks(inode, map, 0, F2FS_GET_BLOCK_DEFAULT);
2069         if (ret)
2070                 goto out;
2071 got_it:
2072         if ((map->m_flags & F2FS_MAP_MAPPED)) {
                     /* physical block = extent start + this page's offset in the extent */
2073                 block_nr = map->m_pblk + block_in_file - map->m_lblk;
2074                 SetPageMappedToDisk(page);
2075
2076                 if (!f2fs_is_valid_blkaddr(F2FS_I_SB(inode), block_nr,
2077                                                 DATA_GENERIC_ENHANCE_READ)) {
2078                         ret = -EFSCORRUPTED;
2079                         goto out;
2080                 }
2081         } else {
                     /* also entered by goto for pages beyond the read limit */
2082 zero_out:
2083                 zero_user_segment(page, 0, PAGE_SIZE);
2084                 if (f2fs_need_verity(inode, page->index) &&
2085                     !fsverity_verify_page(page)) {
2086                         ret = -EIO;
2087                         goto out;
2088                 }
2089                 if (!PageUptodate(page))
2090                         SetPageUptodate(page);
2091                 unlock_page(page);
2092                 goto out;
2093         }
2094
2095         /*
2096          * This page will go to BIO.  Do we need to send this
2097          * BIO off first?
2098          */
2099         if (bio && (!page_is_mergeable(F2FS_I_SB(inode), bio,
2100                                        *last_block_in_bio, block_nr) ||
2101                     !f2fs_crypt_mergeable_bio(bio, inode, page->index, NULL))) {
                     /* also the retry target when bio_add_page() below comes up short */
2102 submit_and_realloc:
2103                 __submit_bio(F2FS_I_SB(inode), bio, DATA);
2104                 bio = NULL;
2105         }
2106         if (bio == NULL) {
2107                 bio = f2fs_grab_read_bio(inode, block_nr, nr_pages,
2108                                 is_readahead ? REQ_RAHEAD : 0, page->index,
2109                                 false);
2110                 if (IS_ERR(bio)) {
2111                         ret = PTR_ERR(bio);
                             /* hand NULL, not the error pointer, back through *bio_ret */
2112                         bio = NULL;
2113                         goto out;
2114                 }
2115         }
2116
2117         /*
2118          * If the page is under writeback, we need to wait for
2119          * its completion to see the correct decrypted data.
2120          */
2121         f2fs_wait_on_block_writeback(inode, block_nr);
2122
2123         if (bio_add_page(bio, page, blocksize, 0) < blocksize)
2124                 goto submit_and_realloc;
2125
2126         inc_page_count(F2FS_I_SB(inode), F2FS_RD_DATA);
2127         f2fs_update_iostat(F2FS_I_SB(inode), FS_DATA_READ_IO, F2FS_BLKSIZE);
2128         ClearPageError(page);
2129         *last_block_in_bio = block_nr;
             /* this goto targets the very next statement */
2130         goto out;
2131 out:
2132         *bio_ret = bio;
2133         return ret;
2134 }
2135
2136 #ifdef CONFIG_F2FS_FS_COMPRESSION
/*
 * f2fs_read_multi_pages - queue reads for the compressed blocks of a cluster.
 * @cc:                compress context; cc->rpages holds the destination pages
 * @bio_ret:           in/out: current read bio (may be submitted and replaced)
 * @nr_pages:          passed to f2fs_grab_read_bio() when a new bio is needed
 * @last_block_in_bio: in/out: block address of the page last added to the bio
 * @is_readahead:      selects REQ_RAHEAD for a newly allocated bio
 * @for_write:         passed to f2fs_grab_read_bio(); when set, pages released
 *                     early by this function also have their reference dropped
 *
 * Pages beyond the read limit are zeroed, and they and already-uptodate pages
 * are unlocked and removed from the cluster.  The compressed block addresses
 * are then taken either from the extent cache or from the dnode, a decompress
 * context is allocated, and each compressed page is added to a read bio unless
 * f2fs_load_compressed_page() already supplied it.
 *
 * Returns 0 on success, or when there is nothing left to read.  On failure
 * returns a negative error; on the shared error exits the remaining pages in
 * cc->rpages have their uptodate/error bits cleared and are unlocked.
 */
2137 int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret,
2138                                 unsigned nr_pages, sector_t *last_block_in_bio,
2139                                 bool is_readahead, bool for_write)
2140 {
2141         struct dnode_of_data dn;
2142         struct inode *inode = cc->inode;
2143         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
2144         struct bio *bio = *bio_ret;
2145         unsigned int start_idx = cc->cluster_idx << cc->log_cluster_size;
2146         sector_t last_block_in_file;
2147         const unsigned blocksize = blks_to_bytes(inode, 1);
2148         struct decompress_io_ctx *dic = NULL;
2149         struct extent_info ei = {0, };
             /* false once the extent cache supplies the block addresses */
2150         bool from_dnode = true;
2151         int i;
2152         int ret = 0;
2153
2154         f2fs_bug_on(sbi, f2fs_cluster_is_empty(cc));
2155
2156         last_block_in_file = bytes_to_blks(inode,
2157                         f2fs_readpage_limit(inode) + blocksize - 1);
2158
2159         /* get rid of pages beyond EOF */
2160         for (i = 0; i < cc->cluster_size; i++) {
2161                 struct page *page = cc->rpages[i];
2162
2163                 if (!page)
2164                         continue;
2165                 if ((sector_t)page->index >= last_block_in_file) {
2166                         zero_user_segment(page, 0, PAGE_SIZE);
2167                         if (!PageUptodate(page))
2168                                 SetPageUptodate(page);
2169                 } else if (!PageUptodate(page)) {
                             /* still needs to be read: keep it in the cluster */
2170                         continue;
2171                 }
2172                 unlock_page(page);
2173                 if (for_write)
2174                         put_page(page);
2175                 cc->rpages[i] = NULL;
2176                 cc->nr_rpages--;
2177         }
2178
2179         /* we are done since all pages are beyond EOF */
2180         if (f2fs_cluster_is_empty(cc))
2181                 goto out;
2182
2183         if (f2fs_lookup_extent_cache(inode, start_idx, &ei))
2184                 from_dnode = false;
2185
             /* extent cache hit: dn stays uninitialised from here on */
2186         if (!from_dnode)
2187                 goto skip_reading_dnode;
2188
2189         set_new_dnode(&dn, inode, NULL, NULL, 0);
2190         ret = f2fs_get_dnode_of_data(&dn, start_idx, LOOKUP_NODE);
2191         if (ret)
2192                 goto out;
2193
2194         f2fs_bug_on(sbi, dn.data_blkaddr != COMPRESS_ADDR);
2195
2196 skip_reading_dnode:
             /* count the valid compressed blocks that follow the cluster's first slot */
2197         for (i = 1; i < cc->cluster_size; i++) {
2198                 block_t blkaddr;
2199
2200                 blkaddr = from_dnode ? data_blkaddr(dn.inode, dn.node_page,
2201                                         dn.ofs_in_node + i) :
2202                                         ei.blk + i - 1;
2203
2204                 if (!__is_valid_data_blkaddr(blkaddr))
2205                         break;
2206
2207                 if (!f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC)) {
2208                         ret = -EFAULT;
2209                         goto out_put_dnode;
2210                 }
2211                 cc->nr_cpages++;
2212
2213                 if (!from_dnode && i >= ei.c_len)
2214                         break;
2215         }
2216
2217         /* nothing to decompress */
2218         if (cc->nr_cpages == 0) {
2219                 ret = 0;
2220                 goto out_put_dnode;
2221         }
2222
2223         dic = f2fs_alloc_dic(cc);
2224         if (IS_ERR(dic)) {
2225                 ret = PTR_ERR(dic);
2226                 goto out_put_dnode;
2227         }
2228
2229         for (i = 0; i < cc->nr_cpages; i++) {
2230                 struct page *page = dic->cpages[i];
2231                 block_t blkaddr;
2232                 struct bio_post_read_ctx *ctx;
2233
2234                 blkaddr = from_dnode ? data_blkaddr(dn.inode, dn.node_page,
2235                                         dn.ofs_in_node + i + 1) :
2236                                         ei.blk + i;
2237
2238                 f2fs_wait_on_block_writeback(inode, blkaddr);
2239
                     /* page obtained without issuing a read: account for it directly */
2240                 if (f2fs_load_compressed_page(sbi, page, blkaddr)) {
2241                         if (atomic_dec_and_test(&dic->remaining_pages))
2242                                 f2fs_decompress_cluster(dic, true);
2243                         continue;
2244                 }
2245
2246                 if (bio && (!page_is_mergeable(sbi, bio,
2247                                         *last_block_in_bio, blkaddr) ||
2248                     !f2fs_crypt_mergeable_bio(bio, inode, page->index, NULL))) {
2249 submit_and_realloc:
2250                         __submit_bio(sbi, bio, DATA);
2251                         bio = NULL;
2252                 }
2253
2254                 if (!bio) {
2255                         bio = f2fs_grab_read_bio(inode, blkaddr, nr_pages,
2256                                         is_readahead ? REQ_RAHEAD : 0,
2257                                         page->index, for_write);
2258                         if (IS_ERR(bio)) {
2259                                 ret = PTR_ERR(bio);
2260                                 f2fs_decompress_end_io(dic, ret, true);
                                     /*
                                      * dn is only set up on the dnode path; on
                                      * the extent-cache path it is uninitialised
                                      * and must not be put.
                                      */
2261                                 if (from_dnode)
2262                                         f2fs_put_dnode(&dn);
2263                                 *bio_ret = NULL;
2264                                 return ret;
2265                         }
2266                 }
                     /* a short add means the bio cannot take the page: submit and retry */
2267                 if (bio_add_page(bio, page, blocksize, 0) < blocksize)
2268                         goto submit_and_realloc;
2269
2270                 ctx = get_post_read_ctx(bio);
2271                 ctx->enabled_steps |= STEP_DECOMPRESS;
2272                 refcount_inc(&dic->refcnt);
2273
2274                 inc_page_count(sbi, F2FS_RD_DATA);
2275                 f2fs_update_iostat(sbi, FS_DATA_READ_IO, F2FS_BLKSIZE);
2276                 f2fs_update_iostat(sbi, FS_CDATA_READ_IO, F2FS_BLKSIZE);
2277                 ClearPageError(page);
2278                 *last_block_in_bio = blkaddr;
2279         }
2280
2281         if (from_dnode)
2282                 f2fs_put_dnode(&dn);
2283
2284         *bio_ret = bio;
2285         return 0;
2286
2287 out_put_dnode:
2288         if (from_dnode)
2289                 f2fs_put_dnode(&dn);
2290 out:
             /* release whatever pages are still attached to the cluster */
2291         for (i = 0; i < cc->cluster_size; i++) {
2292                 if (cc->rpages[i]) {
2293                         ClearPageUptodate(cc->rpages[i]);
2294                         ClearPageError(cc->rpages[i]);
2295                         unlock_page(cc->rpages[i]);
2296                 }
2297         }
2298         *bio_ret = bio;
2299         return ret;
2300 }
2301 #endif
2302
2303 /*
2304  * This function was originally taken from fs/mpage.c, and customized for f2fs.
2305  * Major change was from block_size == page_size in f2fs by default.
      *
      * @inode: file being read
      * @rac:   readahead request, or NULL for a single-page read
      * @page:  the page to read when @rac is NULL (ignored otherwise)
      *
      * Processes readahead_count(@rac) pages taken from @rac, or just @page.
      * Pages of a compressed cluster are collected in a compress context and
      * submitted through f2fs_read_multi_pages(); all other pages go through
      * f2fs_read_single_page().  A page whose read setup fails is flagged with
      * an error, zeroed and unlocked here.  Any bio still open at the end is
      * submitted.
      *
      * Returns the value left in ret by the last helper that set it.
2306  */
2307 static int f2fs_mpage_readpages(struct inode *inode,
2308                 struct readahead_control *rac, struct page *page)
2309 {
2310         struct bio *bio = NULL;
2311         sector_t last_block_in_bio = 0;
2312         struct f2fs_map_blocks map;
2313 #ifdef CONFIG_F2FS_FS_COMPRESSION
2314         struct compress_ctx cc = {
2315                 .inode = inode,
2316                 .log_cluster_size = F2FS_I(inode)->i_log_cluster_size,
2317                 .cluster_size = F2FS_I(inode)->i_cluster_size,
2318                 .cluster_idx = NULL_CLUSTER,
2319                 .rpages = NULL,
2320                 .cpages = NULL,
2321                 .nr_rpages = 0,
2322                 .nr_cpages = 0,
2323         };
             /* index of the cluster most recently found to be non-compressed */
2324         pgoff_t nc_cluster_idx = NULL_CLUSTER;
2325 #endif
2326         unsigned nr_pages = rac ? readahead_count(rac) : 1;
2327         unsigned max_nr_pages = nr_pages;
2328         int ret = 0;
2329
2330         map.m_pblk = 0;
2331         map.m_lblk = 0;
2332         map.m_len = 0;
2333         map.m_flags = 0;
2334         map.m_next_pgofs = NULL;
2335         map.m_next_extent = NULL;
2336         map.m_seg_type = NO_CHECK_TYPE;
2337         map.m_may_create = false;
2338
2339         for (; nr_pages; nr_pages--) {
2340                 if (rac) {
2341                         page = readahead_page(rac);
2342                         prefetchw(&page->flags);
2343                 }
2344
2345 #ifdef CONFIG_F2FS_FS_COMPRESSION
2346                 if (f2fs_compressed_file(inode)) {
2347                         /* there are remaining compressed pages, submit them */
2348                         if (!f2fs_cluster_can_merge_page(&cc, page->index)) {
2349                                 ret = f2fs_read_multi_pages(&cc, &bio,
2350                                                         max_nr_pages,
2351                                                         &last_block_in_bio,
2352                                                         rac != NULL, false);
2353                                 f2fs_destroy_compress_ctx(&cc, false);
2354                                 if (ret)
2355                                         goto set_error_page;
2356                         }
2357                         if (cc.cluster_idx == NULL_CLUSTER) {
                                     /* same non-compressed cluster as before: skip the lookup */
2358                                 if (nc_cluster_idx ==
2359                                         page->index >> cc.log_cluster_size) {
2360                                         goto read_single_page;
2361                                 }
2362
2363                                 ret = f2fs_is_compressed_cluster(inode, page->index);
2364                                 if (ret < 0)
2365                                         goto set_error_page;
2366                                 else if (!ret) {
2367                                         nc_cluster_idx =
2368                                                 page->index >> cc.log_cluster_size;
2369                                         goto read_single_page;
2370                                 }
2371
2372                                 nc_cluster_idx = NULL_CLUSTER;
2373                         }
2374                         ret = f2fs_init_compress_ctx(&cc);
2375                         if (ret)
2376                                 goto set_error_page;
2377
2378                         f2fs_compress_ctx_add_page(&cc, page);
2379
2380                         goto next_page;
2381                 }
2382 read_single_page:
2383 #endif
2384
2385                 ret = f2fs_read_single_page(inode, page, max_nr_pages, &map,
2386                                         &bio, &last_block_in_bio, rac);
2387                 if (ret) {
2388 #ifdef CONFIG_F2FS_FS_COMPRESSION
2389 set_error_page:
2390 #endif
                             /* the read could not be set up: hand back a zeroed, unlocked error page */
2391                         SetPageError(page);
2392                         zero_user_segment(page, 0, PAGE_SIZE);
2393                         unlock_page(page);
2394                 }
2395 #ifdef CONFIG_F2FS_FS_COMPRESSION
2396 next_page:
2397 #endif
                     /* only pages taken from the readahead request are put here */
2398                 if (rac)
2399                         put_page(page);
2400
2401 #ifdef CONFIG_F2FS_FS_COMPRESSION
2402                 if (f2fs_compressed_file(inode)) {
2403                         /* last page */
2404                         if (nr_pages == 1 && !f2fs_cluster_is_empty(&cc)) {
2405                                 ret = f2fs_read_multi_pages(&cc, &bio,
2406                                                         max_nr_pages,
2407                                                         &last_block_in_bio,
2408                                                         rac != NULL, false);
2409                                 f2fs_destroy_compress_ctx(&cc, false);
2410                         }
2411                 }
2412 #endif
2413         }
2414         if (bio)
2415                 __submit_bio(F2FS_I_SB(inode), bio, DATA);
2416         return ret;
2417 }
2418
/*
 * Read one folio of file data.
 *
 * If the compression backend for the inode is not ready, the page is unlocked
 * and -EOPNOTSUPP is returned.  For an inode with inline data the inline read
 * is tried first and its result returned unless it is -EAGAIN.  In every
 * other case the page is read through f2fs_mpage_readpages().  @file is unused.
 */
2419 static int f2fs_read_data_folio(struct file *file, struct folio *folio)
2420 {
2421         struct page *page = &folio->page;
2422         struct inode *inode = page_file_mapping(page)->host;
2423
2424         trace_f2fs_readpage(page, DATA);
2425
2426         if (!f2fs_is_compress_backend_ready(inode)) {
2427                 unlock_page(page);
2428                 return -EOPNOTSUPP;
2429         }
2430
             /* If the file has inline data, try to read it directly */
2431         if (f2fs_has_inline_data(inode)) {
2432                 int err = f2fs_read_inline_data(inode, page);
2433
2434                 if (err != -EAGAIN)
2435                         return err;
2436         }
2437         return f2fs_mpage_readpages(inode, NULL, page);
2438 }
2439
2440 static void f2fs_readahead(struct readahead_control *rac)
2441 {
2442         struct inode *host = rac->mapping->host;
2443
2444         trace_f2fs_readpages(host, readahead_index(rac),
2445                                         readahead_count(rac));
2446
2447         /*
2448          * Readahead is skipped when the compress backend is not ready
2449          * and when the file has inline data.
2450          */
2451         if (f2fs_is_compress_backend_ready(host) &&
2452                         !f2fs_has_inline_data(host))
2453                 f2fs_mpage_readpages(host, rac, NULL);
2454 }
2455
2456 int f2fs_encrypt_one_page(struct f2fs_io_info *fio)
2457 {
             /*
              * Fill fio->encrypted_page for a page that belongs to an encrypted
              * file.
              *
              * Returns 0 without encrypting when the inode is not an encrypted
              * file or uses inline crypto, 0 after a successful encryption, or
              * the error carried by fscrypt_encrypt_pagecache_blocks() when it
              * is anything other than -ENOMEM.
              */
2458         struct inode *inode = fio->page->mapping->host;
2459         struct page *mpage, *page;
2460         gfp_t gfp_flags = GFP_NOFS;
2461
2462         if (!f2fs_encrypted_file(inode))
2463                 return 0;
2464
             /* encrypt the compressed page when fio carries one, else fio->page */
2465         page = fio->compressed_page ? fio->compressed_page : fio->page;
2466
2467         /* wait for GCed page writeback via META_MAPPING */
2468         f2fs_wait_on_block_writeback(inode, fio->old_blkaddr);
2469
2470         if (fscrypt_inode_uses_inline_crypto(inode))
2471                 return 0;
2472
2473 retry_encrypt:
2474         fio->encrypted_page = fscrypt_encrypt_pagecache_blocks(page,
2475                                         PAGE_SIZE, 0, gfp_flags);
2476         if (IS_ERR(fio->encrypted_page)) {
2477                 /* flush pending IOs and wait for a while in the ENOMEM case */
2478                 if (PTR_ERR(fio->encrypted_page) == -ENOMEM) {
2479                         f2fs_flush_merged_writes(fio->sbi);
2480                         memalloc_retry_wait(GFP_NOFS);
                             /* every retry after the first failure adds __GFP_NOFAIL */
2481                         gfp_flags |= __GFP_NOFAIL;
2482                         goto retry_encrypt;
2483                 }
2484                 return PTR_ERR(fio->encrypted_page);
2485         }
2486
             /*
              * If META_MAPPING holds a page at index old_blkaddr and it is
              * uptodate, overwrite its content with the new ciphertext.
              */
2487         mpage = find_lock_page(META_MAPPING(fio->sbi), fio->old_blkaddr);
2488         if (mpage) {
2489                 if (PageUptodate(mpage))
2490                         memcpy(page_address(mpage),
2491                                 page_address(fio->encrypted_page), PAGE_SIZE);
2492                 f2fs_put_page(mpage, 1);
2493         }
2494         return 0;
2495 }
2496
2497 static inline bool check_inplace_update_policy(struct inode *inode,
2498                                 struct f2fs_io_info *fio)
2499 {
2500         struct f2fs_sb_info *fsi = F2FS_I_SB(inode);
2501         unsigned int ipu = SM_I(fsi)->ipu_policy;
2502
2503         /* an OPU request on the inode wins when the policy honours it */
2504         if ((ipu & (0x1 << F2FS_IPU_HONOR_OPU_WRITE)) &&
2505                         is_inode_flag_set(inode, FI_OPU_WRITE))
2506                 return false;
2507         if (ipu & (0x1 << F2FS_IPU_FORCE))
2508                 return true;
2509         if (ipu & (0x1 << F2FS_IPU_SSR)) {
2510                 if (f2fs_need_SSR(fsi))
2511                         return true;
2512         }
2513         if (ipu & (0x1 << F2FS_IPU_UTIL)) {
2514                 if (utilization(fsi) > SM_I(fsi)->min_ipu_util)
2515                         return true;
2516         }
2517         if (ipu & (0x1 << F2FS_IPU_SSR_UTIL)) {
2518                 if (f2fs_need_SSR(fsi) &&
2519                         utilization(fsi) > SM_I(fsi)->min_ipu_util)
2520                         return true;
2521         }
2522
2523         /* in-place rewrite of asynchronous, non-encrypted page writes */
2524         if ((ipu & (0x1 << F2FS_IPU_ASYNC)) && fio &&
2525                         fio->op == REQ_OP_WRITE &&
2526                         !(fio->op_flags & REQ_SYNC) && !IS_ENCRYPTED(inode))
2527                 return true;
2528
2529         /* this is only set during fdatasync */
2530         if ((ipu & (0x1 << F2FS_IPU_FSYNC)) &&
2531                         is_inode_flag_set(inode, FI_NEED_IPU))
2532                 return true;
2533
2534         return unlikely(fio && is_sbi_flag_set(fsi, SBI_CP_DISABLED) &&
2535                         !f2fs_is_checkpointed_data(fsi, fio->old_blkaddr));
2536 }
2537
2538 bool f2fs_should_update_inplace(struct inode *inode, struct f2fs_io_info *fio)
2539 {
2540         /* a swap file migrating in aligned write mode is not updated in place */
2541         if (is_inode_flag_set(inode, FI_ALIGNED_WRITE))
2542                 return false;
2543
2544         /*
2545          * Pinned files and cold files (overwritten to avoid fragmentation)
2546          * always go in place; everything else is left to the IPU policy.
2547          */
2548         if (f2fs_is_pinned_file(inode) || file_is_cold(inode))
2549                 return true;
2550
2551         return check_inplace_update_policy(inode, fio);
2552 }
2553
2554 bool f2fs_should_update_outplace(struct inode *inode, struct f2fs_io_info *fio)
2555 {
2556         struct f2fs_sb_info *fsi = F2FS_I_SB(inode);
2557
2558         /* The below cases were checked when setting it. */
2559         if (f2fs_is_pinned_file(inode))
2560                 return false;
2561
2562         if (fio && is_sbi_flag_set(fsi, SBI_NEED_FSCK))
2563                 return true;
2564
2565         /* LFS mode, directories, NOQUOTA and atomic files go out of place */
2566         if (f2fs_lfs_mode(fsi) || S_ISDIR(inode->i_mode) ||
2567                         IS_NOQUOTA(inode) || f2fs_is_atomic_file(inode))
2568                 return true;
2569
2570         /*
2571          * So does a swap file migrating in aligned write mode, and any
2572          * inode carrying FI_OPU_WRITE.
2573          */
2574         if (is_inode_flag_set(inode, FI_ALIGNED_WRITE) ||
2575                         is_inode_flag_set(inode, FI_OPU_WRITE))
2576                 return true;
2577
2578         /*
2579          * the remaining checks need the page and old block address in fio
2580          */
2581         if (!fio)
2582                 return false;
2583
2584         if (page_private_gcing(fio->page) || page_private_dummy(fio->page))
2585                 return true;
2586
2587         return unlikely(is_sbi_flag_set(fsi, SBI_CP_DISABLED) &&
2588                         f2fs_is_checkpointed_data(fsi, fio->old_blkaddr));
2589 }
2590
2591 static inline bool need_inplace_update(struct f2fs_io_info *fio)
2592 {
2593         struct inode *host = fio->page->mapping->host;
2594
2595         /* an out-of-place verdict overrides the in-place policy */
2596         if (!f2fs_should_update_outplace(host, fio))
2597                 return f2fs_should_update_inplace(host, fio);
2598         return false;
2599 }
2600
2601 int f2fs_do_write_data_page(struct f2fs_io_info *fio)
2602 {
             /*
              * Write the data page described by @fio, choosing between an
              * in-place update (IPU) at fio->old_blkaddr and an out-of-place
              * write (OPU) through f2fs_outplace_write_data().
              *
              * Returns 0 on success (also when the block address is NULL_ADDR
              * and nothing is written), -EAGAIN when f2fs_trylock_op() fails,
              * -EFSCORRUPTED for an invalid old block address, or the error of
              * a failing helper.
              */
2603         struct page *page = fio->page;
2604         struct inode *inode = page->mapping->host;
2605         struct dnode_of_data dn;
2606         struct extent_info ei = {0, };
2607         struct node_info ni;
2608         bool ipu_force = false;
2609         int err = 0;
2610
2611         /* Use COW inode to make dnode_of_data for atomic write */
2612         if (f2fs_is_atomic_file(inode))
2613                 set_new_dnode(&dn, F2FS_I(inode)->cow_inode, NULL, NULL, 0);
2614         else
2615                 set_new_dnode(&dn, inode, NULL, NULL, 0);
2616
             /*
              * Shortcut: when IPU is wanted and the extent cache knows the
              * block, use that address and skip the dnode lookup below.
              */
2617         if (need_inplace_update(fio) &&
2618                         f2fs_lookup_extent_cache(inode, page->index, &ei)) {
2619                 fio->old_blkaddr = ei.blk + page->index - ei.fofs;
2620
2621                 if (!f2fs_is_valid_blkaddr(fio->sbi, fio->old_blkaddr,
2622                                                 DATA_GENERIC_ENHANCE))
2623                         return -EFSCORRUPTED;
2624
2625                 ipu_force = true;
2626                 fio->need_lock = LOCK_DONE;
2627                 goto got_it;
2628         }
2629
2630         /* Deadlock due to between page->lock and f2fs_lock_op */
2631         if (fio->need_lock == LOCK_REQ && !f2fs_trylock_op(fio->sbi))
2632                 return -EAGAIN;
2633
2634         err = f2fs_get_dnode_of_data(&dn, page->index, LOOKUP_NODE);
2635         if (err)
2636                 goto out;
2637
2638         fio->old_blkaddr = dn.data_blkaddr;
2639
2640         /* This page is already truncated */
2641         if (fio->old_blkaddr == NULL_ADDR) {
2642                 ClearPageUptodate(page);
2643                 clear_page_private_gcing(page);
2644                 goto out_writepage;
2645         }
2646 got_it:
2647         if (__is_valid_data_blkaddr(fio->old_blkaddr) &&
2648                 !f2fs_is_valid_blkaddr(fio->sbi, fio->old_blkaddr,
2649                                                 DATA_GENERIC_ENHANCE)) {
2650                 err = -EFSCORRUPTED;
2651                 goto out_writepage;
2652         }
2653
2654         /*
2655          * If current allocation needs SSR,
2656          * it had better in-place writes for updated data.
2657          */
2658         if (ipu_force ||
2659                 (__is_valid_data_blkaddr(fio->old_blkaddr) &&
2660                                         need_inplace_update(fio))) {
2661                 err = f2fs_encrypt_one_page(fio);
2662                 if (err)
2663                         goto out_writepage;
2664
2665                 set_page_writeback(page);
2666                 ClearPageError(page);
                     /* the dnode and the op lock are released before the IPU write */
2667                 f2fs_put_dnode(&dn);
2668                 if (fio->need_lock == LOCK_REQ)
2669                         f2fs_unlock_op(fio->sbi);
2670                 err = f2fs_inplace_write_data(fio);
2671                 if (err) {
                             /* failed IPU: finalize the bounce page, end writeback */
2672                         if (fscrypt_inode_uses_fs_layer_crypto(inode))
2673                                 fscrypt_finalize_bounce_page(&fio->encrypted_page);
2674                         if (PageWriteback(page))
2675                                 end_page_writeback(page);
2676                 } else {
2677                         set_inode_flag(inode, FI_UPDATE_WRITE);
2678                 }
2679                 trace_f2fs_do_write_data_page(fio->page, IPU);
2680                 return err;
2681         }
2682
             /*
              * OPU path: a LOCK_RETRY caller takes the op lock here; switching
              * to LOCK_REQ makes the exit path below release it.
              */
2683         if (fio->need_lock == LOCK_RETRY) {
2684                 if (!f2fs_trylock_op(fio->sbi)) {
2685                         err = -EAGAIN;
2686                         goto out_writepage;
2687                 }
2688                 fio->need_lock = LOCK_REQ;
2689         }
2690
2691         err = f2fs_get_node_info(fio->sbi, dn.nid, &ni, false);
2692         if (err)
2693                 goto out_writepage;
2694
2695         fio->version = ni.version;
2696
2697         err = f2fs_encrypt_one_page(fio);
2698         if (err)
2699                 goto out_writepage;
2700
2701         set_page_writeback(page);
2702         ClearPageError(page);
2703
2704         if (fio->compr_blocks && fio->old_blkaddr == COMPRESS_ADDR)
2705                 f2fs_i_compr_blocks_update(inode, fio->compr_blocks - 1, false);
2706
2707         /* LFS mode write path */
2708         f2fs_outplace_write_data(&dn, fio);
2709         trace_f2fs_do_write_data_page(page, OPU);
2710         set_inode_flag(inode, FI_APPEND_WRITE);
2711         if (page->index == 0)
2712                 set_inode_flag(inode, FI_FIRST_BLOCK_WRITTEN);
2713 out_writepage:
2714         f2fs_put_dnode(&dn);
2715 out:
2716         if (fio->need_lock == LOCK_REQ)
2717                 f2fs_unlock_op(fio->sbi);
2718         return err;
2719 }
2720
2721 int f2fs_write_single_data_page(struct page *page, int *submitted,
2722                                 struct bio **bio,
2723                                 sector_t *last_block,
2724                                 struct writeback_control *wbc,
2725                                 enum iostat_type io_type,
2726                                 int compr_blocks,
2727                                 bool allow_balance)
2728 {
             /*
              * Write back one data page.
              *
              * Returns 0 once the page has been handled and unlocked here
              * (written, or dropped from the dirty count without a write).
              * Otherwise the page is redirtied and the function returns either
              * AOP_WRITEPAGE_ACTIVATE with the page still locked, or the
              * negative error after unlocking the page.
              *
              * If @submitted is still non-NULL at the end of the success path,
              * *submitted is set to 1 when fio.submitted is set, else to 0.
              */
2729         struct inode *inode = page->mapping->host;
2730         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
2731         loff_t i_size = i_size_read(inode);
             /* index of the page that contains byte offset i_size */
2732         const pgoff_t end_index = ((unsigned long long)i_size)
2733                                                         >> PAGE_SHIFT;
             /* byte offset just past the end of this page */
2734         loff_t psize = (loff_t)(page->index + 1) << PAGE_SHIFT;
2735         unsigned offset = 0;
2736         bool need_balance_fs = false;
2737         int err = 0;
2738         struct f2fs_io_info fio = {
2739                 .sbi = sbi,
2740                 .ino = inode->i_ino,
2741                 .type = DATA,
2742                 .op = REQ_OP_WRITE,
2743                 .op_flags = wbc_to_write_flags(wbc),
2744                 .old_blkaddr = NULL_ADDR,
2745                 .page = page,
2746                 .encrypted_page = NULL,
2747                 .submitted = false,
2748                 .compr_blocks = compr_blocks,
2749                 .need_lock = LOCK_RETRY,
2750                 .post_read = f2fs_post_read_required(inode),
2751                 .io_type = io_type,
2752                 .io_wbc = wbc,
2753                 .bio = bio,
2754                 .last_block = last_block,
2755         };
2756
2757         trace_f2fs_writepage(page, DATA);
2758
2759         /* we should bypass data pages to proceed the kworker jobs */
2760         if (unlikely(f2fs_cp_error(sbi))) {
2761                 mapping_set_error(page->mapping, -EIO);
2762                 /*
2763                  * don't drop any dirty dentry pages for keeping latest
2764                  * directory structure.
2765                  */
2766                 if (S_ISDIR(inode->i_mode))
2767                         goto redirty_out;
2768                 goto out;
2769         }
2770
2771         if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
2772                 goto redirty_out;
2773
             /*
              * Pages below end_index, verity in progress, or a non-zero
              * compr_blocks: skip the end-of-file handling.
              */
2774         if (page->index < end_index ||
2775                         f2fs_verity_in_progress(inode) ||
2776                         compr_blocks)
2777                 goto write;
2778
2779         /*
2780          * If the offset is out-of-range of file size,
2781          * this page does not have to be written to disk.
2782          */
2783         offset = i_size & (PAGE_SIZE - 1);
2784         if ((page->index >= end_index + 1) || !offset)
2785                 goto out;
2786
             /* the page straddles i_size: zero the part from offset onwards */
2787         zero_user_segment(page, offset, PAGE_SIZE);
2788 write:
2789         if (f2fs_is_drop_cache(inode))
2790                 goto out;
2791
2792         /* Dentry/quota blocks are controlled by checkpoint */
2793         if (S_ISDIR(inode->i_mode) || IS_NOQUOTA(inode)) {
2794                 /*
2795                  * We need to wait for node_write to avoid block allocation during
2796                  * checkpoint. This can only happen to quota writes which can cause
2797                  * the below discard race condition.
2798                  */
2799                 if (IS_NOQUOTA(inode))
2800                         f2fs_down_read(&sbi->node_write);
2801
                     /* LOCK_DONE: f2fs_do_write_data_page() does no op-lock handling */
2802                 fio.need_lock = LOCK_DONE;
2803                 err = f2fs_do_write_data_page(&fio);
2804
2805                 if (IS_NOQUOTA(inode))
2806                         f2fs_up_read(&sbi->node_write);
2807
2808                 goto done;
2809         }
2810
2811         if (!wbc->for_reclaim)
2812                 need_balance_fs = true;
2813         else if (has_not_enough_free_secs(sbi, 0, 0))
2814                 goto redirty_out;
2815         else
2816                 set_inode_flag(inode, FI_HOT_DATA);
2817
             /*
              * -EAGAIN selects the regular write path below; it is kept unless
              * the inline write returns something else.
              */
2818         err = -EAGAIN;
2819         if (f2fs_has_inline_data(inode)) {
2820                 err = f2fs_write_inline_data(inode, page);
2821                 if (!err)
2822                         goto out;
2823         }
2824
2825         if (err == -EAGAIN) {
                     /* first attempt runs with LOCK_RETRY; retry once with LOCK_REQ */
2826                 err = f2fs_do_write_data_page(&fio);
2827                 if (err == -EAGAIN) {
2828                         fio.need_lock = LOCK_REQ;
2829                         err = f2fs_do_write_data_page(&fio);
2830                 }
2831         }
2832
2833         if (err) {
2834                 file_set_keep_isize(inode);
2835         } else {
                     /* raise last_disk_size to the end of this page if it is lower */
2836                 spin_lock(&F2FS_I(inode)->i_size_lock);
2837                 if (F2FS_I(inode)->last_disk_size < psize)
2838                         F2FS_I(inode)->last_disk_size = psize;
2839                 spin_unlock(&F2FS_I(inode)->i_size_lock);
2840         }
2841
2842 done:
             /* -ENOENT is not redirtied; it falls through to the out path */
2843         if (err && err != -ENOENT)
2844                 goto redirty_out;
2845
2846 out:
2847         inode_dec_dirty_pages(inode);
2848         if (err) {
2849                 ClearPageUptodate(page);
2850                 clear_page_private_gcing(page);
2851         }
2852
2853         if (wbc->for_reclaim) {
2854                 f2fs_submit_merged_write_cond(sbi, NULL, page, 0, DATA);
2855                 clear_inode_flag(inode, FI_HOT_DATA);
2856                 f2fs_remove_dirty_inode(inode);
                     /* suppress the *submitted update at the end */
2857                 submitted = NULL;
2858         }
2859         unlock_page(page);
2860         if (!S_ISDIR(inode->i_mode) && !IS_NOQUOTA(inode) &&
2861                         !F2FS_I(inode)->cp_task && allow_balance)
2862                 f2fs_balance_fs(sbi, need_balance_fs);
2863
2864         if (unlikely(f2fs_cp_error(sbi))) {
2865                 f2fs_submit_merged_write(sbi, DATA);
2866                 f2fs_submit_merged_ipu_write(sbi, bio, NULL);
2867                 submitted = NULL;
2868         }
2869
2870         if (submitted)
2871                 *submitted = fio.submitted ? 1 : 0;
2872
2873         return 0;
2874
2875 redirty_out:
2876         redirty_page_for_writepage(wbc, page);
2877         /*
2878          * pageout() in MM translates EAGAIN, so calls handle_write_error()
2879          * -> mapping_set_error() -> set_bit(AS_EIO, ...).
2880          * file_write_and_wait_range() will see EIO error, which is critical
2881          * to return value of fsync() followed by atomic_write failure to user.
2882          */
             /* the page stays locked when AOP_WRITEPAGE_ACTIVATE is returned */
2883         if (!err || wbc->for_reclaim)
2884                 return AOP_WRITEPAGE_ACTIVATE;
2885         unlock_page(page);
2886         return err;
2887 }
2888
2889 static int f2fs_write_data_page(struct page *page,
2890                                         struct writeback_control *wbc)
2891 {
2892 #ifdef CONFIG_F2FS_FS_COMPRESSION
2893         struct inode *host = page->mapping->host;
2894
2895         /*
2896          * Unless f2fs_cp_error() is set, a page inside a compressed cluster
2897          * is redirtied and bounced with AOP_WRITEPAGE_ACTIVATE.
2898          */
2899         if (!unlikely(f2fs_cp_error(F2FS_I_SB(host))) &&
2900                         f2fs_compressed_file(host) &&
2901                         f2fs_is_compressed_cluster(host, page->index)) {
2902                 redirty_page_for_writepage(wbc, page);
2903                 return AOP_WRITEPAGE_ACTIVATE;
2904         }
2905 #endif
2906
2907         return f2fs_write_single_data_page(page, NULL, NULL, NULL,
2908                                                 wbc, FS_DATA_IO, 0, true);
2909 }
2910
2911 /*
2912  * This function was copied from write_cache_pages from mm/page-writeback.c.
2913  * The major change is making write step of cold data page separately from
2914  * warm/hot data page.
2915  */
2916 static int f2fs_write_cache_pages(struct address_space *mapping,
2917                                         struct writeback_control *wbc,
2918                                         enum iostat_type io_type)
2919 {
             /*
              * Walk the tagged pages of @mapping between index and end and write
              * them with f2fs_write_single_data_page().  For compressed files the
              * pages are collected in a compress_ctx and written per cluster with
              * f2fs_write_multi_pages().
              *
              * Returns ret: 0, or the error that ended the walk or came from the
              * final cluster flush.
              */
2920         int ret = 0;
2921         int done = 0, retry = 0;
2922         struct page *pages[F2FS_ONSTACK_PAGES];
2923         struct f2fs_sb_info *sbi = F2FS_M_SB(mapping);
2924         struct bio *bio = NULL;
2925         sector_t last_block;
2926 #ifdef CONFIG_F2FS_FS_COMPRESSION
2927         struct inode *inode = mapping->host;
2928         struct compress_ctx cc = {
2929                 .inode = inode,
2930                 .log_cluster_size = F2FS_I(inode)->i_log_cluster_size,
2931                 .cluster_size = F2FS_I(inode)->i_cluster_size,
2932                 .cluster_idx = NULL_CLUSTER,
2933                 .rpages = NULL,
2934                 .nr_rpages = 0,
2935                 .cpages = NULL,
2936                 .valid_nr_cpages = 0,
2937                 .rbuf = NULL,
2938                 .cbuf = NULL,
2939                 .rlen = PAGE_SIZE * F2FS_I(inode)->i_cluster_size,
2940                 .private = NULL,
2941         };
2942 #endif
2943         int nr_pages;
2944         pgoff_t index;
2945         pgoff_t end;            /* Inclusive */
2946         pgoff_t done_index;
2947         int range_whole = 0;
2948         xa_mark_t tag;
2949         int nwritten = 0;
2950         int submitted = 0;
2951         int i;
2952
             /* FI_HOT_DATA is set while the inode has at most min_hot_blocks dirty pages */
2953         if (get_dirty_pages(mapping->host) <=
2954                                 SM_I(F2FS_M_SB(mapping))->min_hot_blocks)
2955                 set_inode_flag(mapping->host, FI_HOT_DATA);
2956         else
2957                 clear_inode_flag(mapping->host, FI_HOT_DATA);
2958
2959         if (wbc->range_cyclic) {
2960                 index = mapping->writeback_index; /* prev offset */
2961                 end = -1;
2962         } else {
2963                 index = wbc->range_start >> PAGE_SHIFT;
2964                 end = wbc->range_end >> PAGE_SHIFT;
2965                 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
2966                         range_whole = 1;
2967         }
2968         if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
2969                 tag = PAGECACHE_TAG_TOWRITE;
2970         else
2971                 tag = PAGECACHE_TAG_DIRTY;
2972 retry:
2973         retry = 0;
2974         if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
2975                 tag_pages_for_writeback(mapping, index, end);
2976         done_index = index;
2977         while (!done && !retry && (index <= end)) {
2978                 nr_pages = find_get_pages_range_tag(mapping, &index, end,
2979                                 tag, F2FS_ONSTACK_PAGES, pages);
2980                 if (nr_pages == 0)
2981                         break;
2982
2983                 for (i = 0; i < nr_pages; i++) {
2984                         struct page *page = pages[i];
2985                         bool need_readd;
2986 readd:
2987                         need_readd = false;
2988 #ifdef CONFIG_F2FS_FS_COMPRESSION
2989                         if (f2fs_compressed_file(inode)) {
2990                                 void *fsdata = NULL;
2991                                 struct page *pagep;
2992                                 int ret2;
2993
2994                                 ret = f2fs_init_compress_ctx(&cc);
2995                                 if (ret) {
2996                                         done = 1;
2997                                         break;
2998                                 }
2999
                                     /*
                                      * This page cannot join the current cluster:
                                      * write the cluster out first and, if that
                                      * succeeds, process this page again (need_readd).
                                      */
3000                                 if (!f2fs_cluster_can_merge_page(&cc,
3001                                                                 page->index)) {
3002                                         ret = f2fs_write_multi_pages(&cc,
3003                                                 &submitted, wbc, io_type);
3004                                         if (!ret)
3005                                                 need_readd = true;
3006                                         goto result;
3007                                 }
3008
3009                                 if (unlikely(f2fs_cp_error(sbi)))
3010                                         goto lock_page;
3011
3012                                 if (!f2fs_cluster_is_empty(&cc))
3013                                         goto lock_page;
3014
3015                                 if (f2fs_all_cluster_page_ready(&cc,
3016                                         pages, i, nr_pages, true))
3017                                         goto lock_page;
3018
3019                                 ret2 = f2fs_prepare_compress_overwrite(
3020                                                         inode, &pagep,
3021                                                         page->index, &fsdata);
3022                                 if (ret2 < 0) {
3023                                         ret = ret2;
3024                                         done = 1;
3025                                         break;
3026                                 } else if (ret2 &&
3027                                         (!f2fs_compress_write_end(inode,
3028                                                 fsdata, page->index, 1) ||
3029                                          !f2fs_all_cluster_page_ready(&cc,
3030                                                 pages, i, nr_pages, false))) {
                                             /* rescan from index 0 after the loop */
3031                                         retry = 1;
3032                                         break;
3033                                 }
3034                         }
3035 #endif
3036                         /* give a priority to WB_SYNC threads */
3037                         if (atomic_read(&sbi->wb_sync_req[DATA]) &&
3038                                         wbc->sync_mode == WB_SYNC_NONE) {
3039                                 done = 1;
3040                                 break;
3041                         }
3042 #ifdef CONFIG_F2FS_FS_COMPRESSION
3043 lock_page:
3044 #endif
3045                         done_index = page->index;
3046 retry_write:
3047                         lock_page(page);
3048
3049                         if (unlikely(page->mapping != mapping)) {
3050 continue_unlock:
3051                                 unlock_page(page);
3052                                 continue;
3053                         }
3054
3055                         if (!PageDirty(page)) {
3056                                 /* someone wrote it for us */
3057                                 goto continue_unlock;
3058                         }
3059
3060                         if (PageWriteback(page)) {
3061                                 if (wbc->sync_mode != WB_SYNC_NONE)
3062                                         f2fs_wait_on_page_writeback(page,
3063                                                         DATA, true, true);
3064                                 else
3065                                         goto continue_unlock;
3066                         }
3067
3068                         if (!clear_page_dirty_for_io(page))
3069                                 goto continue_unlock;
3070
3071 #ifdef CONFIG_F2FS_FS_COMPRESSION
3072                         if (f2fs_compressed_file(inode)) {
                                     /* extra reference for the page stored in cc */
3073                                 get_page(page);
3074                                 f2fs_compress_ctx_add_page(&cc, page);
3075                                 continue;
3076                         }
3077 #endif
3078                         ret = f2fs_write_single_data_page(page, &submitted,
3079                                         &bio, &last_block, wbc, io_type,
3080                                         0, true);
                             /* AOP_WRITEPAGE_ACTIVATE hands the page back still locked */
3081                         if (ret == AOP_WRITEPAGE_ACTIVATE)
3082                                 unlock_page(page);
3083 #ifdef CONFIG_F2FS_FS_COMPRESSION
3084 result:
3085 #endif
3086                         nwritten += submitted;
3087                         wbc->nr_to_write -= submitted;
3088
3089                         if (unlikely(ret)) {
3090                                 /*
3091                                  * keep nr_to_write, since vfs uses this to
3092                                  * get # of written pages.
3093                                  */
3094                                 if (ret == AOP_WRITEPAGE_ACTIVATE) {
3095                                         ret = 0;
3096                                         goto next;
3097                                 } else if (ret == -EAGAIN) {
3098                                         ret = 0;
3099                                         if (wbc->sync_mode == WB_SYNC_ALL) {
3100                                                 f2fs_io_schedule_timeout(
3101                                                         DEFAULT_IO_TIMEOUT);
3102                                                 goto retry_write;
3103                                         }
3104                                         goto next;
3105                                 }
                                     /* other error: stop, done_index points past this page */
3106                                 done_index = page->index + 1;
3107                                 done = 1;
3108                                 break;
3109                         }
3110
3111                         if (wbc->nr_to_write <= 0 &&
3112                                         wbc->sync_mode == WB_SYNC_NONE) {
3113                                 done = 1;
3114                                 break;
3115                         }
3116 next:
3117                         if (need_readd)
3118                                 goto readd;
3119                 }
3120                 release_pages(pages, nr_pages);
3121                 cond_resched();
3122         }
3123 #ifdef CONFIG_F2FS_FS_COMPRESSION
3124         /* flush remaining pages in compress cluster */
3125         if (f2fs_compressed_file(inode) && !f2fs_cluster_is_empty(&cc)) {
3126                 ret = f2fs_write_multi_pages(&cc, &submitted, wbc, io_type);
3127                 nwritten += submitted;
3128                 wbc->nr_to_write -= submitted;
3129                 if (ret) {
3130                         done = 1;
3131                         retry = 0;
3132                 }
3133         }
3134         if (f2fs_compressed_file(inode))
3135                 f2fs_destroy_compress_ctx(&cc, false);
3136 #endif
             /* a requested retry rescans the whole mapping */
3137         if (retry) {
3138                 index = 0;
3139                 end = -1;
3140                 goto retry;
3141         }
             /* cyclic scan that was not cut short: next run starts at index 0 */
3142         if (wbc->range_cyclic && !done)
3143                 done_index = 0;
3144         if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
3145                 mapping->writeback_index = done_index;
3146
3147         if (nwritten)
3148                 f2fs_submit_merged_write_cond(F2FS_M_SB(mapping), mapping->host,
3149                                                                 NULL, 0, DATA);
3150         /* submit cached bio of IPU write */
3151         if (bio)
3152                 f2fs_submit_merged_ipu_write(sbi, &bio, NULL);
3153
3154         return ret;
3155 }
3156
3157 static inline bool __should_serialize_io(struct inode *inode,
3158                                         struct writeback_control *wbc)
3159 {
3160         /* to avoid deadlock in path of data flush */
3161         if (F2FS_I(inode)->cp_task)
3162                 return false;
3163
3164         /* only regular, non-NOQUOTA files are serialized */
3165         if (!S_ISREG(inode->i_mode) || IS_NOQUOTA(inode))
3166                 return false;
3167
3168         /*
3169          * Serialize data that needs compression and every writeback that
3170          * is not WB_SYNC_ALL; otherwise only from min_seq_blocks dirty pages.
3171          */
3172         if (f2fs_need_compress_data(inode) || wbc->sync_mode != WB_SYNC_ALL)
3173                 return true;
3174         return get_dirty_pages(inode) >=
3175                         SM_I(F2FS_I_SB(inode))->min_seq_blocks;
3176 }
3177
3178 static int __f2fs_write_data_pages(struct address_space *mapping,
3179                                                 struct writeback_control *wbc,
3180                                                 enum iostat_type io_type)
3181 {
             /*
              * Write back the dirty data pages of @mapping: decide whether the
              * writeback should be skipped, optionally serialize on
              * sbi->writepages, and run f2fs_write_cache_pages() under a block
              * plug.
              *
              * Returns 0 when nothing is done or the writeback is skipped,
              * otherwise the result of f2fs_write_cache_pages().
              */
3182         struct inode *inode = mapping->host;
3183         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
3184         struct blk_plug plug;
3185         int ret;
3186         bool locked = false;
3187
3188         /* deal with chardevs and other special file */
3189         if (!mapping->a_ops->writepage)
3190                 return 0;
3191
3192         /* skip writing if there is no dirty page in this inode */
3193         if (!get_dirty_pages(inode) && wbc->sync_mode == WB_SYNC_NONE)
3194                 return 0;
3195
3196         /* during POR, we don't need to trigger writepage at all. */
3197         if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
3198                 goto skip_write;
3199
3200         if ((S_ISDIR(inode->i_mode) || IS_NOQUOTA(inode)) &&
3201                         wbc->sync_mode == WB_SYNC_NONE &&
3202                         get_dirty_pages(inode) < nr_pages_to_skip(sbi, DATA) &&
3203                         f2fs_available_free_memory(sbi, DIRTY_DENTS))
3204                 goto skip_write;
3205
3206         /* skip writing in file defragment preparing stage */
3207         if (is_inode_flag_set(inode, FI_SKIP_WRITES))
3208                 goto skip_write;
3209
3210         trace_f2fs_writepages(mapping->host, wbc, DATA);
3211
3212         /* to avoid splitting IOs due to mixed WB_SYNC_ALL and WB_SYNC_NONE */
3213         if (wbc->sync_mode == WB_SYNC_ALL)
3214                 atomic_inc(&sbi->wb_sync_req[DATA]);
3215         else if (atomic_read(&sbi->wb_sync_req[DATA])) {
3216                 /* to avoid potential deadlock */
3217                 if (current->plug)
3218                         blk_finish_plug(current->plug);
3219                 goto skip_write;
3220         }
3221
             /* locked records that sbi->writepages has to be released below */
3222         if (__should_serialize_io(inode, wbc)) {
3223                 mutex_lock(&sbi->writepages);
3224                 locked = true;
3225         }
3226
3227         blk_start_plug(&plug);
3228         ret = f2fs_write_cache_pages(mapping, wbc, io_type);
3229         blk_finish_plug(&plug);
3230
3231         if (locked)
3232                 mutex_unlock(&sbi->writepages);
3233
             /* pairs with the atomic_inc() done for WB_SYNC_ALL above */
3234         if (wbc->sync_mode == WB_SYNC_ALL)
3235                 atomic_dec(&sbi->wb_sync_req[DATA]);
3236         /*
3237          * if some pages were truncated, we cannot guarantee its mapping->host
3238          * to detect pending bios.
3239          */
3240
3241         f2fs_remove_dirty_inode(inode);
3242         return ret;
3243
3244 skip_write:
             /* every dirty page of the inode is accounted as skipped */
3245         wbc->pages_skipped += get_dirty_pages(inode);
3246         trace_f2fs_writepages(mapping->host, wbc, DATA);
3247         return 0;
3248 }
3249
3250 static int f2fs_write_data_pages(struct address_space *mapping,
3251                             struct writeback_control *wbc)
3252 {
3253         struct inode *inode = mapping->host;
3254
3255         return __f2fs_write_data_pages(mapping, wbc,
3256                         F2FS_I(inode)->cp_task == current ?
3257                         FS_CP_DATA_IO : FS_DATA_IO);
3258 }
3259
3260 void f2fs_write_failed(struct inode *inode, loff_t to)
3261 {
3262         loff_t i_size = i_size_read(inode);
3263
3264         if (IS_NOQUOTA(inode))
3265                 return;
3266
3267         /* In the fs-verity case, f2fs_end_enable_verity() does the truncate */
3268         if (to > i_size && !f2fs_verity_in_progress(inode)) {
3269                 f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
3270                 filemap_invalidate_lock(inode->i_mapping);
3271
3272                 truncate_pagecache(inode, i_size);
3273                 f2fs_truncate_blocks(inode, i_size, true);
3274
3275                 filemap_invalidate_unlock(inode->i_mapping);
3276                 f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
3277         }
3278 }
3279
3280 static int prepare_write_begin(struct f2fs_sb_info *sbi,
3281                         struct page *page, loff_t pos, unsigned len,
3282                         block_t *blk_addr, bool *node_changed)
3283 {
3284         struct inode *inode = page->mapping->host;
3285         pgoff_t index = page->index;
3286         struct dnode_of_data dn;
3287         struct page *ipage;
3288         bool locked = false;
3289         struct extent_info ei = {0, };
3290         int err = 0;
3291         int flag;
3292
3293         /*
3294          * If a whole page is being written and we already preallocated all the
3295          * blocks, then there is no need to get a block address now.
3296          */
3297         if (len == PAGE_SIZE && is_inode_flag_set(inode, FI_PREALLOCATED_ALL))
3298                 return 0;
3299
3300         /* f2fs_lock_op avoids race between write CP and convert_inline_page */
3301         if (f2fs_has_inline_data(inode) && pos + len > MAX_INLINE_DATA(inode))
3302                 flag = F2FS_GET_BLOCK_DEFAULT;
3303         else
3304                 flag = F2FS_GET_BLOCK_PRE_AIO;
3305
3306         if (f2fs_has_inline_data(inode) ||
3307                         (pos & PAGE_MASK) >= i_size_read(inode)) {
3308                 f2fs_do_map_lock(sbi, flag, true);
3309                 locked = true;
3310         }
3311
3312 restart:
3313         /* check inline_data */
3314         ipage = f2fs_get_node_page(sbi, inode->i_ino);
3315         if (IS_ERR(ipage)) {
3316                 err = PTR_ERR(ipage);
3317                 goto unlock_out;
3318         }
3319
3320         set_new_dnode(&dn, inode, ipage, ipage, 0);
3321
3322         if (f2fs_has_inline_data(inode)) {
3323                 if (pos + len <= MAX_INLINE_DATA(inode)) {
3324                         f2fs_do_read_inline_data(page, ipage);
3325                         set_inode_flag(inode, FI_DATA_EXIST);
3326                         if (inode->i_nlink)
3327                                 set_page_private_inline(ipage);
3328                 } else {
3329                         err = f2fs_convert_inline_page(&dn, page);
3330                         if (err)
3331                                 goto out;
3332                         if (dn.data_blkaddr == NULL_ADDR)
3333                                 err = f2fs_get_block(&dn, index);
3334                 }
3335         } else if (locked) {
3336                 err = f2fs_get_block(&dn, index);
3337         } else {
3338                 if (f2fs_lookup_extent_cache(inode, index, &ei)) {
3339                         dn.data_blkaddr = ei.blk + index - ei.fofs;
3340                 } else {
3341                         /* hole case */
3342                         err = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE);
3343                         if (err || dn.data_blkaddr == NULL_ADDR) {
3344                                 f2fs_put_dnode(&dn);
3345                                 f2fs_do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO,
3346                                                                 true);
3347                                 WARN_ON(flag != F2FS_GET_BLOCK_PRE_AIO);
3348                                 locked = true;
3349                                 goto restart;
3350                         }
3351                 }
3352         }
3353
3354         /* convert_inline_page can make node_changed */
3355         *blk_addr = dn.data_blkaddr;
3356         *node_changed = dn.node_changed;
3357 out:
3358         f2fs_put_dnode(&dn);
3359 unlock_out:
3360         if (locked)
3361                 f2fs_do_map_lock(sbi, flag, false);
3362         return err;
3363 }
3364
3365 static int __find_data_block(struct inode *inode, pgoff_t index,
3366                                 block_t *blk_addr)
3367 {
3368         struct dnode_of_data dn;
3369         struct page *ipage;
3370         struct extent_info ei = {0, };
3371         int err = 0;
3372
3373         ipage = f2fs_get_node_page(F2FS_I_SB(inode), inode->i_ino);
3374         if (IS_ERR(ipage))
3375                 return PTR_ERR(ipage);
3376
3377         set_new_dnode(&dn, inode, ipage, ipage, 0);
3378
3379         if (f2fs_lookup_extent_cache(inode, index, &ei)) {
3380                 dn.data_blkaddr = ei.blk + index - ei.fofs;
3381         } else {
3382                 /* hole case */
3383                 err = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE);
3384                 if (err) {
3385                         dn.data_blkaddr = NULL_ADDR;
3386                         err = 0;
3387                 }
3388         }
3389         *blk_addr = dn.data_blkaddr;
3390         f2fs_put_dnode(&dn);
3391         return err;
3392 }
3393
3394 static int __reserve_data_block(struct inode *inode, pgoff_t index,
3395                                 block_t *blk_addr, bool *node_changed)
3396 {
3397         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
3398         struct dnode_of_data dn;
3399         struct page *ipage;
3400         int err = 0;
3401
3402         f2fs_do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, true);
3403
3404         ipage = f2fs_get_node_page(sbi, inode->i_ino);
3405         if (IS_ERR(ipage)) {
3406                 err = PTR_ERR(ipage);
3407                 goto unlock_out;
3408         }
3409         set_new_dnode(&dn, inode, ipage, ipage, 0);
3410
3411         err = f2fs_get_block(&dn, index);
3412
3413         *blk_addr = dn.data_blkaddr;
3414         *node_changed = dn.node_changed;
3415         f2fs_put_dnode(&dn);
3416
3417 unlock_out:
3418         f2fs_do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, false);
3419         return err;
3420 }
3421
3422 static int prepare_atomic_write_begin(struct f2fs_sb_info *sbi,
3423                         struct page *page, loff_t pos, unsigned int len,
3424                         block_t *blk_addr, bool *node_changed)
3425 {
3426         struct inode *inode = page->mapping->host;
3427         struct inode *cow_inode = F2FS_I(inode)->cow_inode;
3428         pgoff_t index = page->index;
3429         int err = 0;
3430         block_t ori_blk_addr = NULL_ADDR;
3431
3432         /* If pos is beyond the end of file, reserve a new block in COW inode */
3433         if ((pos & PAGE_MASK) >= i_size_read(inode))
3434                 goto reserve_block;
3435
3436         /* Look for the block in COW inode first */
3437         err = __find_data_block(cow_inode, index, blk_addr);
3438         if (err)
3439                 return err;
3440         else if (*blk_addr != NULL_ADDR)
3441                 return 0;
3442
3443         /* Look for the block in the original inode */
3444         err = __find_data_block(inode, index, &ori_blk_addr);
3445         if (err)
3446                 return err;
3447
3448 reserve_block:
3449         /* Finally, we should reserve a new block in COW inode for the update */
3450         err = __reserve_data_block(cow_inode, index, blk_addr, node_changed);
3451         if (err)
3452                 return err;
3453         inc_atomic_write_cnt(inode);
3454
3455         if (ori_blk_addr != NULL_ADDR)
3456                 *blk_addr = ori_blk_addr;
3457         return 0;
3458 }
3459
3460 static int f2fs_write_begin(struct file *file, struct address_space *mapping,
3461                 loff_t pos, unsigned len, struct page **pagep, void **fsdata)
3462 {
3463         struct inode *inode = mapping->host;
3464         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
3465         struct page *page = NULL;
3466         pgoff_t index = ((unsigned long long) pos) >> PAGE_SHIFT;
3467         bool need_balance = false;
3468         block_t blkaddr = NULL_ADDR;
3469         int err = 0;
3470
3471         trace_f2fs_write_begin(inode, pos, len);
3472
3473         if (!f2fs_is_checkpoint_ready(sbi)) {
3474                 err = -ENOSPC;
3475                 goto fail;
3476         }
3477
3478         /*
3479          * We should check this at this moment to avoid deadlock on inode page
3480          * and #0 page. The locking rule for inline_data conversion should be:
3481          * lock_page(page #0) -> lock_page(inode_page)
3482          */
3483         if (index != 0) {
3484                 err = f2fs_convert_inline_inode(inode);
3485                 if (err)
3486                         goto fail;
3487         }
3488
3489 #ifdef CONFIG_F2FS_FS_COMPRESSION
3490         if (f2fs_compressed_file(inode)) {
3491                 int ret;
3492
3493                 *fsdata = NULL;
3494
3495                 if (len == PAGE_SIZE && !(f2fs_is_atomic_file(inode)))
3496                         goto repeat;
3497
3498                 ret = f2fs_prepare_compress_overwrite(inode, pagep,
3499                                                         index, fsdata);
3500                 if (ret < 0) {
3501                         err = ret;
3502                         goto fail;
3503                 } else if (ret) {
3504                         return 0;
3505                 }
3506         }
3507 #endif
3508
3509 repeat:
3510         /*
3511          * Do not use grab_cache_page_write_begin() to avoid deadlock due to
3512          * wait_for_stable_page. Will wait that below with our IO control.
3513          */
3514         page = f2fs_pagecache_get_page(mapping, index,
3515                                 FGP_LOCK | FGP_WRITE | FGP_CREAT, GFP_NOFS);
3516         if (!page) {
3517                 err = -ENOMEM;
3518                 goto fail;
3519         }
3520
3521         /* TODO: cluster can be compressed due to race with .writepage */
3522
3523         *pagep = page;
3524
3525         if (f2fs_is_atomic_file(inode))
3526                 err = prepare_atomic_write_begin(sbi, page, pos, len,
3527                                         &blkaddr, &need_balance);
3528         else
3529                 err = prepare_write_begin(sbi, page, pos, len,
3530                                         &blkaddr, &need_balance);
3531         if (err)
3532                 goto fail;
3533
3534         if (need_balance && !IS_NOQUOTA(inode) &&
3535                         has_not_enough_free_secs(sbi, 0, 0)) {
3536                 unlock_page(page);
3537                 f2fs_balance_fs(sbi, true);
3538                 lock_page(page);
3539                 if (page->mapping != mapping) {
3540                         /* The page got truncated from under us */
3541                         f2fs_put_page(page, 1);
3542                         goto repeat;
3543                 }
3544         }
3545
3546         f2fs_wait_on_page_writeback(page, DATA, false, true);
3547
3548         if (len == PAGE_SIZE || PageUptodate(page))
3549                 return 0;
3550
3551         if (!(pos & (PAGE_SIZE - 1)) && (pos + len) >= i_size_read(inode) &&
3552             !f2fs_verity_in_progress(inode)) {
3553                 zero_user_segment(page, len, PAGE_SIZE);
3554                 return 0;
3555         }
3556
3557         if (blkaddr == NEW_ADDR) {
3558                 zero_user_segment(page, 0, PAGE_SIZE);
3559                 SetPageUptodate(page);
3560         } else {
3561                 if (!f2fs_is_valid_blkaddr(sbi, blkaddr,
3562                                 DATA_GENERIC_ENHANCE_READ)) {
3563                         err = -EFSCORRUPTED;
3564                         goto fail;
3565                 }
3566                 err = f2fs_submit_page_read(inode, page, blkaddr, 0, true);
3567                 if (err)
3568                         goto fail;
3569
3570                 lock_page(page);
3571                 if (unlikely(page->mapping != mapping)) {
3572                         f2fs_put_page(page, 1);
3573                         goto repeat;
3574                 }
3575                 if (unlikely(!PageUptodate(page))) {
3576                         err = -EIO;
3577                         goto fail;
3578                 }
3579         }
3580         return 0;
3581
3582 fail:
3583         f2fs_put_page(page, 1);
3584         f2fs_write_failed(inode, pos + len);
3585         return err;
3586 }
3587
3588 static int f2fs_write_end(struct file *file,
3589                         struct address_space *mapping,
3590                         loff_t pos, unsigned len, unsigned copied,
3591                         struct page *page, void *fsdata)
3592 {
3593         struct inode *inode = page->mapping->host;
3594
3595         trace_f2fs_write_end(inode, pos, len, copied);
3596
3597         /*
3598          * This should be come from len == PAGE_SIZE, and we expect copied
3599          * should be PAGE_SIZE. Otherwise, we treat it with zero copied and
3600          * let generic_perform_write() try to copy data again through copied=0.
3601          */
3602         if (!PageUptodate(page)) {
3603                 if (unlikely(copied != len))
3604                         copied = 0;
3605                 else
3606                         SetPageUptodate(page);
3607         }
3608
3609 #ifdef CONFIG_F2FS_FS_COMPRESSION
3610         /* overwrite compressed file */
3611         if (f2fs_compressed_file(inode) && fsdata) {
3612                 f2fs_compress_write_end(inode, fsdata, page->index, copied);
3613                 f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
3614
3615                 if (pos + copied > i_size_read(inode) &&
3616                                 !f2fs_verity_in_progress(inode))
3617                         f2fs_i_size_write(inode, pos + copied);
3618                 return copied;
3619         }
3620 #endif
3621
3622         if (!copied)
3623                 goto unlock_out;
3624
3625         set_page_dirty(page);
3626
3627         if (pos + copied > i_size_read(inode) &&
3628             !f2fs_verity_in_progress(inode)) {
3629                 f2fs_i_size_write(inode, pos + copied);
3630                 if (f2fs_is_atomic_file(inode))
3631                         f2fs_i_size_write(F2FS_I(inode)->cow_inode,
3632                                         pos + copied);
3633         }
3634 unlock_out:
3635         f2fs_put_page(page, 1);
3636         f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
3637         return copied;
3638 }
3639
3640 void f2fs_invalidate_folio(struct folio *folio, size_t offset, size_t length)
3641 {
3642         struct inode *inode = folio->mapping->host;
3643         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
3644
3645         if (inode->i_ino >= F2FS_ROOT_INO(sbi) &&
3646                                 (offset || length != folio_size(folio)))
3647                 return;
3648
3649         if (folio_test_dirty(folio)) {
3650                 if (inode->i_ino == F2FS_META_INO(sbi)) {
3651                         dec_page_count(sbi, F2FS_DIRTY_META);
3652                 } else if (inode->i_ino == F2FS_NODE_INO(sbi)) {
3653                         dec_page_count(sbi, F2FS_DIRTY_NODES);
3654                 } else {
3655                         inode_dec_dirty_pages(inode);
3656                         f2fs_remove_dirty_inode(inode);
3657                 }
3658         }
3659
3660         clear_page_private_gcing(&folio->page);
3661
3662         if (test_opt(sbi, COMPRESS_CACHE) &&
3663                         inode->i_ino == F2FS_COMPRESS_INO(sbi))
3664                 clear_page_private_data(&folio->page);
3665
3666         folio_detach_private(folio);
3667 }
3668
3669 bool f2fs_release_folio(struct folio *folio, gfp_t wait)
3670 {
3671         struct f2fs_sb_info *sbi;
3672
3673         /* If this is dirty folio, keep private data */
3674         if (folio_test_dirty(folio))
3675                 return false;
3676
3677         sbi = F2FS_M_SB(folio->mapping);
3678         if (test_opt(sbi, COMPRESS_CACHE)) {
3679                 struct inode *inode = folio->mapping->host;
3680
3681                 if (inode->i_ino == F2FS_COMPRESS_INO(sbi))
3682                         clear_page_private_data(&folio->page);
3683         }
3684
3685         clear_page_private_gcing(&folio->page);
3686
3687         folio_detach_private(folio);
3688         return true;
3689 }
3690
3691 static bool f2fs_dirty_data_folio(struct address_space *mapping,
3692                 struct folio *folio)
3693 {
3694         struct inode *inode = mapping->host;
3695
3696         trace_f2fs_set_page_dirty(&folio->page, DATA);
3697
3698         if (!folio_test_uptodate(folio))
3699                 folio_mark_uptodate(folio);
3700         BUG_ON(folio_test_swapcache(folio));
3701
3702         if (!folio_test_dirty(folio)) {
3703                 filemap_dirty_folio(mapping, folio);
3704                 f2fs_update_dirty_folio(inode, folio);
3705                 return true;
3706         }
3707         return false;
3708 }
3709
3710
3711 static sector_t f2fs_bmap_compress(struct inode *inode, sector_t block)
3712 {
3713 #ifdef CONFIG_F2FS_FS_COMPRESSION
3714         struct dnode_of_data dn;
3715         sector_t start_idx, blknr = 0;
3716         int ret;
3717
3718         start_idx = round_down(block, F2FS_I(inode)->i_cluster_size);
3719
3720         set_new_dnode(&dn, inode, NULL, NULL, 0);
3721         ret = f2fs_get_dnode_of_data(&dn, start_idx, LOOKUP_NODE);
3722         if (ret)
3723                 return 0;
3724
3725         if (dn.data_blkaddr != COMPRESS_ADDR) {
3726                 dn.ofs_in_node += block - start_idx;
3727                 blknr = f2fs_data_blkaddr(&dn);
3728                 if (!__is_valid_data_blkaddr(blknr))
3729                         blknr = 0;
3730         }
3731
3732         f2fs_put_dnode(&dn);
3733         return blknr;
3734 #else
3735         return 0;
3736 #endif
3737 }
3738
3739
3740 static sector_t f2fs_bmap(struct address_space *mapping, sector_t block)
3741 {
3742         struct inode *inode = mapping->host;
3743         sector_t blknr = 0;
3744
3745         if (f2fs_has_inline_data(inode))
3746                 goto out;
3747
3748         /* make sure allocating whole blocks */
3749         if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
3750                 filemap_write_and_wait(mapping);
3751
3752         /* Block number less than F2FS MAX BLOCKS */
3753         if (unlikely(block >= max_file_blocks(inode)))
3754                 goto out;
3755
3756         if (f2fs_compressed_file(inode)) {
3757                 blknr = f2fs_bmap_compress(inode, block);
3758         } else {
3759                 struct f2fs_map_blocks map;
3760
3761                 memset(&map, 0, sizeof(map));
3762                 map.m_lblk = block;
3763                 map.m_len = 1;
3764                 map.m_next_pgofs = NULL;
3765                 map.m_seg_type = NO_CHECK_TYPE;
3766
3767                 if (!f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_BMAP))
3768                         blknr = map.m_pblk;
3769         }
3770 out:
3771         trace_f2fs_bmap(inode, block, blknr);
3772         return blknr;
3773 }
3774
3775 #ifdef CONFIG_SWAP
3776 static int f2fs_migrate_blocks(struct inode *inode, block_t start_blk,
3777                                                         unsigned int blkcnt)
3778 {
3779         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
3780         unsigned int blkofs;
3781         unsigned int blk_per_sec = BLKS_PER_SEC(sbi);
3782         unsigned int secidx = start_blk / blk_per_sec;
3783         unsigned int end_sec = secidx + blkcnt / blk_per_sec;
3784         int ret = 0;
3785
3786         f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
3787         filemap_invalidate_lock(inode->i_mapping);
3788
3789         set_inode_flag(inode, FI_ALIGNED_WRITE);
3790         set_inode_flag(inode, FI_OPU_WRITE);
3791
3792         for (; secidx < end_sec; secidx++) {
3793                 f2fs_down_write(&sbi->pin_sem);
3794
3795                 f2fs_lock_op(sbi);
3796                 f2fs_allocate_new_section(sbi, CURSEG_COLD_DATA_PINNED, false);
3797                 f2fs_unlock_op(sbi);
3798
3799                 set_inode_flag(inode, FI_SKIP_WRITES);
3800
3801                 for (blkofs = 0; blkofs < blk_per_sec; blkofs++) {
3802                         struct page *page;
3803                         unsigned int blkidx = secidx * blk_per_sec + blkofs;
3804
3805                         page = f2fs_get_lock_data_page(inode, blkidx, true);
3806                         if (IS_ERR(page)) {
3807                                 f2fs_up_write(&sbi->pin_sem);
3808                                 ret = PTR_ERR(page);
3809                                 goto done;
3810                         }
3811
3812                         set_page_dirty(page);
3813                         f2fs_put_page(page, 1);
3814                 }
3815
3816                 clear_inode_flag(inode, FI_SKIP_WRITES);
3817
3818                 ret = filemap_fdatawrite(inode->i_mapping);
3819
3820                 f2fs_up_write(&sbi->pin_sem);
3821
3822                 if (ret)
3823                         break;
3824         }
3825
3826 done:
3827         clear_inode_flag(inode, FI_SKIP_WRITES);
3828         clear_inode_flag(inode, FI_OPU_WRITE);
3829         clear_inode_flag(inode, FI_ALIGNED_WRITE);
3830
3831         filemap_invalidate_unlock(inode->i_mapping);
3832         f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
3833
3834         return ret;
3835 }
3836
3837 static int check_swap_activate(struct swap_info_struct *sis,
3838                                 struct file *swap_file, sector_t *span)
3839 {
3840         struct address_space *mapping = swap_file->f_mapping;
3841         struct inode *inode = mapping->host;
3842         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
3843         sector_t cur_lblock;
3844         sector_t last_lblock;
3845         sector_t pblock;
3846         sector_t lowest_pblock = -1;
3847         sector_t highest_pblock = 0;
3848         int nr_extents = 0;
3849         unsigned long nr_pblocks;
3850         unsigned int blks_per_sec = BLKS_PER_SEC(sbi);
3851         unsigned int sec_blks_mask = BLKS_PER_SEC(sbi) - 1;
3852         unsigned int not_aligned = 0;
3853         int ret = 0;
3854
3855         /*
3856          * Map all the blocks into the extent list.  This code doesn't try
3857          * to be very smart.
3858          */
3859         cur_lblock = 0;
3860         last_lblock = bytes_to_blks(inode, i_size_read(inode));
3861
3862         while (cur_lblock < last_lblock && cur_lblock < sis->max) {
3863                 struct f2fs_map_blocks map;
3864 retry:
3865                 cond_resched();
3866
3867                 memset(&map, 0, sizeof(map));
3868                 map.m_lblk = cur_lblock;
3869                 map.m_len = last_lblock - cur_lblock;
3870                 map.m_next_pgofs = NULL;
3871                 map.m_next_extent = NULL;
3872                 map.m_seg_type = NO_CHECK_TYPE;
3873                 map.m_may_create = false;
3874
3875                 ret = f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_FIEMAP);
3876                 if (ret)
3877                         goto out;
3878
3879                 /* hole */
3880                 if (!(map.m_flags & F2FS_MAP_FLAGS)) {
3881                         f2fs_err(sbi, "Swapfile has holes");
3882                         ret = -EINVAL;
3883                         goto out;
3884                 }
3885
3886                 pblock = map.m_pblk;
3887                 nr_pblocks = map.m_len;
3888
3889                 if ((pblock - SM_I(sbi)->main_blkaddr) & sec_blks_mask ||
3890                                 nr_pblocks & sec_blks_mask) {
3891                         not_aligned++;
3892
3893                         nr_pblocks = roundup(nr_pblocks, blks_per_sec);
3894                         if (cur_lblock + nr_pblocks > sis->max)
3895                                 nr_pblocks -= blks_per_sec;
3896
3897                         if (!nr_pblocks) {
3898                                 /* this extent is last one */
3899                                 nr_pblocks = map.m_len;
3900                                 f2fs_warn(sbi, "Swapfile: last extent is not aligned to section");
3901                                 goto next;
3902                         }
3903
3904                         ret = f2fs_migrate_blocks(inode, cur_lblock,
3905                                                         nr_pblocks);
3906                         if (ret)
3907                                 goto out;
3908                         goto retry;
3909                 }
3910 next:
3911                 if (cur_lblock + nr_pblocks >= sis->max)
3912                         nr_pblocks = sis->max - cur_lblock;
3913
3914                 if (cur_lblock) {       /* exclude the header page */
3915                         if (pblock < lowest_pblock)
3916                                 lowest_pblock = pblock;
3917                         if (pblock + nr_pblocks - 1 > highest_pblock)
3918                                 highest_pblock = pblock + nr_pblocks - 1;
3919                 }
3920
3921                 /*
3922                  * We found a PAGE_SIZE-length, PAGE_SIZE-aligned run of blocks
3923                  */
3924                 ret = add_swap_extent(sis, cur_lblock, nr_pblocks, pblock);
3925                 if (ret < 0)
3926                         goto out;
3927                 nr_extents += ret;
3928                 cur_lblock += nr_pblocks;
3929         }
3930         ret = nr_extents;
3931         *span = 1 + highest_pblock - lowest_pblock;
3932         if (cur_lblock == 0)
3933                 cur_lblock = 1; /* force Empty message */
3934         sis->max = cur_lblock;
3935         sis->pages = cur_lblock - 1;
3936         sis->highest_bit = cur_lblock - 1;
3937 out:
3938         if (not_aligned)
3939                 f2fs_warn(sbi, "Swapfile (%u) is not align to section: 1) creat(), 2) ioctl(F2FS_IOC_SET_PIN_FILE), 3) fallocate(%u * N)",
3940                           not_aligned, blks_per_sec * F2FS_BLKSIZE);
3941         return ret;
3942 }
3943
3944 static int f2fs_swap_activate(struct swap_info_struct *sis, struct file *file,
3945                                 sector_t *span)
3946 {
3947         struct inode *inode = file_inode(file);
3948         int ret;
3949
3950         if (!S_ISREG(inode->i_mode))
3951                 return -EINVAL;
3952
3953         if (f2fs_readonly(F2FS_I_SB(inode)->sb))
3954                 return -EROFS;
3955
3956         if (f2fs_lfs_mode(F2FS_I_SB(inode))) {
3957                 f2fs_err(F2FS_I_SB(inode),
3958                         "Swapfile not supported in LFS mode");
3959                 return -EINVAL;
3960         }
3961
3962         ret = f2fs_convert_inline_inode(inode);
3963         if (ret)
3964                 return ret;
3965
3966         if (!f2fs_disable_compressed_file(inode))
3967                 return -EINVAL;
3968
3969         f2fs_precache_extents(inode);
3970
3971         ret = check_swap_activate(sis, file, span);
3972         if (ret < 0)
3973                 return ret;
3974
3975         set_inode_flag(inode, FI_PIN_FILE);
3976         f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
3977         return ret;
3978 }
3979
3980 static void f2fs_swap_deactivate(struct file *file)
3981 {
3982         struct inode *inode = file_inode(file);
3983
3984         clear_inode_flag(inode, FI_PIN_FILE);
3985 }
3986 #else
3987 static int f2fs_swap_activate(struct swap_info_struct *sis, struct file *file,
3988                                 sector_t *span)
3989 {
3990         return -EOPNOTSUPP;
3991 }
3992
/* Without CONFIG_SWAP there is nothing to undo. */
static void f2fs_swap_deactivate(struct file *file)
{
}
3996 #endif
3997
3998 const struct address_space_operations f2fs_dblock_aops = {
3999         .read_folio     = f2fs_read_data_folio,
4000         .readahead      = f2fs_readahead,
4001         .writepage      = f2fs_write_data_page,
4002         .writepages     = f2fs_write_data_pages,
4003         .write_begin    = f2fs_write_begin,
4004         .write_end      = f2fs_write_end,
4005         .dirty_folio    = f2fs_dirty_data_folio,
4006         .migrate_folio  = filemap_migrate_folio,
4007         .invalidate_folio = f2fs_invalidate_folio,
4008         .release_folio  = f2fs_release_folio,
4009         .direct_IO      = noop_direct_IO,
4010         .bmap           = f2fs_bmap,
4011         .swap_activate  = f2fs_swap_activate,
4012         .swap_deactivate = f2fs_swap_deactivate,
4013 };
4014
4015 void f2fs_clear_page_cache_dirty_tag(struct page *page)
4016 {
4017         struct address_space *mapping = page_mapping(page);
4018         unsigned long flags;
4019
4020         xa_lock_irqsave(&mapping->i_pages, flags);
4021         __xa_clear_mark(&mapping->i_pages, page_index(page),
4022                                                 PAGECACHE_TAG_DIRTY);
4023         xa_unlock_irqrestore(&mapping->i_pages, flags);
4024 }
4025
4026 int __init f2fs_init_post_read_processing(void)
4027 {
4028         bio_post_read_ctx_cache =
4029                 kmem_cache_create("f2fs_bio_post_read_ctx",
4030                                   sizeof(struct bio_post_read_ctx), 0, 0, NULL);
4031         if (!bio_post_read_ctx_cache)
4032                 goto fail;
4033         bio_post_read_ctx_pool =
4034                 mempool_create_slab_pool(NUM_PREALLOC_POST_READ_CTXS,
4035                                          bio_post_read_ctx_cache);
4036         if (!bio_post_read_ctx_pool)
4037                 goto fail_free_cache;
4038         return 0;
4039
4040 fail_free_cache:
4041         kmem_cache_destroy(bio_post_read_ctx_cache);
4042 fail:
4043         return -ENOMEM;
4044 }
4045
4046 void f2fs_destroy_post_read_processing(void)
4047 {
4048         mempool_destroy(bio_post_read_ctx_pool);
4049         kmem_cache_destroy(bio_post_read_ctx_cache);
4050 }
4051
4052 int f2fs_init_post_read_wq(struct f2fs_sb_info *sbi)
4053 {
4054         if (!f2fs_sb_has_encrypt(sbi) &&
4055                 !f2fs_sb_has_verity(sbi) &&
4056                 !f2fs_sb_has_compression(sbi))
4057                 return 0;
4058
4059         sbi->post_read_wq = alloc_workqueue("f2fs_post_read_wq",
4060                                                  WQ_UNBOUND | WQ_HIGHPRI,
4061                                                  num_online_cpus());
4062         if (!sbi->post_read_wq)
4063                 return -ENOMEM;
4064         return 0;
4065 }
4066
4067 void f2fs_destroy_post_read_wq(struct f2fs_sb_info *sbi)
4068 {
4069         if (sbi->post_read_wq)
4070                 destroy_workqueue(sbi->post_read_wq);
4071 }
4072
4073 int __init f2fs_init_bio_entry_cache(void)
4074 {
4075         bio_entry_slab = f2fs_kmem_cache_create("f2fs_bio_entry_slab",
4076                         sizeof(struct bio_entry));
4077         if (!bio_entry_slab)
4078                 return -ENOMEM;
4079         return 0;
4080 }
4081
4082 void f2fs_destroy_bio_entry_cache(void)
4083 {
4084         kmem_cache_destroy(bio_entry_slab);
4085 }
4086
4087 static int f2fs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
4088                             unsigned int flags, struct iomap *iomap,
4089                             struct iomap *srcmap)
4090 {
4091         struct f2fs_map_blocks map = {};
4092         pgoff_t next_pgofs = 0;
4093         int err;
4094
4095         map.m_lblk = bytes_to_blks(inode, offset);
4096         map.m_len = bytes_to_blks(inode, offset + length - 1) - map.m_lblk + 1;
4097         map.m_next_pgofs = &next_pgofs;
4098         map.m_seg_type = f2fs_rw_hint_to_seg_type(inode->i_write_hint);
4099         if (flags & IOMAP_WRITE)
4100                 map.m_may_create = true;
4101
4102         err = f2fs_map_blocks(inode, &map, flags & IOMAP_WRITE,
4103                               F2FS_GET_BLOCK_DIO);
4104         if (err)
4105                 return err;
4106
4107         iomap->offset = blks_to_bytes(inode, map.m_lblk);
4108
4109         /*
4110          * When inline encryption is enabled, sometimes I/O to an encrypted file
4111          * has to be broken up to guarantee DUN contiguity.  Handle this by
4112          * limiting the length of the mapping returned.
4113          */
4114         map.m_len = fscrypt_limit_io_blocks(inode, map.m_lblk, map.m_len);
4115
4116         if (map.m_flags & (F2FS_MAP_MAPPED | F2FS_MAP_UNWRITTEN)) {
4117                 iomap->length = blks_to_bytes(inode, map.m_len);
4118                 if (map.m_flags & F2FS_MAP_MAPPED) {
4119                         iomap->type = IOMAP_MAPPED;
4120                         iomap->flags |= IOMAP_F_MERGED;
4121                 } else {
4122                         iomap->type = IOMAP_UNWRITTEN;
4123                 }
4124                 if (WARN_ON_ONCE(!__is_valid_data_blkaddr(map.m_pblk)))
4125                         return -EINVAL;
4126
4127                 iomap->bdev = map.m_bdev;
4128                 iomap->addr = blks_to_bytes(inode, map.m_pblk);
4129         } else {
4130                 iomap->length = blks_to_bytes(inode, next_pgofs) -
4131                                 iomap->offset;
4132                 iomap->type = IOMAP_HOLE;
4133                 iomap->addr = IOMAP_NULL_ADDR;
4134         }
4135
4136         if (map.m_flags & F2FS_MAP_NEW)
4137                 iomap->flags |= IOMAP_F_NEW;
4138         if ((inode->i_state & I_DIRTY_DATASYNC) ||
4139             offset + length > i_size_read(inode))
4140                 iomap->flags |= IOMAP_F_DIRTY;
4141
4142         return 0;
4143 }
4144
4145 const struct iomap_ops f2fs_iomap_ops = {
4146         .iomap_begin    = f2fs_iomap_begin,
4147 };