Merge tag 'fs_for_v6.5-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/jack...

author Linus Torvalds <torvalds@linux-foundation.org>
Thu, 29 Jun 2023 20:39:51 +0000 (13:39 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Thu, 29 Jun 2023 20:39:51 +0000 (13:39 -0700)
diff --combined fs/buffer.c

index cdd1002,00cad26..bd09132
--- 1/fs/buffer.c
--- 2/fs/buffer.c
+++ b/fs/buffer.c
@@@ -111,6 -111,7 +111,6 @@@ void buffer_check_dirty_writeback(struc
                 bh = bh->b_this_page;
         } while (bh != head);
   }
- -EXPORT_SYMBOL(buffer_check_dirty_writeback);
   
   /*
    * Block until a buffer comes unlocked.  This doesn't stop it
@@@ -194,19 -195,19 +194,19 @@@ __find_get_block_slow(struct block_devi
         pgoff_t index;
         struct buffer_head *bh;
         struct buffer_head *head;
- -      struct page *page;
+ +      struct folio *folio;
         int all_mapped = 1;
         static DEFINE_RATELIMIT_STATE(last_warned, HZ, 1);
   
         index = block >> (PAGE_SHIFT - bd_inode->i_blkbits);
- -      page = find_get_page_flags(bd_mapping, index, FGP_ACCESSED);
- -      if (!page)
+ +      folio = __filemap_get_folio(bd_mapping, index, FGP_ACCESSED, 0);
+ +      if (IS_ERR(folio))
                 goto out;
   
         spin_lock(&bd_mapping->private_lock);
- -      if (!page_has_buffers(page))
+ +      head = folio_buffers(folio);
+ +      if (!head)
                 goto out_unlock;
- -      head = page_buffers(page);
         bh = head;
         do {
                 if (!buffer_mapped(bh))
@@@ -236,7 -237,7 +236,7 @@@
         }
   out_unlock:
         spin_unlock(&bd_mapping->private_lock);
- -      put_page(page);
+ +      folio_put(folio);
   out:
         return ret;
   }
@@@ -591,6 -592,76 +591,76 @@@ int sync_mapping_buffers(struct address
   }
   EXPORT_SYMBOL(sync_mapping_buffers);
   
+ /**
+  * generic_buffers_fsync_noflush - generic buffer fsync implementation
+  * for simple filesystems with no inode lock
+  *
+  * @file:     file to synchronize
+  * @start:    start offset in bytes
+  * @end:      end offset in bytes (inclusive)
+  * @datasync: only synchronize essential metadata if true
+  *
+  * This is a generic implementation of the fsync method for simple
+  * filesystems which track all non-inode metadata in the buffers list
+  * hanging off the address_space structure.
+  *
+  * The byte range is written and waited on first; an error from that step
+  * is returned immediately and nothing else is attempted.  Otherwise the
+  * mapping's buffers are synced, the inode is written when its dirty state
+  * is relevant to this call, and the file's writeback error state is
+  * checked once more.  This function itself issues no device cache flush.
+  *
+  * Return: 0 on success, otherwise the first non-zero error reported by
+  * the steps that ran after the initial range write.
+  */
+ int generic_buffers_fsync_noflush(struct file *file, loff_t start, loff_t end,
+                                 bool datasync)
+ {
+       struct inode *inode = file->f_mapping->host;
+       int err;
+       int ret;
+ 
+       err = file_write_and_wait_range(file, start, end);
+       if (err)
+               return err;
+ 
+       ret = sync_mapping_buffers(inode->i_mapping);
+       if (!(inode->i_state & I_DIRTY_ALL))    /* no dirty inode state: skip the inode write */
+               goto out;
+       if (datasync && !(inode->i_state & I_DIRTY_DATASYNC))   /* datasync: only I_DIRTY_DATASYNC counts */
+               goto out;
+ 
+       err = sync_inode_metadata(inode, 1);
+       if (ret == 0)   /* keep the earlier buffer-sync error if there was one */
+               ret = err;
+ 
+ out:
+       /*
+        * check and advance again to catch errors after syncing out buffers;
+        * as above, an error already held in ret takes precedence.
+        */
+       err = file_check_and_advance_wb_err(file);
+       if (ret == 0)
+               ret = err;
+       return ret;
+ }
+ EXPORT_SYMBOL(generic_buffers_fsync_noflush);
+ 
+ /**
+  * generic_buffers_fsync - generic buffer fsync implementation
+  * for simple filesystems with no inode lock
+  *
+  * @file:     file to synchronize
+  * @start:    start offset in bytes
+  * @end:      end offset in bytes (inclusive)
+  * @datasync: only synchronize essential metadata if true
+  *
+  * This is a generic implementation of the fsync method for simple
+  * filesystems which track all non-inode metadata in the buffers list
+  * hanging off the address_space structure. When the sync step succeeds,
+  * a cache flush is then issued to the superblock's block device
+  * (inode->i_sb->s_bdev); the flush is skipped if the sync step returned
+  * an error.
+  *
+  * Return: 0 on success, the error from generic_buffers_fsync_noflush(),
+  * or, if that succeeded, the result of blkdev_issue_flush().
+  */
+ int generic_buffers_fsync(struct file *file, loff_t start, loff_t end,
+                         bool datasync)
+ {
+       struct inode *inode = file->f_mapping->host;
+       int ret;
+ 
+       ret = generic_buffers_fsync_noflush(file, start, end, datasync);
+       if (!ret)       /* only flush the device after a clean sync */
+               ret = blkdev_issue_flush(inode->i_sb->s_bdev);
+       return ret;
+ }
+ EXPORT_SYMBOL(generic_buffers_fsync);
+ 
   /*
    * Called when we've recently written block `bblock', and it is known that
    * `bblock' was for a buffer_boundary() buffer.  This means that the block at
@@@ -906,8 -977,8 +976,8 @@@ struct buffer_head *alloc_page_buffers(
   }
   EXPORT_SYMBOL_GPL(alloc_page_buffers);
   
- -static inline void
- -link_dev_buffers(struct page *page, struct buffer_head *head)
+ +static inline void link_dev_buffers(struct folio *folio,
+ +              struct buffer_head *head)
   {
         struct buffer_head *bh, *tail;
   
@@@ -917,7 -988,7 +987,7 @@@
                 bh = bh->b_this_page;
         } while (bh);
         tail->b_this_page = head;
- -      attach_page_private(page, head);
+ +      folio_attach_private(folio, head);
   }
   
   static sector_t blkdev_max_block(struct block_device *bdev, unsigned int size)
@@@ -933,14 -1004,15 +1003,14 @@@
   }
   
   /*
- - * Initialise the state of a blockdev page's buffers.
+ + * Initialise the state of a blockdev folio's buffers.
    */ 
- -static sector_t
- -init_page_buffers(struct page *page, struct block_device *bdev,
- -                      sector_t block, int size)
+ +static sector_t folio_init_buffers(struct folio *folio,
+ +              struct block_device *bdev, sector_t block, int size)
   {
- -      struct buffer_head *head = page_buffers(page);
+ +      struct buffer_head *head = folio_buffers(folio);
         struct buffer_head *bh = head;
- -      int uptodate = PageUptodate(page);
+ +      bool uptodate = folio_test_uptodate(folio);
         sector_t end_block = blkdev_max_block(bdev, size);
   
         do {
@@@ -974,7 -1046,7 +1044,7 @@@ grow_dev_page(struct block_device *bdev
               pgoff_t index, int size, int sizebits, gfp_t gfp)
   {
         struct inode *inode = bdev->bd_inode;
- -      struct page *page;
+ +      struct folio *folio;
         struct buffer_head *bh;
         sector_t end_block;
         int ret = 0;
@@@ -990,37 -1062,42 +1060,37 @@@
          */
         gfp_mask |= __GFP_NOFAIL;
   
- -      page = find_or_create_page(inode->i_mapping, index, gfp_mask);
+ +      folio = __filemap_get_folio(inode->i_mapping, index,
+ +                      FGP_LOCK | FGP_ACCESSED | FGP_CREAT, gfp_mask);
   
- -      BUG_ON(!PageLocked(page));
- -
- -      if (page_has_buffers(page)) {
- -              bh = page_buffers(page);
+ +      bh = folio_buffers(folio);
+ +      if (bh) {
                 if (bh->b_size == size) {
- -                      end_block = init_page_buffers(page, bdev,
- -                                              (sector_t)index << sizebits,
- -                                              size);
+ +                      end_block = folio_init_buffers(folio, bdev,
+ +                                      (sector_t)index << sizebits, size);
                         goto done;
                 }
- -              if (!try_to_free_buffers(page_folio(page)))
+ +              if (!try_to_free_buffers(folio))
                         goto failed;
         }
   
- -      /*
- -       * Allocate some buffers for this page
- -       */
- -      bh = alloc_page_buffers(page, size, true);
+ +      bh = folio_alloc_buffers(folio, size, true);
   
         /*
- -       * Link the page to the buffers and initialise them.  Take the
+ +       * Link the folio to the buffers and initialise them.  Take the
          * lock to be atomic wrt __find_get_block(), which does not
- -       * run under the page lock.
+ +       * run under the folio lock.
          */
         spin_lock(&inode->i_mapping->private_lock);
- -      link_dev_buffers(page, bh);
- -      end_block = init_page_buffers(page, bdev, (sector_t)index << sizebits,
- -                      size);
+ +      link_dev_buffers(folio, bh);
+ +      end_block = folio_init_buffers(folio, bdev,
+ +                      (sector_t)index << sizebits, size);
         spin_unlock(&inode->i_mapping->private_lock);
   done:
         ret = (block < end_block) ? 1 : -ENXIO;
   failed:
- -      unlock_page(page);
- -      put_page(page);
+ +      folio_unlock(folio);
+ +      folio_put(folio);
         return ret;
   }
   
@@@ -1757,7 -1834,7 +1827,7 @@@ static struct buffer_head *folio_create
    * WB_SYNC_ALL, the writes are posted using REQ_SYNC; this
    * causes the writes to be flagged as synchronous writes.
    */
- -int __block_write_full_page(struct inode *inode, struct page *page,
+ +int __block_write_full_folio(struct inode *inode, struct folio *folio,
                         get_block_t *get_block, struct writeback_control *wbc,
                         bh_end_io_t *handler)
   {
@@@ -1769,14 -1846,14 +1839,14 @@@
         int nr_underway = 0;
         blk_opf_t write_flags = wbc_to_write_flags(wbc);
   
- -      head = folio_create_buffers(page_folio(page), inode,
+ +      head = folio_create_buffers(folio, inode,
                                     (1 << BH_Dirty) | (1 << BH_Uptodate));
   
         /*
          * Be very careful.  We have no exclusion from block_dirty_folio
          * here, and the (potentially unmapped) buffers may become dirty at
          * any time.  If a buffer becomes dirty here after we've inspected it
- -       * then we just miss that fact, and the page stays dirty.
+ +       * then we just miss that fact, and the folio stays dirty.
          *
          * Buffers outside i_size may be dirtied by block_dirty_folio;
          * handle that here by just cleaning them.
@@@ -1786,7 -1863,7 +1856,7 @@@
         blocksize = bh->b_size;
         bbits = block_size_bits(blocksize);
   
- -      block = (sector_t)page->index << (PAGE_SHIFT - bbits);
+ +      block = (sector_t)folio->index << (PAGE_SHIFT - bbits);
         last_block = (i_size_read(inode) - 1) >> bbits;
   
         /*
@@@ -1797,7 -1874,7 +1867,7 @@@
                 if (block > last_block) {
                         /*
                          * mapped buffers outside i_size will occur, because
- -                       * this page can be outside i_size when there is a
+ +                       * this folio can be outside i_size when there is a
                          * truncate in progress.
                          */
                         /*
@@@ -1827,7 -1904,7 +1897,7 @@@
                         continue;
                 /*
                  * If it's a fully non-blocking write attempt and we cannot
- -               * lock the buffer then redirty the page.  Note that this can
+ +               * lock the buffer then redirty the folio.  Note that this can
                  * potentially cause a busy-wait loop from writeback threads
                  * and kswapd activity, but those code paths have their own
                  * higher-level throttling.
@@@ -1835,7 -1912,7 +1905,7 @@@
                 if (wbc->sync_mode != WB_SYNC_NONE) {
                         lock_buffer(bh);
                 } else if (!trylock_buffer(bh)) {
- -                      redirty_page_for_writepage(wbc, page);
+ +                      folio_redirty_for_writepage(wbc, folio);
                         continue;
                 }
                 if (test_clear_buffer_dirty(bh)) {
@@@ -1846,11 -1923,11 +1916,11 @@@
         } while ((bh = bh->b_this_page) != head);
   
         /*
- -       * The page and its buffers are protected by PageWriteback(), so we can
- -       * drop the bh refcounts early.
+ +       * The folio and its buffers are protected by the writeback flag,
+ +       * so we can drop the bh refcounts early.
          */
- -      BUG_ON(PageWriteback(page));
- -      set_page_writeback(page);
+ +      BUG_ON(folio_test_writeback(folio));
+ +      folio_start_writeback(folio);
   
         do {
                 struct buffer_head *next = bh->b_this_page;
@@@ -1860,20 -1937,20 +1930,20 @@@
                 }
                 bh = next;
         } while (bh != head);
- -      unlock_page(page);
+ +      folio_unlock(folio);
   
         err = 0;
   done:
         if (nr_underway == 0) {
                 /*
- -               * The page was marked dirty, but the buffers were
+ +               * The folio was marked dirty, but the buffers were
                  * clean.  Someone wrote them back by hand with
                  * write_dirty_buffer/submit_bh.  A rare case.
                  */
- -              end_page_writeback(page);
+ +              folio_end_writeback(folio);
   
                 /*
- -               * The page and buffer_heads can be released at any time from
+ +               * The folio and buffer_heads can be released at any time from
                  * here on.
                  */
         }
@@@ -1884,7 -1961,7 +1954,7 @@@ recover
          * ENOSPC, or some other error.  We may already have added some
          * blocks to the file, so we need to write these out to avoid
          * exposing stale data.
- -       * The page is currently locked and not marked for writeback
+ +       * The folio is currently locked and not marked for writeback
          */
         bh = head;
         /* Recovery: lock and submit the mapped buffers */
@@@ -1896,15 -1973,15 +1966,15 @@@
                 } else {
                         /*
                          * The buffer may have been set dirty during
- -                       * attachment to a dirty page.
+ +                       * attachment to a dirty folio.
                          */
                         clear_buffer_dirty(bh);
                 }
         } while ((bh = bh->b_this_page) != head);
- -      SetPageError(page);
- -      BUG_ON(PageWriteback(page));
- -      mapping_set_error(page->mapping, err);
- -      set_page_writeback(page);
+ +      folio_set_error(folio);
+ +      BUG_ON(folio_test_writeback(folio));
+ +      mapping_set_error(folio->mapping, err);
+ +      folio_start_writeback(folio);
         do {
                 struct buffer_head *next = bh->b_this_page;
                 if (buffer_async_write(bh)) {
@@@ -1914,40 -1991,39 +1984,40 @@@
                 }
                 bh = next;
         } while (bh != head);
- -      unlock_page(page);
+ +      folio_unlock(folio);
         goto done;
   }
- -EXPORT_SYMBOL(__block_write_full_page);
+ +EXPORT_SYMBOL(__block_write_full_folio);
   
   /*
- - * If a page has any new buffers, zero them out here, and mark them uptodate
+ + * If a folio has any new buffers, zero them out here, and mark them uptodate
    * and dirty so they'll be written out (in order to prevent uninitialised
    * block data from leaking). And clear the new bit.
    */
- -void page_zero_new_buffers(struct page *page, unsigned from, unsigned to)
+ +void folio_zero_new_buffers(struct folio *folio, size_t from, size_t to)
   {
- -      unsigned int block_start, block_end;
+ +      size_t block_start, block_end;
         struct buffer_head *head, *bh;
   
- -      BUG_ON(!PageLocked(page));
- -      if (!page_has_buffers(page))
+ +      BUG_ON(!folio_test_locked(folio));
+ +      head = folio_buffers(folio);
+ +      if (!head)
                 return;
   
- -      bh = head = page_buffers(page);
+ +      bh = head;
         block_start = 0;
         do {
                 block_end = block_start + bh->b_size;
   
                 if (buffer_new(bh)) {
                         if (block_end > from && block_start < to) {
- -                              if (!PageUptodate(page)) {
- -                                      unsigned start, size;
+ +                              if (!folio_test_uptodate(folio)) {
+ +                                      size_t start, xend;
   
                                         start = max(from, block_start);
- -                                      size = min(to, block_end) - start;
+ +                                      xend = min(to, block_end);
   
- -                                      zero_user(page, start, size);
+ +                                      folio_zero_segment(folio, start, xend);
                                         set_buffer_uptodate(bh);
                                 }
   
@@@ -1960,7 -2036,7 +2030,7 @@@
                 bh = bh->b_this_page;
         } while (bh != head);
   }
- -EXPORT_SYMBOL(page_zero_new_buffers);
+ +EXPORT_SYMBOL(folio_zero_new_buffers);
   
   static void
   iomap_to_bh(struct inode *inode, sector_t block, struct buffer_head *bh,
@@@ -2098,7 -2174,7 +2168,7 @@@ int __block_write_begin_int(struct foli
                         err = -EIO;
         }
         if (unlikely(err))
- -              page_zero_new_buffers(&folio->page, from, to);
+ +              folio_zero_new_buffers(folio, from, to);
         return err;
   }
   
@@@ -2110,15 -2186,15 +2180,15 @@@ int __block_write_begin(struct page *pa
   }
   EXPORT_SYMBOL(__block_write_begin);
   
- -static int __block_commit_write(struct inode *inode, struct page *page,
- -              unsigned from, unsigned to)
+ +static int __block_commit_write(struct inode *inode, struct folio *folio,
+ +              size_t from, size_t to)
   {
- -      unsigned block_start, block_end;
- -      int partial = 0;
+ +      size_t block_start, block_end;
+ +      bool partial = false;
         unsigned blocksize;
         struct buffer_head *bh, *head;
   
- -      bh = head = page_buffers(page);
+ +      bh = head = folio_buffers(folio);
         blocksize = bh->b_size;
   
         block_start = 0;
@@@ -2126,7 -2202,7 +2196,7 @@@
                 block_end = block_start + blocksize;
                 if (block_end <= from || block_start >= to) {
                         if (!buffer_uptodate(bh))
- -                              partial = 1;
+ +                              partial = true;
                 } else {
                         set_buffer_uptodate(bh);
                         mark_buffer_dirty(bh);
@@@ -2141,11 -2217,11 +2211,11 @@@
         /*
          * If this is a partial write which happened to make all buffers
          * uptodate then we can optimize away a bogus read_folio() for
- -       * the next read(). Here we 'discover' whether the page went
+ +       * the next read(). Here we 'discover' whether the folio went
          * uptodate as a result of this (potentially partial) write.
          */
         if (!partial)
- -              SetPageUptodate(page);
+ +              folio_mark_uptodate(folio);
         return 0;
   }
   
@@@ -2182,9 -2258,10 +2252,9 @@@ int block_write_end(struct file *file, 
                         loff_t pos, unsigned len, unsigned copied,
                         struct page *page, void *fsdata)
   {
+ +      struct folio *folio = page_folio(page);
         struct inode *inode = mapping->host;
- -      unsigned start;
- -
- -      start = pos & (PAGE_SIZE - 1);
+ +      size_t start = pos - folio_pos(folio);
   
         if (unlikely(copied < len)) {
                 /*
@@@ -2196,18 -2273,18 +2266,18 @@@
                  * read_folio might come in and destroy our partial write.
                  *
                  * Do the simplest thing, and just treat any short write to a
- -               * non uptodate page as a zero-length write, and force the
+ +               * non uptodate folio as a zero-length write, and force the
                  * caller to redo the whole thing.
                  */
- -              if (!PageUptodate(page))
+ +              if (!folio_test_uptodate(folio))
                         copied = 0;
   
- -              page_zero_new_buffers(page, start+copied, start+len);
+ +              folio_zero_new_buffers(folio, start+copied, start+len);
         }
- -      flush_dcache_page(page);
+ +      flush_dcache_folio(folio);
   
         /* This could be a short (even 0-length) commit */
- -      __block_commit_write(inode, page, start, start+copied);
+ +      __block_commit_write(inode, folio, start, start + copied);
   
         return copied;
   }
@@@ -2530,9 -2607,8 +2600,9 @@@ EXPORT_SYMBOL(cont_write_begin)
   
   int block_commit_write(struct page *page, unsigned from, unsigned to)
   {
- -      struct inode *inode = page->mapping->host;
- -      __block_commit_write(inode,page,from,to);
+ +      struct folio *folio = page_folio(page);
+ +      struct inode *inode = folio->mapping->host;
+ +      __block_commit_write(inode, folio, from, to);
         return 0;
   }
   EXPORT_SYMBOL(block_commit_write);
@@@ -2558,37 -2634,38 +2628,37 @@@
   int block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
                          get_block_t get_block)
   {
- -      struct page *page = vmf->page;
+ +      struct folio *folio = page_folio(vmf->page);
         struct inode *inode = file_inode(vma->vm_file);
         unsigned long end;
         loff_t size;
         int ret;
   
- -      lock_page(page);
+ +      folio_lock(folio);
         size = i_size_read(inode);
- -      if ((page->mapping != inode->i_mapping) ||
- -          (page_offset(page) > size)) {
+ +      if ((folio->mapping != inode->i_mapping) ||
+ +          (folio_pos(folio) >= size)) {
                 /* We overload EFAULT to mean page got truncated */
                 ret = -EFAULT;
                 goto out_unlock;
         }
   
- -      /* page is wholly or partially inside EOF */
- -      if (((page->index + 1) << PAGE_SHIFT) > size)
- -              end = size & ~PAGE_MASK;
- -      else
- -              end = PAGE_SIZE;
+ +      end = folio_size(folio);
+ +      /* folio is wholly or partially inside EOF */
+ +      if (folio_pos(folio) + end > size)
+ +              end = size - folio_pos(folio);
   
- -      ret = __block_write_begin(page, 0, end, get_block);
+ +      ret = __block_write_begin_int(folio, 0, end, get_block, NULL);
         if (!ret)
- -              ret = block_commit_write(page, 0, end);
+ +              ret = __block_commit_write(inode, folio, 0, end);
   
         if (unlikely(ret < 0))
                 goto out_unlock;
- -      set_page_dirty(page);
- -      wait_for_stable_page(page);
+ +      folio_mark_dirty(folio);
+ +      folio_wait_stable(folio);
         return 0;
   out_unlock:
- -      unlock_page(page);
+ +      folio_unlock(folio);
         return ret;
   }
   EXPORT_SYMBOL(block_page_mkwrite);
@@@ -2597,16 -2674,17 +2667,16 @@@ int block_truncate_page(struct address_
                         loff_t from, get_block_t *get_block)
   {
         pgoff_t index = from >> PAGE_SHIFT;
- -      unsigned offset = from & (PAGE_SIZE-1);
         unsigned blocksize;
         sector_t iblock;
- -      unsigned length, pos;
+ +      size_t offset, length, pos;
         struct inode *inode = mapping->host;
- -      struct page *page;
+ +      struct folio *folio;
         struct buffer_head *bh;
         int err = 0;
   
         blocksize = i_blocksize(inode);
- -      length = offset & (blocksize - 1);
+ +      length = from & (blocksize - 1);
   
         /* Block boundary? Nothing to do */
         if (!length)
@@@ -2615,18 -2693,15 +2685,18 @@@
         length = blocksize - length;
         iblock = (sector_t)index << (PAGE_SHIFT - inode->i_blkbits);
         
- -      page = grab_cache_page(mapping, index);
- -      if (!page)
- -              return -ENOMEM;
- -
- -      if (!page_has_buffers(page))
- -              create_empty_buffers(page, blocksize, 0);
+ +      folio = filemap_grab_folio(mapping, index);
+ +      if (IS_ERR(folio))
+ +              return PTR_ERR(folio);
+ +
+ +      bh = folio_buffers(folio);
+ +      if (!bh) {
+ +              folio_create_empty_buffers(folio, blocksize, 0);
+ +              bh = folio_buffers(folio);
+ +      }
   
         /* Find the buffer that contains "offset" */
- -      bh = page_buffers(page);
+ +      offset = offset_in_folio(folio, from);
         pos = blocksize;
         while (offset >= pos) {
                 bh = bh->b_this_page;
@@@ -2645,7 -2720,7 +2715,7 @@@
         }
   
         /* Ok, it's mapped. Make sure it's up-to-date */
- -      if (PageUptodate(page))
+ +      if (folio_test_uptodate(folio))
                 set_buffer_uptodate(bh);
   
         if (!buffer_uptodate(bh) && !buffer_delay(bh) && !buffer_unwritten(bh)) {
@@@ -2655,12 -2730,12 +2725,12 @@@
                         goto unlock;
         }
   
- -      zero_user(page, offset, length);
+ +      folio_zero_range(folio, offset, length);
         mark_buffer_dirty(bh);
   
   unlock:
- -      unlock_page(page);
- -      put_page(page);
+ +      folio_unlock(folio);
+ +      folio_put(folio);
   
         return err;
   }
@@@ -2672,32 -2747,33 +2742,32 @@@ EXPORT_SYMBOL(block_truncate_page)
   int block_write_full_page(struct page *page, get_block_t *get_block,
                         struct writeback_control *wbc)
   {
- -      struct inode * const inode = page->mapping->host;
+ +      struct folio *folio = page_folio(page);
+ +      struct inode * const inode = folio->mapping->host;
         loff_t i_size = i_size_read(inode);
- -      const pgoff_t end_index = i_size >> PAGE_SHIFT;
- -      unsigned offset;
   
- -      /* Is the page fully inside i_size? */
- -      if (page->index < end_index)
- -              return __block_write_full_page(inode, page, get_block, wbc,
+ +      /* Is the folio fully inside i_size? */
+ +      if (folio_pos(folio) + folio_size(folio) <= i_size)
+ +              return __block_write_full_folio(inode, folio, get_block, wbc,
                                                end_buffer_async_write);
   
- -      /* Is the page fully outside i_size? (truncate in progress) */
- -      offset = i_size & (PAGE_SIZE-1);
- -      if (page->index >= end_index+1 || !offset) {
- -              unlock_page(page);
+ +      /* Is the folio fully outside i_size? (truncate in progress) */
+ +      if (folio_pos(folio) >= i_size) {
+ +              folio_unlock(folio);
                 return 0; /* don't care */
         }
   
         /*
- -       * The page straddles i_size.  It must be zeroed out on each and every
+ +       * The folio straddles i_size.  It must be zeroed out on each and every
          * writepage invocation because it may be mmapped.  "A file is mapped
          * in multiples of the page size.  For a file that is not a multiple of
- -       * the  page size, the remaining memory is zeroed when mapped, and
+ +       * the page size, the remaining memory is zeroed when mapped, and
          * writes to that region are not written out to the file."
          */
- -      zero_user_segment(page, offset, PAGE_SIZE);
- -      return __block_write_full_page(inode, page, get_block, wbc,
- -                                                      end_buffer_async_write);
+ +      folio_zero_segment(folio, offset_in_folio(folio, i_size),
+ +                      folio_size(folio));
+ +      return __block_write_full_folio(inode, folio, get_block, wbc,
+ +                      end_buffer_async_write);
   }
   EXPORT_SYMBOL(block_write_full_page);
   
@@@ -2754,7 -2830,8 +2824,7 @@@ static void submit_bh_wbc(blk_opf_t opf
   
         bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
   
- -      bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh));
- -      BUG_ON(bio->bi_iter.bi_size != bh->b_size);
+ +      __bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh));
   
         bio->bi_end_io = end_bio_bh_io_sync;
         bio->bi_private = bh;
diff --combined fs/ext2/file.c

index d1ae0f0,7a32f20..0b4c91c
--- 1/fs/ext2/file.c
--- 2/fs/ext2/file.c
+++ b/fs/ext2/file.c
@@@ -25,9 -25,11 +25,11 @@@
   #include <linux/quotaops.h>
   #include <linux/iomap.h>
   #include <linux/uio.h>
+ #include <linux/buffer_head.h>
   #include "ext2.h"
   #include "xattr.h"
   #include "acl.h"
+ #include "trace.h"
   
   #ifdef CONFIG_FS_DAX
   static ssize_t ext2_dax_read_iter(struct kiocb *iocb, struct iov_iter *to)
@@@ -153,7 -155,7 +155,7 @@@ int ext2_fsync(struct file *file, loff_
         int ret;
         struct super_block *sb = file->f_mapping->host->i_sb;
   
-       ret = generic_file_fsync(file, start, end, datasync);
+       ret = generic_buffers_fsync(file, start, end, datasync);
         if (ret == -EIO)
                 /* We don't really know where the IO error happened... */
                 ext2_error(sb, __func__,
@@@ -161,12 -163,131 +163,131 @@@
         return ret;
   }
   
+ static ssize_t ext2_dio_read_iter(struct kiocb *iocb, struct iov_iter *to)
+ {     /*
+        * Direct I/O read path: hands the request to iomap_dio_rw() with
+        * ext2_iomap_ops while holding the inode lock in shared mode, and
+        * returns whatever iomap_dio_rw() returned.  The begin/end trace
+        * points fire before the lock is taken and after it is released.
+        */
+       struct file *file = iocb->ki_filp;
+       struct inode *inode = file->f_mapping->host;
+       ssize_t ret;
+ 
+       trace_ext2_dio_read_begin(iocb, to, 0);
+       inode_lock_shared(inode);
+       ret = iomap_dio_rw(iocb, to, &ext2_iomap_ops, NULL, 0, NULL, 0);
+       inode_unlock_shared(inode);
+       trace_ext2_dio_read_end(iocb, to, ret);
+ 
+       return ret;
+ }
+ 
+ static int ext2_dio_write_end_io(struct kiocb *iocb, ssize_t size,
+                                int error, unsigned int flags)
+ {     /*
+        * ->end_io hook installed in ext2_dio_write_ops.  When @error is set
+        * nothing is changed; otherwise i_size is grown to ki_pos + @size if
+        * the write ended beyond the current size, and the inode is marked
+        * dirty.  @error is returned unchanged either way; @flags is unused.
+        */
+       loff_t pos = iocb->ki_pos;
+       struct inode *inode = file_inode(iocb->ki_filp);
+ 
+       if (error)
+               goto out;
+ 
+       /*
+        * If we are extending the file, we have to update i_size here before
+        * page cache gets invalidated in iomap_dio_rw(). This prevents racing
+        * buffered reads from zeroing out too much from page cache pages.
+        * Note that all extending writes always happen synchronously with
+        * inode lock held by ext2_dio_write_iter(). So it is safe to update
+        * inode size here for extending file writes.
+        */
+       pos += size;
+       if (pos > i_size_read(inode)) {
+               i_size_write(inode, pos);
+               mark_inode_dirty(inode);
+       }
+ out:
+       trace_ext2_dio_write_endio(iocb, size, error);
+       return error;
+ }
+ 
+ static const struct iomap_dio_ops ext2_dio_write_ops = {      /* passed to iomap_dio_rw() by ext2_dio_write_iter() */
+       .end_io = ext2_dio_write_end_io,        /* grows i_size after a successful extending write */
+ };
+ 
+ static ssize_t ext2_dio_write_iter(struct kiocb *iocb, struct iov_iter *from)
+ {     /*
+        * Direct I/O write path.  With the inode lock held exclusively it
+        * validates the request (generic_write_checks(), kiocb_modified()),
+        * submits it through iomap_dio_rw(), and completes whatever is left
+        * in @from with a buffered write whose range is then written back
+        * and invalidated in the page cache.
+        *
+        * Returns ret: the iomap_dio_rw() result plus any bytes added by the
+        * buffered fallback, or a negative error.  -EIOCBQUEUED is passed
+        * through untouched (no cleanup, no fallback).
+        *
+        * NOTE(review): offset and count are sampled before
+        * generic_write_checks() runs; if that call can move ki_pos or
+        * shorten @from, the range given to ext2_write_failed() may differ
+        * from the range actually attempted - confirm this is intended.
+        * NOTE(review): ext2_write_failed() is defined elsewhere; presumably
+        * it undoes allocation past i_size - verify against its definition.
+        */
+       struct file *file = iocb->ki_filp;
+       struct inode *inode = file->f_mapping->host;
+       ssize_t ret;
+       unsigned int flags = 0;
+       unsigned long blocksize = inode->i_sb->s_blocksize;
+       loff_t offset = iocb->ki_pos;
+       loff_t count = iov_iter_count(from);
+       ssize_t status = 0;
+ 
+       trace_ext2_dio_write_begin(iocb, from, 0);
+       inode_lock(inode);
+       ret = generic_write_checks(iocb, from);
+       if (ret <= 0)   /* error, or nothing to write */
+               goto out_unlock;
+ 
+       ret = kiocb_modified(iocb);
+       if (ret)
+               goto out_unlock;
+ 
+       /* use IOMAP_DIO_FORCE_WAIT for unaligned or extending writes */
+       if (iocb->ki_pos + iov_iter_count(from) > i_size_read(inode) ||
+          (!IS_ALIGNED(iocb->ki_pos | iov_iter_alignment(from), blocksize)))
+               flags |= IOMAP_DIO_FORCE_WAIT;
+ 
+       ret = iomap_dio_rw(iocb, from, &ext2_iomap_ops, &ext2_dio_write_ops,
+                          flags, NULL, 0);
+ 
+       /* ENOTBLK is magic return value for fallback to buffered-io */
+       if (ret == -ENOTBLK)
+               ret = 0;
+ 
+       if (ret < 0 && ret != -EIOCBQUEUED)
+               ext2_write_failed(inode->i_mapping, offset + count);
+ 
+       /*
+        * handle case for partial write and for fallback to buffered write:
+        * anything still left in @from is written through the page cache
+        * with IOCB_DIRECT cleared on the kiocb.
+        */
+       if (ret >= 0 && iov_iter_count(from)) {
+               loff_t pos, endbyte;
+               int ret2;
+ 
+               iocb->ki_flags &= ~IOCB_DIRECT;
+               pos = iocb->ki_pos;
+               status = generic_perform_write(iocb, from);
+               if (unlikely(status < 0)) {
+                       ret = status;   /* buffered error replaces any direct-I/O byte count */
+                       goto out_unlock;
+               }
+ 
+               iocb->ki_pos += status;
+               ret += status;
+               endbyte = pos + status - 1;
+               ret2 = filemap_write_and_wait_range(inode->i_mapping, pos,
+                                                   endbyte);
+               if (!ret2)      /* ret2 only gates the invalidation; it is not returned */
+                       invalidate_mapping_pages(inode->i_mapping,
+                                                pos >> PAGE_SHIFT,
+                                                endbyte >> PAGE_SHIFT);
+               if (ret > 0)
+                       generic_write_sync(iocb, ret);  /* NOTE(review): result is discarded - confirm intended */
+       }
+ 
+ out_unlock:
+       inode_unlock(inode);
+       if (status)     /* non-zero only when the buffered fallback ran */
+               trace_ext2_dio_write_buff_end(iocb, from, status);
+       trace_ext2_dio_write_end(iocb, from, ret);
+       return ret;
+ }
+ 
   static ssize_t ext2_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
   {
   #ifdef CONFIG_FS_DAX
         if (IS_DAX(iocb->ki_filp->f_mapping->host))
                 return ext2_dax_read_iter(iocb, to);
   #endif
+       if (iocb->ki_flags & IOCB_DIRECT)
+               return ext2_dio_read_iter(iocb, to);
+ 
         return generic_file_read_iter(iocb, to);
   }
   
@@@ -176,6 -297,9 +297,9 @@@ static ssize_t ext2_file_write_iter(str
         if (IS_DAX(iocb->ki_filp->f_mapping->host))
                 return ext2_dax_write_iter(iocb, from);
   #endif
+       if (iocb->ki_flags & IOCB_DIRECT)
+               return ext2_dio_write_iter(iocb, from);
+ 
         return generic_file_write_iter(iocb, from);
   }
   
@@@ -192,7 -316,7 +316,7 @@@ const struct file_operations ext2_file_
         .release        = ext2_release_file,
         .fsync          = ext2_fsync,
         .get_unmapped_area = thp_get_unmapped_area,
- -      .splice_read    = generic_file_splice_read,
+ +      .splice_read    = filemap_splice_read,
         .splice_write   = iter_file_splice_write,
   };
   
diff --combined fs/ext4/fsync.c

index 2a14320,9cd71d7..0c56f3a
--- 1/fs/ext4/fsync.c
--- 2/fs/ext4/fsync.c
+++ b/fs/ext4/fsync.c
@@@ -28,6 -28,7 +28,7 @@@
   #include <linux/sched.h>
   #include <linux/writeback.h>
   #include <linux/blkdev.h>
+ #include <linux/buffer_head.h>
   
   #include "ext4.h"
   #include "ext4_jbd2.h"
@@@ -78,21 -79,13 +79,13 @@@ static int ext4_sync_parent(struct inod
         return ret;
   }
   
- static int ext4_fsync_nojournal(struct inode *inode, bool datasync,
-                               bool *needs_barrier)
+ static int ext4_fsync_nojournal(struct file *file, loff_t start, loff_t end,
+                               int datasync, bool *needs_barrier)
   {
-       int ret, err;
- 
-       ret = sync_mapping_buffers(inode->i_mapping);
-       if (!(inode->i_state & I_DIRTY_ALL))
-               return ret;
-       if (datasync && !(inode->i_state & I_DIRTY_DATASYNC))
-               return ret;
- 
-       err = sync_inode_metadata(inode, 1);
-       if (!ret)
-               ret = err;
+       struct inode *inode = file->f_inode;
+       int ret;
   
+       ret = generic_buffers_fsync_noflush(file, start, end, datasync);
         if (!ret)
                 ret = ext4_sync_parent(inode);
         if (test_opt(inode->i_sb, BARRIER))
@@@ -108,13 -101,6 +101,13 @@@ static int ext4_fsync_journal(struct in
         journal_t *journal = EXT4_SB(inode->i_sb)->s_journal;
         tid_t commit_tid = datasync ? ei->i_datasync_tid : ei->i_sync_tid;
   
+ +      /*
+ +       * Fastcommit does not really support fsync on directories or other
+ +       * special files. Force a full commit.
+ +       */
+ +      if (!S_ISREG(inode->i_mode))
+ +              return ext4_force_commit(inode->i_sb);
+ +
         if (journal->j_flags & JBD2_BARRIER &&
             !jbd2_trans_will_send_data_barrier(journal, commit_tid))
                 *needs_barrier = true;
@@@ -155,6 -141,14 +148,14 @@@ int ext4_sync_file(struct file *file, l
                 goto out;
         }
   
+       if (!sbi->s_journal) {
+               ret = ext4_fsync_nojournal(file, start, end, datasync,
+                                          &needs_barrier);
+               if (needs_barrier)
+                       goto issue_flush;
+               goto out;
+       }
+ 
         ret = file_write_and_wait_range(file, start, end);
         if (ret)
                 goto out;
@@@ -164,11 -158,9 +165,9 @@@
          *  Metadata is in the journal, we wait for proper transaction to
          *  commit here.
          */
-       if (!sbi->s_journal)
-               ret = ext4_fsync_nojournal(inode, datasync, &needs_barrier);
-       else
-               ret = ext4_fsync_journal(inode, datasync, &needs_barrier);
+       ret = ext4_fsync_journal(inode, datasync, &needs_barrier);
   
+ issue_flush:
         if (needs_barrier) {
                 err = blkdev_issue_flush(inode->i_sb->s_bdev);
                 if (!ret)
diff --combined fs/super.c

index 05ff6ab,6283cea..e781226
--- 1/fs/super.c
--- 2/fs/super.c
+++ b/fs/super.c
@@@ -54,7 -54,7 +54,7 @@@ static char *sb_writers_name[SB_FREEZE_
    * One thing we have to be careful of with a per-sb shrinker is that we don't
    * drop the last active reference to the superblock from within the shrinker.
    * If that happens we could trigger unregistering the shrinker from within the
- - * shrinker path and that leads to deadlock on the shrinker_mutex. Hence we
+ + * shrinker path and that leads to deadlock on the shrinker_rwsem. Hence we
    * take a passive reference to the superblock to avoid this from occurring.
    */
   static unsigned long super_cache_scan(struct shrinker *shrink,
@@@ -236,7 -236,6 +236,6 @@@ static struct super_block *alloc_super(
                                         &type->s_writers_key[i]))
                         goto fail;
         }
-       init_waitqueue_head(&s->s_writers.wait_unfrozen);
         s->s_bdi = &noop_backing_dev_info;
         s->s_flags = flags;
         if (s->s_user_ns != &init_user_ns)
@@@ -595,7 -594,7 +594,7 @@@ retry
         fc->s_fs_info = NULL;
         s->s_type = fc->fs_type;
         s->s_iflags |= fc->s_iflags;
- -      strlcpy(s->s_id, s->s_type->name, sizeof(s->s_id));
+ +      strscpy(s->s_id, s->s_type->name, sizeof(s->s_id));
         list_add_tail(&s->s_list, &super_blocks);
         hlist_add_head(&s->s_instances, &s->s_type->fs_supers);
         spin_unlock(&sb_lock);
@@@ -674,7 -673,7 +673,7 @@@ retry
                 return ERR_PTR(err);
         }
         s->s_type = type;
- -      strlcpy(s->s_id, type->name, sizeof(s->s_id));
+ +      strscpy(s->s_id, type->name, sizeof(s->s_id));
         list_add_tail(&s->s_list, &super_blocks);
         hlist_add_head(&s->s_instances, &type->fs_supers);
         spin_unlock(&sb_lock);
@@@ -903,7 -902,6 +902,7 @@@ int reconfigure_super(struct fs_contex
         struct super_block *sb = fc->root->d_sb;
         int retval;
         bool remount_ro = false;
+ +      bool remount_rw = false;
         bool force = fc->sb_flags & SB_FORCE;
   
         if (fc->sb_flags_mask & ~MS_RMT_MASK)
@@@ -921,7 -919,7 +920,7 @@@
                     bdev_read_only(sb->s_bdev))
                         return -EACCES;
   #endif
- -
+ +              remount_rw = !(fc->sb_flags & SB_RDONLY) && sb_rdonly(sb);
                 remount_ro = (fc->sb_flags & SB_RDONLY) && !sb_rdonly(sb);
         }
   
@@@ -944,18 -942,13 +943,18 @@@
          */
         if (remount_ro) {
                 if (force) {
- -                      sb->s_readonly_remount = 1;
- -                      smp_wmb();
+ +                      sb_start_ro_state_change(sb);
                 } else {
                         retval = sb_prepare_remount_readonly(sb);
                         if (retval)
                                 return retval;
                 }
+ +      } else if (remount_rw) {
+ +              /*
+ +               * Protect filesystem's reconfigure code from writes from
+ +               * userspace until reconfigure finishes.
+ +               */
+ +              sb_start_ro_state_change(sb);
         }
   
         if (fc->ops->reconfigure) {
@@@ -971,7 -964,9 +970,7 @@@
   
         WRITE_ONCE(sb->s_flags, ((sb->s_flags & ~fc->sb_flags_mask) |
                                  (fc->sb_flags & fc->sb_flags_mask)));
- -      /* Needs to be ordered wrt mnt_is_readonly() */
- -      smp_wmb();
- -      sb->s_readonly_remount = 0;
+ +      sb_end_ro_state_change(sb);
   
         /*
          * Some filesystems modify their metadata via some other path than the
@@@ -986,7 -981,7 +985,7 @@@
         return 0;
   
   cancel_readonly:
- -      sb->s_readonly_remount = 0;
+ +      sb_end_ro_state_change(sb);
         return retval;
   }
   
@@@ -1210,22 -1205,6 +1209,22 @@@ int get_tree_keyed(struct fs_context *f
   EXPORT_SYMBOL(get_tree_keyed);
   
   #ifdef CONFIG_BLOCK
+ +static void fs_mark_dead(struct block_device *bdev)
+ +{
+ +      struct super_block *sb;
+ +
+ +      sb = get_super(bdev);
+ +      if (!sb)
+ +              return;
+ +
+ +      if (sb->s_op->shutdown)
+ +              sb->s_op->shutdown(sb);
+ +      drop_super(sb);
+ +}
+ +
+ +static const struct blk_holder_ops fs_holder_ops = {
+ +      .mark_dead              = fs_mark_dead,
+ +};
   
   static int set_bdev_super(struct super_block *s, void *data)
   {
@@@ -1259,13 -1238,16 +1258,13 @@@ int get_tree_bdev(struct fs_context *fc
   {
         struct block_device *bdev;
         struct super_block *s;
- -      fmode_t mode = FMODE_READ | FMODE_EXCL;
         int error = 0;
   
- -      if (!(fc->sb_flags & SB_RDONLY))
- -              mode |= FMODE_WRITE;
- -
         if (!fc->source)
                 return invalf(fc, "No source specified");
   
- -      bdev = blkdev_get_by_path(fc->source, mode, fc->fs_type);
+ +      bdev = blkdev_get_by_path(fc->source, sb_open_mode(fc->sb_flags),
+ +                                fc->fs_type, &fs_holder_ops);
         if (IS_ERR(bdev)) {
                 errorf(fc, "%s: Can't open blockdev", fc->source);
                 return PTR_ERR(bdev);
@@@ -1279,7 -1261,7 +1278,7 @@@
         if (bdev->bd_fsfreeze_count > 0) {
                 mutex_unlock(&bdev->bd_fsfreeze_mutex);
                 warnf(fc, "%pg: Can't mount, blockdev is frozen", bdev);
- -              blkdev_put(bdev, mode);
+ +              blkdev_put(bdev, fc->fs_type);
                 return -EBUSY;
         }
   
@@@ -1288,7 -1270,7 +1287,7 @@@
         s = sget_fc(fc, test_bdev_super_fc, set_bdev_super_fc);
         mutex_unlock(&bdev->bd_fsfreeze_mutex);
         if (IS_ERR(s)) {
- -              blkdev_put(bdev, mode);
+ +              blkdev_put(bdev, fc->fs_type);
                 return PTR_ERR(s);
         }
   
@@@ -1297,7 -1279,7 +1296,7 @@@
                 if ((fc->sb_flags ^ s->s_flags) & SB_RDONLY) {
                         warnf(fc, "%pg: Can't mount, would change RO state", bdev);
                         deactivate_locked_super(s);
- -                      blkdev_put(bdev, mode);
+ +                      blkdev_put(bdev, fc->fs_type);
                         return -EBUSY;
                 }
   
@@@ -1309,9 -1291,10 +1308,9 @@@
                  * holding an active reference.
                  */
                 up_write(&s->s_umount);
- -              blkdev_put(bdev, mode);
+ +              blkdev_put(bdev, fc->fs_type);
                 down_write(&s->s_umount);
         } else {
- -              s->s_mode = mode;
                 snprintf(s->s_id, sizeof(s->s_id), "%pg", bdev);
                 shrinker_debugfs_rename(&s->s_shrink, "sb-%s:%s",
                                         fc->fs_type->name, s->s_id);
@@@ -1343,10 -1326,13 +1342,10 @@@ struct dentry *mount_bdev(struct file_s
   {
         struct block_device *bdev;
         struct super_block *s;
- -      fmode_t mode = FMODE_READ | FMODE_EXCL;
         int error = 0;
   
- -      if (!(flags & SB_RDONLY))
- -              mode |= FMODE_WRITE;
- -
- -      bdev = blkdev_get_by_path(dev_name, mode, fs_type);
+ +      bdev = blkdev_get_by_path(dev_name, sb_open_mode(flags), fs_type,
+ +                                &fs_holder_ops);
         if (IS_ERR(bdev))
                 return ERR_CAST(bdev);
   
@@@ -1382,9 -1368,10 +1381,9 @@@
                  * holding an active reference.
                  */
                 up_write(&s->s_umount);
- -              blkdev_put(bdev, mode);
+ +              blkdev_put(bdev, fs_type);
                 down_write(&s->s_umount);
         } else {
- -              s->s_mode = mode;
                 snprintf(s->s_id, sizeof(s->s_id), "%pg", bdev);
                 shrinker_debugfs_rename(&s->s_shrink, "sb-%s:%s",
                                         fs_type->name, s->s_id);
@@@ -1404,7 -1391,7 +1403,7 @@@
   error_s:
         error = PTR_ERR(s);
   error_bdev:
- -      blkdev_put(bdev, mode);
+ +      blkdev_put(bdev, fs_type);
   error:
         return ERR_PTR(error);
   }
@@@ -1413,11 -1400,13 +1412,11 @@@ EXPORT_SYMBOL(mount_bdev)
   void kill_block_super(struct super_block *sb)
   {
         struct block_device *bdev = sb->s_bdev;
- -      fmode_t mode = sb->s_mode;
   
         bdev->bd_super = NULL;
         generic_shutdown_super(sb);
         sync_blockdev(bdev);
- -      WARN_ON_ONCE(!(mode & FMODE_EXCL));
- -      blkdev_put(bdev, mode | FMODE_EXCL);
+ +      blkdev_put(bdev, sb->s_type);
   }
   
   EXPORT_SYMBOL(kill_block_super);
@@@ -1716,7 -1705,6 +1715,6 @@@ int freeze_super(struct super_block *sb
         if (ret) {
                 sb->s_writers.frozen = SB_UNFROZEN;
                 sb_freeze_unlock(sb, SB_FREEZE_PAGEFAULT);
-               wake_up(&sb->s_writers.wait_unfrozen);
                 deactivate_locked_super(sb);
                 return ret;
         }
@@@ -1732,7 -1720,6 +1730,6 @@@
                                 "VFS:Filesystem freeze failed\n");
                         sb->s_writers.frozen = SB_UNFROZEN;
                         sb_freeze_unlock(sb, SB_FREEZE_FS);
-                       wake_up(&sb->s_writers.wait_unfrozen);
                         deactivate_locked_super(sb);
                         return ret;
                 }
@@@ -1778,7 -1765,6 +1775,6 @@@ static int thaw_super_locked(struct sup
         sb->s_writers.frozen = SB_UNFROZEN;
         sb_freeze_unlock(sb, SB_FREEZE_FS);
   out:
-       wake_up(&sb->s_writers.wait_unfrozen);
         deactivate_locked_super(sb);
         return 0;
   }
diff --combined fs/udf/file.c

index 29daf5d,b871b85..243840d
--- 1/fs/udf/file.c
--- 2/fs/udf/file.c
+++ b/fs/udf/file.c
@@@ -1,3 -1,4 +1,4 @@@
+ // SPDX-License-Identifier: GPL-2.0-only
   /*
    * file.c
    *
@@@ -5,11 -6,6 +6,6 @@@
    *  File handling routines for the OSTA-UDF(tm) filesystem.
    *
    * COPYRIGHT
-  *  This file is distributed under the terms of the GNU General Public
-  *  License (GPL). Copies of the GPL can be obtained from:
-  *    ftp://prep.ai.mit.edu/pub/gnu/GPL
-  *  Each contributing author retains all rights to their own work.
-  *
    *  (C) 1998-1999 Dave Boynton
    *  (C) 1998-2004 Ben Fennema
    *  (C) 1999-2000 Stelias Computing Inc
@@@ -209,7 -205,7 +205,7 @@@ const struct file_operations udf_file_o
         .write_iter             = udf_file_write_iter,
         .release                = udf_release_file,
         .fsync                  = generic_file_fsync,
- -      .splice_read            = generic_file_splice_read,
+ +      .splice_read            = filemap_splice_read,
         .splice_write           = iter_file_splice_write,
         .llseek                 = generic_file_llseek,
   };
diff --combined fs/udf/namei.c

index fd29a66,49e1e0f..a95579b
--- 1/fs/udf/namei.c
--- 2/fs/udf/namei.c
+++ b/fs/udf/namei.c
@@@ -1,3 -1,4 +1,4 @@@
+ // SPDX-License-Identifier: GPL-2.0-only
   /*
    * namei.c
    *
@@@ -5,11 -6,6 +6,6 @@@
    *      Inode name handling routines for the OSTA-UDF(tm) filesystem.
    *
    * COPYRIGHT
-  *      This file is distributed under the terms of the GNU General Public
-  *      License (GPL). Copies of the GPL can be obtained from:
-  *              ftp://prep.ai.mit.edu/pub/gnu/GPL
-  *      Each contributing author retains all rights to their own work.
-  *
    *  (C) 1998-2004 Ben Fennema
    *  (C) 1999-2000 Stelias Computing Inc
    *
@@@ -793,6 -789,11 +789,6 @@@ static int udf_rename(struct mnt_idmap 
                         if (!empty_dir(new_inode))
                                 goto out_oiter;
                 }
- -              /*
- -               * We need to protect against old_inode getting converted from
- -               * ICB to normal directory.
- -               */
- -              inode_lock_nested(old_inode, I_MUTEX_NONDIR2);
                 retval = udf_fiiter_find_entry(old_inode, &dotdot_name,
                                                &diriter);
                 if (retval == -ENOENT) {
@@@ -801,8 -802,10 +797,8 @@@
                                 old_inode->i_ino);
                         retval = -EFSCORRUPTED;
                 }
- -              if (retval) {
- -                      inode_unlock(old_inode);
+ +              if (retval)
                         goto out_oiter;
- -              }
                 has_diriter = true;
                 tloc = lelb_to_cpu(diriter.fi.icb.extLocation);
                 if (udf_get_lb_pblock(old_inode->i_sb, &tloc, 0) !=
@@@ -882,6 -885,7 +878,6 @@@
                                udf_dir_entry_len(&diriter.fi));
                 udf_fiiter_write_fi(&diriter, NULL);
                 udf_fiiter_release(&diriter);
- -              inode_unlock(old_inode);
   
                 inode_dec_link_count(old_dir);
                 if (new_inode)
@@@ -893,8 -897,10 +889,8 @@@
         }
         return 0;
   out_oiter:
- -      if (has_diriter) {
+ +      if (has_diriter)
                 udf_fiiter_release(&diriter);
- -              inode_unlock(old_inode);
- -      }
         udf_fiiter_release(&oiter);
   
         return retval;
diff --combined include/linux/buffer_head.h

index c794ea7,1bd73ce..6cb3e9a
--- 1/include/linux/buffer_head.h
--- 2/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@@ -217,6 -217,10 +217,10 @@@ int inode_has_buffers(struct inode *)
   void invalidate_inode_buffers(struct inode *);
   int remove_inode_buffers(struct inode *inode);
   int sync_mapping_buffers(struct address_space *mapping);
+ int generic_buffers_fsync_noflush(struct file *file, loff_t start, loff_t end,
+                                 bool datasync);
+ int generic_buffers_fsync(struct file *file, loff_t start, loff_t end,
+                         bool datasync);
   void clean_bdev_aliases(struct block_device *bdev, sector_t block,
                         sector_t len);
   static inline void clean_bdev_bh_alias(struct buffer_head *bh)
@@@ -263,7 -267,7 +267,7 @@@ extern int buffer_heads_over_limit
   void block_invalidate_folio(struct folio *folio, size_t offset, size_t length);
   int block_write_full_page(struct page *page, get_block_t *get_block,
                                 struct writeback_control *wbc);
- -int __block_write_full_page(struct inode *inode, struct page *page,
+ +int __block_write_full_folio(struct inode *inode, struct folio *folio,
                         get_block_t *get_block, struct writeback_control *wbc,
                         bh_end_io_t *handler);
   int block_read_full_folio(struct folio *, get_block_t *);
@@@ -278,7 -282,7 +282,7 @@@ int block_write_end(struct file *, stru
   int generic_write_end(struct file *, struct address_space *,
                                 loff_t, unsigned, unsigned,
                                 struct page *, void *);
- -void page_zero_new_buffers(struct page *page, unsigned from, unsigned to);
+ +void folio_zero_new_buffers(struct folio *folio, size_t from, size_t to);
   void clean_page_buffers(struct page *page);
   int cont_write_begin(struct file *, struct address_space *, loff_t,
                         unsigned, struct page **, void **,
diff --combined include/linux/fs.h

index d4b67bd,3b65a61..6867512
--- 1/include/linux/fs.h
--- 2/include/linux/fs.h
+++ b/include/linux/fs.h
@@@ -119,6 -119,13 +119,6 @@@ typedef int (dio_iodone_t)(struct kioc
   #define FMODE_PWRITE          ((__force fmode_t)0x10)
   /* File is opened for execution with sys_execve / sys_uselib */
   #define FMODE_EXEC            ((__force fmode_t)0x20)
- -/* File is opened with O_NDELAY (only set for block devices) */
- -#define FMODE_NDELAY          ((__force fmode_t)0x40)
- -/* File is opened with O_EXCL (only set for block devices) */
- -#define FMODE_EXCL            ((__force fmode_t)0x80)
- -/* File is opened using open(.., 3, ..) and is writeable only for ioctls
- -   (specialy hack for floppy.c) */
- -#define FMODE_WRITE_IOCTL     ((__force fmode_t)0x100)
   /* 32bit hashes as llseek() offset (for directories) */
   #define FMODE_32BITHASH         ((__force fmode_t)0x200)
   /* 64bit hashes as llseek() offset (for directories) */
@@@ -164,9 -171,6 +164,9 @@@
   /* File supports non-exclusive O_DIRECT writes from multiple threads */
   #define FMODE_DIO_PARALLEL_WRITE      ((__force fmode_t)0x1000000)
   
+ +/* File is embedded in backing_file object */
+ +#define FMODE_BACKING         ((__force fmode_t)0x2000000)
+ +
   /* File was opened by fanotify and shouldn't generate fanotify events */
   #define FMODE_NONOTIFY                ((__force fmode_t)0x4000000)
   
@@@ -952,35 -956,29 +952,35 @@@ static inline int ra_has_index(struct f
                 index <  ra->start + ra->size);
   }
   
+ +/*
+ + * f_{lock,count,pos_lock} members can be highly contended and share
+ + * the same cacheline. f_{lock,mode} are very frequently used together
+ + * and so share the same cacheline as well. The read-mostly
+ + * f_{path,inode,op} are kept on a separate cacheline.
+ + */
   struct file {
         union {
                 struct llist_node       f_llist;
                 struct rcu_head         f_rcuhead;
                 unsigned int            f_iocb_flags;
         };
- -      struct path             f_path;
- -      struct inode            *f_inode;       /* cached value */
- -      const struct file_operations    *f_op;
   
         /*
          * Protects f_ep, f_flags.
          * Must not be taken from IRQ context.
          */
         spinlock_t              f_lock;
- -      atomic_long_t           f_count;
- -      unsigned int            f_flags;
         fmode_t                 f_mode;
+ +      atomic_long_t           f_count;
         struct mutex            f_pos_lock;
         loff_t                  f_pos;
+ +      unsigned int            f_flags;
         struct fown_struct      f_owner;
         const struct cred       *f_cred;
         struct file_ra_state    f_ra;
+ +      struct path             f_path;
+ +      struct inode            *f_inode;       /* cached value */
+ +      const struct file_operations    *f_op;
   
         u64                     f_version;
   #ifdef CONFIG_SECURITY
@@@ -1078,29 -1076,29 +1078,29 @@@ extern int send_sigurg(struct fown_stru
    * sb->s_flags.  Note that these mirror the equivalent MS_* flags where
    * represented in both.
    */
- -#define SB_RDONLY      1      /* Mount read-only */
- -#define SB_NOSUID      2      /* Ignore suid and sgid bits */
- -#define SB_NODEV       4      /* Disallow access to device special files */
- -#define SB_NOEXEC      8      /* Disallow program execution */
- -#define SB_SYNCHRONOUS        16      /* Writes are synced at once */
- -#define SB_MANDLOCK   64      /* Allow mandatory locks on an FS */
- -#define SB_DIRSYNC    128     /* Directory modifications are synchronous */
- -#define SB_NOATIME    1024    /* Do not update access times. */
- -#define SB_NODIRATIME 2048    /* Do not update directory access times */
- -#define SB_SILENT     32768
- -#define SB_POSIXACL   (1<<16) /* VFS does not apply the umask */
- -#define SB_INLINECRYPT        (1<<17) /* Use blk-crypto for encrypted files */
- -#define SB_KERNMOUNT  (1<<22) /* this is a kern_mount call */
- -#define SB_I_VERSION  (1<<23) /* Update inode I_version field */
- -#define SB_LAZYTIME   (1<<25) /* Update the on-disk [acm]times lazily */
+ +#define SB_RDONLY       BIT(0)        /* Mount read-only */
+ +#define SB_NOSUID       BIT(1)        /* Ignore suid and sgid bits */
+ +#define SB_NODEV        BIT(2)        /* Disallow access to device special files */
+ +#define SB_NOEXEC       BIT(3)        /* Disallow program execution */
+ +#define SB_SYNCHRONOUS  BIT(4)        /* Writes are synced at once */
+ +#define SB_MANDLOCK     BIT(6)        /* Allow mandatory locks on an FS */
+ +#define SB_DIRSYNC      BIT(7)        /* Directory modifications are synchronous */
+ +#define SB_NOATIME      BIT(10)       /* Do not update access times. */
+ +#define SB_NODIRATIME   BIT(11)       /* Do not update directory access times */
+ +#define SB_SILENT       BIT(15)
+ +#define SB_POSIXACL     BIT(16)       /* VFS does not apply the umask */
+ +#define SB_INLINECRYPT  BIT(17)       /* Use blk-crypto for encrypted files */
+ +#define SB_KERNMOUNT    BIT(22)       /* this is a kern_mount call */
+ +#define SB_I_VERSION    BIT(23)       /* Update inode I_version field */
+ +#define SB_LAZYTIME     BIT(25)       /* Update the on-disk [acm]times lazily */
   
   /* These sb flags are internal to the kernel */
- -#define SB_SUBMOUNT     (1<<26)
- -#define SB_FORCE      (1<<27)
- -#define SB_NOSEC      (1<<28)
- -#define SB_BORN               (1<<29)
- -#define SB_ACTIVE     (1<<30)
- -#define SB_NOUSER     (1<<31)
+ +#define SB_SUBMOUNT     BIT(26)
+ +#define SB_FORCE        BIT(27)
+ +#define SB_NOSEC        BIT(28)
+ +#define SB_BORN         BIT(29)
+ +#define SB_ACTIVE       BIT(30)
+ +#define SB_NOUSER       BIT(31)
   
   /* These flags relate to encoding and casefolding */
   #define SB_ENC_STRICT_MODE_FL (1 << 0)
@@@ -1148,7 -1146,6 +1148,6 @@@ enum 
   
   struct sb_writers {
         int                             frozen;         /* Is sb frozen? */
-       wait_queue_head_t               wait_unfrozen;  /* wait for thaw */
         struct percpu_rw_semaphore      rw_sem[SB_FREEZE_LEVELS];
   };
   
@@@ -1217,6 -1214,7 +1216,6 @@@ struct super_block 
         uuid_t                  s_uuid;         /* UUID */
   
         unsigned int            s_max_links;
- -      fmode_t                 s_mode;
   
         /*
          * The next field is for VFS *only*. No filesystems have any business
@@@ -1243,7 -1241,7 +1242,7 @@@
          */
         atomic_long_t s_fsnotify_connectors;
   
- -      /* Being remounted read-only */
+ +      /* Read-only state of the superblock is being changed */
         int s_readonly_remount;
   
         /* per-sb errseq_t for reporting writeback errors via syncfs */
@@@ -1673,12 -1671,9 +1672,12 @@@ static inline int vfs_whiteout(struct m
                          WHITEOUT_DEV);
   }
   
- -struct file *vfs_tmpfile_open(struct mnt_idmap *idmap,
- -                      const struct path *parentpath,
- -                      umode_t mode, int open_flag, const struct cred *cred);
+ +struct file *kernel_tmpfile_open(struct mnt_idmap *idmap,
+ +                               const struct path *parentpath,
+ +                               umode_t mode, int open_flag,
+ +                               const struct cred *cred);
+ +struct file *kernel_file_open(const struct path *path, int flags,
+ +                            struct inode *inode, const struct cred *cred);
   
   int vfs_mkobj(struct dentry *, umode_t,
                 int (*f)(struct dentry *, umode_t, void *),
@@@ -1794,12 -1789,12 +1793,12 @@@ struct file_operations 
         int (*fsync) (struct file *, loff_t, loff_t, int datasync);
         int (*fasync) (int, struct file *, int);
         int (*lock) (struct file *, int, struct file_lock *);
- -      ssize_t (*sendpage) (struct file *, struct page *, int, size_t, loff_t *, int);
         unsigned long (*get_unmapped_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);
         int (*check_flags)(int);
         int (*flock) (struct file *, int, struct file_lock *);
         ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, loff_t *, size_t, unsigned int);
         ssize_t (*splice_read)(struct file *, loff_t *, struct pipe_inode_info *, size_t, unsigned int);
+ +      void (*splice_eof)(struct file *file);
         int (*setlease)(struct file *, long, struct file_lock **, void **);
         long (*fallocate)(struct file *file, int mode, loff_t offset,
                           loff_t len);
@@@ -1936,7 -1931,6 +1935,7 @@@ struct super_operations 
                                   struct shrink_control *);
         long (*free_cached_objects)(struct super_block *,
                                     struct shrink_control *);
+ +      void (*shutdown)(struct super_block *sb);
   };
   
   /*
@@@ -2354,31 -2348,11 +2353,31 @@@ static inline struct file *file_open_ro
         return file_open_root(&(struct path){.mnt = mnt, .dentry = mnt->mnt_root},
                               name, flags, mode);
   }
- -extern struct file * dentry_open(const struct path *, int, const struct cred *);
- -extern struct file *dentry_create(const struct path *path, int flags,
- -                                umode_t mode, const struct cred *cred);
- -extern struct file * open_with_fake_path(const struct path *, int,
- -                                       struct inode*, const struct cred *);
+ +struct file *dentry_open(const struct path *path, int flags,
+ +                       const struct cred *creds);
+ +struct file *dentry_create(const struct path *path, int flags, umode_t mode,
+ +                         const struct cred *cred);
+ +struct file *backing_file_open(const struct path *path, int flags,
+ +                             const struct path *real_path,
+ +                             const struct cred *cred);
+ +struct path *backing_file_real_path(struct file *f);
+ +
+ +/*
+ + * file_real_path - get the path corresponding to f_inode
+ + *
+ + * When opening a backing file for a stackable filesystem (e.g.,
+ + * overlayfs) f_path may be on the stackable filesystem and f_inode on
+ + * the underlying filesystem.  When the path associated with f_inode is
+ + * needed, this helper should be used instead of accessing f_path
+ + * directly.
+ +*/
+ +static inline const struct path *file_real_path(struct file *f)
+ +{
+ +      if (unlikely(f->f_mode & FMODE_BACKING))
+ +              return backing_file_real_path(f);
+ +      return &f->f_path;
+ +}
+ +
   static inline struct file *file_clone_open(struct file *file)
   {
         return dentry_open(&file->f_path, file->f_flags, file->f_cred);
@@@ -2694,7 -2668,7 +2693,7 @@@ extern void evict_inodes(struct super_b
   void dump_mapping(const struct address_space *);
   
   /*
- - * Userspace may rely on the the inode number being non-zero. For example, glibc
+ + * Userspace may rely on the inode number being non-zero. For example, glibc
    * simply ignores files with zero i_ino in unlink() and other places.
    *
    * As an additional complication, if userspace was compiled with
@@@ -2763,8 -2737,6 +2762,8 @@@ extern ssize_t __generic_file_write_ite
   extern ssize_t generic_file_write_iter(struct kiocb *, struct iov_iter *);
   extern ssize_t generic_file_direct_write(struct kiocb *, struct iov_iter *);
   ssize_t generic_perform_write(struct kiocb *, struct iov_iter *);
+ +ssize_t direct_write_fallback(struct kiocb *iocb, struct iov_iter *iter,
+ +              ssize_t direct_written, ssize_t buffered_written);
   
   ssize_t vfs_iter_read(struct file *file, struct iov_iter *iter, loff_t *ppos,
                 rwf_t flags);
@@@ -2779,11 -2751,15 +2778,11 @@@ ssize_t vfs_iocb_iter_write(struct fil
   ssize_t filemap_splice_read(struct file *in, loff_t *ppos,
                             struct pipe_inode_info *pipe,
                             size_t len, unsigned int flags);
- -ssize_t direct_splice_read(struct file *in, loff_t *ppos,
- -                         struct pipe_inode_info *pipe,
- -                         size_t len, unsigned int flags);
- -extern ssize_t generic_file_splice_read(struct file *, loff_t *,
- -              struct pipe_inode_info *, size_t, unsigned int);
+ +ssize_t copy_splice_read(struct file *in, loff_t *ppos,
+ +                       struct pipe_inode_info *pipe,
+ +                       size_t len, unsigned int flags);
   extern ssize_t iter_file_splice_write(struct pipe_inode_info *,
                 struct file *, loff_t *, size_t, unsigned int);
- -extern ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe,
- -              struct file *out, loff_t *, size_t len, unsigned int flags);
   extern long do_splice_direct(struct file *in, loff_t *ppos, struct file *out,
                 loff_t *opos, size_t len, unsigned int flags);
   
@@@ -2860,6 -2836,11 +2859,6 @@@ static inline void inode_dio_end(struc
                 wake_up_bit(&inode->i_state, __I_DIO_WAKEUP);
   }
   
- -/*
- - * Warn about a page cache invalidation failure diring a direct I/O write.
- - */
- -void dio_warn_stale_pagecache(struct file *filp);
- -
   extern void inode_set_flags(struct inode *inode, unsigned int flags,
                             unsigned int mask);
author	Linus Torvalds <torvalds@linux-foundation.org>
	Thu, 29 Jun 2023 20:39:51 +0000 (13:39 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Thu, 29 Jun 2023 20:39:51 +0000 (13:39 -0700)
		1	2
fs/buffer.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/ext2/file.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/ext4/fsync.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/super.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/udf/file.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/udf/namei.c	patch \|	diff1 \|	diff2 \|	blob \| history
include/linux/buffer_head.h	patch \|	diff1 \|	diff2 \|	blob \| history
include/linux/fs.h	patch \|	diff1 \|	diff2 \|	blob \| history