vfs: fix data corruption when blocksize < pagesize for mmaped data

author Jan Kara <jack@suse.cz>

Thu, 2 Oct 2014 01:49:18 +0000 (21:49 -0400)

committer Zefan Li <lizefan@huawei.com>

Mon, 2 Feb 2015 09:04:52 +0000 (17:04 +0800)
author Jan Kara <jack@suse.cz>
Thu, 2 Oct 2014 01:49:18 +0000 (21:49 -0400)
committer Zefan Li <lizefan@huawei.com>
Mon, 2 Feb 2015 09:04:52 +0000 (17:04 +0800)
diff --git a/fs/buffer.c b/fs/buffer.c

index 104425b7173d7a6275bc2826b2a63ff6fc145ef2..ed2dc709883a3e9492dcf5977be3b05bf95cf157 100644 (file)
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -1982,6 +1982,7 @@ int generic_write_end(struct file *file, struct address_space *mapping,
                         struct page *page, void *fsdata)
  {
         struct inode *inode = mapping->host;
+       loff_t old_size = inode->i_size;
         int i_size_changed = 0;
  
         copied = block_write_end(file, mapping, pos, len, copied, page, fsdata);
@@ -2001,6 +2002,8 @@ int generic_write_end(struct file *file, struct address_space *mapping,
         unlock_page(page);
         page_cache_release(page);
  
+       if (old_size < pos)
+               pagecache_isize_extended(inode, old_size, pos);
         /*
          * Don't mark the inode dirty under page lock. First, it unnecessarily
          * makes the holding time of page lock longer. Second, it forces lock
diff --git a/include/linux/mm.h b/include/linux/mm.h

index dbca4b21b7d306c2e3c32712667a06136ae564c4..656b4e968991a82c852689e08b9c9ea263a77e1e 100644 (file)
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -953,6 +953,7 @@ static inline void unmap_shared_mapping_range(struct address_space *mapping,
  
  extern void truncate_pagecache(struct inode *inode, loff_t old, loff_t new);
  extern void truncate_setsize(struct inode *inode, loff_t newsize);
+void pagecache_isize_extended(struct inode *inode, loff_t from, loff_t to);
  extern int vmtruncate(struct inode *inode, loff_t offset);
  extern int vmtruncate_range(struct inode *inode, loff_t offset, loff_t end);
  void truncate_pagecache_range(struct inode *inode, loff_t offset, loff_t end);
diff --git a/mm/truncate.c b/mm/truncate.c

index f38055cb8af6df6cb33d105b91c34b57a9b5cb77..708a499c49a875b2cd365238b73d49826826d262 100644 (file)
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -20,6 +20,7 @@
  #include <linux/buffer_head.h> /* grr. try_to_release_page,
                                    do_invalidatepage */
  #include <linux/cleancache.h>
+#include <linux/rmap.h>
  #include "internal.h"
  
  
@@ -571,16 +572,71 @@ EXPORT_SYMBOL(truncate_pagecache);
   */
  void truncate_setsize(struct inode *inode, loff_t newsize)
  {
-       loff_t oldsize;
-
-       oldsize = inode->i_size;
+       loff_t oldsize = inode->i_size;
         i_size_write(inode, newsize);
  
+       if (newsize > oldsize)
+               pagecache_isize_extended(inode, oldsize, newsize);
         truncate_pagecache(inode, oldsize, newsize);
  }
  EXPORT_SYMBOL(truncate_setsize);
  
  /**
+ * pagecache_isize_extended - update pagecache after extension of i_size
+ * @inode:     inode for which i_size was extended
+ * @from:      original inode size
+ * @to:                new inode size
+ *
+ * Handle extension of inode size caused either by an extending truncate or by
+ * a write starting after the old i_size. We mark the page straddling the old
+ * i_size (@from) read-only so that page_mkwrite() is called on the next write
+ * access to the page.  This way the filesystem can be sure that page_mkwrite()
+ * is called on the page before the user writes to it via mmap after i_size
+ * has been changed.
+ *
+ * The function must be called after i_size is updated so that page fault
+ * coming after we unlock the page will already see the new i_size.
+ * The function must be called while we still hold i_mutex - this not only
+ * makes sure i_size is stable but also that userspace cannot observe new
+ * i_size value before we are prepared to store mmap writes at new inode size.
+ */
+void pagecache_isize_extended(struct inode *inode, loff_t from, loff_t to)
+{
+       int bsize = 1 << inode->i_blkbits;
+       loff_t rounded_from;
+       struct page *page;
+       pgoff_t index;
+
+       WARN_ON(!mutex_is_locked(&inode->i_mutex));
+       WARN_ON(to > inode->i_size);
+
+       if (from >= to || bsize == PAGE_CACHE_SIZE)
+               return;
+       /* Bail out if no hole block is created in the page straddling @from */
+       rounded_from = round_up(from, bsize);
+       if (to <= rounded_from || !(rounded_from & (PAGE_CACHE_SIZE - 1)))
+               return;
+
+       index = from >> PAGE_CACHE_SHIFT;
+       page = find_lock_page(inode->i_mapping, index);
+       /* Page not cached? Nothing to do */
+       if (!page)
+               return;
+       /*
+        * See clear_page_dirty_for_io() for details why set_page_dirty()
+        * is needed.
+        */
+       if (page_mkclean(page))
+               set_page_dirty(page);
+       unlock_page(page);
+       page_cache_release(page);
+}
+EXPORT_SYMBOL(pagecache_isize_extended);
+
+/**
+ * truncate_pagecache_range - unmap and remove pagecache that is hole-punched
+ * @inode: inode
+ * @lstart: offset of beginning of hole
   * vmtruncate - unmap mappings "freed" by truncate() syscall
   * @inode: inode of the file used
   * @newsize: file offset to start truncating
author	Jan Kara <jack@suse.cz>
	Thu, 2 Oct 2014 01:49:18 +0000 (21:49 -0400)
committer	Zefan Li <lizefan@huawei.com>
	Mon, 2 Feb 2015 09:04:52 +0000 (17:04 +0800)
fs/buffer.c		patch \| blob \| history
include/linux/mm.h		patch \| blob \| history
mm/truncate.c		patch \| blob \| history