Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs-2.6

author Linus Torvalds <torvalds@linux-foundation.org>
Thu, 24 Sep 2009 15:32:11 +0000 (08:32 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Thu, 24 Sep 2009 15:32:11 +0000 (08:32 -0700)
diff --git a/Documentation/vm/locking b/Documentation/vm/locking

index f366fa9..25fadb4 100644 (file)
--- a/Documentation/vm/locking
+++ b/Documentation/vm/locking
@@ -80,7 +80,7 @@ Note: PTL can also be used to guarantee that no new clones using the
  mm start up ... this is a loose form of stability on mm_users. For
  example, it is used in copy_mm to protect against a racing tlb_gather_mmu
  single address space optimization, so that the zap_page_range (from
-vmtruncate) does not lose sending ipi's to cloned threads that might 
+truncate) does not lose sending ipi's to cloned threads that might
  be spawned underneath it and go to user mode to drag in pte's into tlbs.
  
  swap_lock
diff --git a/fs/attr.c b/fs/attr.c

index 9fe1b1b..96d394b 100644 (file)
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -18,7 +18,7 @@
  /* Taken over from the old code... */
  
  /* POSIX UID/GID verification for setting inode attributes. */
-int inode_change_ok(struct inode *inode, struct iattr *attr)
+int inode_change_ok(const struct inode *inode, struct iattr *attr)
  {
         int retval = -EPERM;
         unsigned int ia_valid = attr->ia_valid;
@@ -60,9 +60,51 @@ fine:
  error:
         return retval;
  }
-
  EXPORT_SYMBOL(inode_change_ok);
  
+/**
+ * inode_newsize_ok - may this inode be truncated to a given size
+ * @inode:     the inode to be truncated
+ * @offset:    the new size to assign to the inode
+ * @Returns:   0 on success, negative errno on failure
+ *
+ * inode_newsize_ok will check filesystem limits and ulimits to check that the
+ * new inode size is within limits. inode_newsize_ok will also send SIGXFSZ
+ * when necessary. Caller must not proceed with inode size change if failure is
+ * returned. @inode must be a file (not directory), with appropriate
+ * permissions to allow truncate (inode_newsize_ok does NOT check these
+ * conditions).
+ *
+ * inode_newsize_ok must be called with i_mutex held.
+ */
+int inode_newsize_ok(const struct inode *inode, loff_t offset)
+{
+       if (inode->i_size < offset) {
+               unsigned long limit;
+
+               limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
+               if (limit != RLIM_INFINITY && offset > limit)
+                       goto out_sig;
+               if (offset > inode->i_sb->s_maxbytes)
+                       goto out_big;
+       } else {
+               /*
+                * truncation of in-use swapfiles is disallowed - it would
+                * cause subsequent swapout to scribble on the now-freed
+                * blocks.
+                */
+               if (IS_SWAPFILE(inode))
+                       return -ETXTBSY;
+       }
+
+       return 0;
+out_sig:
+       send_sig(SIGXFSZ, current, 0);
+out_big:
+       return -EFBIG;
+}
+EXPORT_SYMBOL(inode_newsize_ok);
+
  int inode_setattr(struct inode * inode, struct iattr * attr)
  {
         unsigned int ia_valid = attr->ia_valid;
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c

index dd376c1..33baf27 100644 (file)
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -737,12 +737,7 @@ befs_put_super(struct super_block *sb)
  {
         kfree(BEFS_SB(sb)->mount_opts.iocharset);
         BEFS_SB(sb)->mount_opts.iocharset = NULL;
-
-       if (BEFS_SB(sb)->nls) {
-               unload_nls(BEFS_SB(sb)->nls);
-               BEFS_SB(sb)->nls = NULL;
-       }
-
+       unload_nls(BEFS_SB(sb)->nls);
         kfree(sb->s_fs_info);
         sb->s_fs_info = NULL;
  }
diff --git a/fs/block_dev.c b/fs/block_dev.c

index 5d1ed50..9cf4b92 100644 (file)
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -216,8 +216,6 @@ EXPORT_SYMBOL(fsync_bdev);
   * freeze_bdev  --  lock a filesystem and force it into a consistent state
   * @bdev:      blockdevice to lock
   *
- * This takes the block device bd_mount_sem to make sure no new mounts
- * happen on bdev until thaw_bdev() is called.
   * If a superblock is found on this device, we take the s_umount semaphore
   * on it to make sure nobody unmounts until the snapshot creation is done.
   * The reference counter (bd_fsfreeze_count) guarantees that only the last
@@ -232,46 +230,55 @@ struct super_block *freeze_bdev(struct block_device *bdev)
         int error = 0;
  
         mutex_lock(&bdev->bd_fsfreeze_mutex);
-       if (bdev->bd_fsfreeze_count > 0) {
-               bdev->bd_fsfreeze_count++;
+       if (++bdev->bd_fsfreeze_count > 1) {
+               /*
+                * We don't even need to grab a reference - the first call
+                * to freeze_bdev grabs an active reference and only the last
+                * thaw_bdev drops it.
+                */
                 sb = get_super(bdev);
+               drop_super(sb);
                 mutex_unlock(&bdev->bd_fsfreeze_mutex);
                 return sb;
         }
-       bdev->bd_fsfreeze_count++;
-
-       down(&bdev->bd_mount_sem);
-       sb = get_super(bdev);
-       if (sb && !(sb->s_flags & MS_RDONLY)) {
-               sb->s_frozen = SB_FREEZE_WRITE;
-               smp_wmb();
-
-               sync_filesystem(sb);
-
-               sb->s_frozen = SB_FREEZE_TRANS;
-               smp_wmb();
-
-               sync_blockdev(sb->s_bdev);
-
-               if (sb->s_op->freeze_fs) {
-                       error = sb->s_op->freeze_fs(sb);
-                       if (error) {
-                               printk(KERN_ERR
-                                       "VFS:Filesystem freeze failed\n");
-                               sb->s_frozen = SB_UNFROZEN;
-                               drop_super(sb);
-                               up(&bdev->bd_mount_sem);
-                               bdev->bd_fsfreeze_count--;
-                               mutex_unlock(&bdev->bd_fsfreeze_mutex);
-                               return ERR_PTR(error);
-                       }
+
+       sb = get_active_super(bdev);
+       if (!sb)
+               goto out;
+       if (sb->s_flags & MS_RDONLY) {
+               deactivate_locked_super(sb);
+               mutex_unlock(&bdev->bd_fsfreeze_mutex);
+               return sb;
+       }
+
+       sb->s_frozen = SB_FREEZE_WRITE;
+       smp_wmb();
+
+       sync_filesystem(sb);
+
+       sb->s_frozen = SB_FREEZE_TRANS;
+       smp_wmb();
+
+       sync_blockdev(sb->s_bdev);
+
+       if (sb->s_op->freeze_fs) {
+               error = sb->s_op->freeze_fs(sb);
+               if (error) {
+                       printk(KERN_ERR
+                               "VFS:Filesystem freeze failed\n");
+                       sb->s_frozen = SB_UNFROZEN;
+                       deactivate_locked_super(sb);
+                       bdev->bd_fsfreeze_count--;
+                       mutex_unlock(&bdev->bd_fsfreeze_mutex);
+                       return ERR_PTR(error);
                 }
         }
+       up_write(&sb->s_umount);
  
+ out:
         sync_blockdev(bdev);
         mutex_unlock(&bdev->bd_fsfreeze_mutex);
-
-       return sb;      /* thaw_bdev releases s->s_umount and bd_mount_sem */
+       return sb;      /* thaw_bdev drops the active reference */
  }
  EXPORT_SYMBOL(freeze_bdev);
  
@@ -284,44 +291,44 @@ EXPORT_SYMBOL(freeze_bdev);
   */
  int thaw_bdev(struct block_device *bdev, struct super_block *sb)
  {
-       int error = 0;
+       int error = -EINVAL;
  
         mutex_lock(&bdev->bd_fsfreeze_mutex);
-       if (!bdev->bd_fsfreeze_count) {
-               mutex_unlock(&bdev->bd_fsfreeze_mutex);
-               return -EINVAL;
-       }
-
-       bdev->bd_fsfreeze_count--;
-       if (bdev->bd_fsfreeze_count > 0) {
-               if (sb)
-                       drop_super(sb);
-               mutex_unlock(&bdev->bd_fsfreeze_mutex);
-               return 0;
-       }
-
-       if (sb) {
-               BUG_ON(sb->s_bdev != bdev);
-               if (!(sb->s_flags & MS_RDONLY)) {
-                       if (sb->s_op->unfreeze_fs) {
-                               error = sb->s_op->unfreeze_fs(sb);
-                               if (error) {
-                                       printk(KERN_ERR
-                                               "VFS:Filesystem thaw failed\n");
-                                       sb->s_frozen = SB_FREEZE_TRANS;
-                                       bdev->bd_fsfreeze_count++;
-                                       mutex_unlock(&bdev->bd_fsfreeze_mutex);
-                                       return error;
-                               }
-                       }
-                       sb->s_frozen = SB_UNFROZEN;
-                       smp_wmb();
-                       wake_up(&sb->s_wait_unfrozen);
+       if (!bdev->bd_fsfreeze_count)
+               goto out_unlock;
+
+       error = 0;
+       if (--bdev->bd_fsfreeze_count > 0)
+               goto out_unlock;
+
+       if (!sb)
+               goto out_unlock;
+
+       BUG_ON(sb->s_bdev != bdev);
+       down_write(&sb->s_umount);
+       if (sb->s_flags & MS_RDONLY)
+               goto out_deactivate;
+
+       if (sb->s_op->unfreeze_fs) {
+               error = sb->s_op->unfreeze_fs(sb);
+               if (error) {
+                       printk(KERN_ERR
+                               "VFS:Filesystem thaw failed\n");
+                       sb->s_frozen = SB_FREEZE_TRANS;
+                       bdev->bd_fsfreeze_count++;
+                       mutex_unlock(&bdev->bd_fsfreeze_mutex);
+                       return error;
                 }
-               drop_super(sb);
         }
  
-       up(&bdev->bd_mount_sem);
+       sb->s_frozen = SB_UNFROZEN;
+       smp_wmb();
+       wake_up(&sb->s_wait_unfrozen);
+
+out_deactivate:
+       if (sb)
+               deactivate_locked_super(sb);
+out_unlock:
         mutex_unlock(&bdev->bd_fsfreeze_mutex);
         return 0;
  }
@@ -430,7 +437,6 @@ static void init_once(void *foo)
  
         memset(bdev, 0, sizeof(*bdev));
         mutex_init(&bdev->bd_mutex);
-       sema_init(&bdev->bd_mount_sem, 1);
         INIT_LIST_HEAD(&bdev->bd_inodes);
         INIT_LIST_HEAD(&bdev->bd_list);
  #ifdef CONFIG_SYSFS
diff --git a/fs/buffer.c b/fs/buffer.c

index 209f7f1..24afd74 100644 (file)
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -2239,16 +2239,10 @@ int generic_cont_expand_simple(struct inode *inode, loff_t size)
         struct address_space *mapping = inode->i_mapping;
         struct page *page;
         void *fsdata;
-       unsigned long limit;
         int err;
  
-       err = -EFBIG;
-        limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
-       if (limit != RLIM_INFINITY && size > (loff_t)limit) {
-               send_sig(SIGXFSZ, current, 0);
-               goto out;
-       }
-       if (size > inode->i_sb->s_maxbytes)
+       err = inode_newsize_ok(inode, size);
+       if (err)
                 goto out;
  
         err = pagecache_write_begin(NULL, mapping, size, 0,
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c

index d79ce2e..90c5b39 100644 (file)
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -185,8 +185,7 @@ out_mount_failed:
                         cifs_sb->mountdata = NULL;
                 }
  #endif
-               if (cifs_sb->local_nls)
-                       unload_nls(cifs_sb->local_nls);
+               unload_nls(cifs_sb->local_nls);
                 kfree(cifs_sb);
         }
         return rc;
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c

index 1f09c76..5e24925 100644 (file)
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -1557,57 +1557,24 @@ static int cifs_truncate_page(struct address_space *mapping, loff_t from)
  
  static int cifs_vmtruncate(struct inode *inode, loff_t offset)
  {
-       struct address_space *mapping = inode->i_mapping;
-       unsigned long limit;
+       loff_t oldsize;
+       int err;
  
         spin_lock(&inode->i_lock);
-       if (inode->i_size < offset)
-               goto do_expand;
-       /*
-        * truncation of in-use swapfiles is disallowed - it would cause
-        * subsequent swapout to scribble on the now-freed blocks.
-        */
-       if (IS_SWAPFILE(inode)) {
-               spin_unlock(&inode->i_lock);
-               goto out_busy;
-       }
-       i_size_write(inode, offset);
-       spin_unlock(&inode->i_lock);
-       /*
-        * unmap_mapping_range is called twice, first simply for efficiency
-        * so that truncate_inode_pages does fewer single-page unmaps. However
-        * after this first call, and before truncate_inode_pages finishes,
-        * it is possible for private pages to be COWed, which remain after
-        * truncate_inode_pages finishes, hence the second unmap_mapping_range
-        * call must be made for correctness.
-        */
-       unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
-       truncate_inode_pages(mapping, offset);
-       unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
-       goto out_truncate;
-
-do_expand:
-       limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
-       if (limit != RLIM_INFINITY && offset > limit) {
+       err = inode_newsize_ok(inode, offset);
+       if (err) {
                 spin_unlock(&inode->i_lock);
-               goto out_sig;
-       }
-       if (offset > inode->i_sb->s_maxbytes) {
-               spin_unlock(&inode->i_lock);
-               goto out_big;
+               goto out;
         }
+
+       oldsize = inode->i_size;
         i_size_write(inode, offset);
         spin_unlock(&inode->i_lock);
-out_truncate:
+       truncate_pagecache(inode, oldsize, offset);
         if (inode->i_op->truncate)
                 inode->i_op->truncate(inode);
-       return 0;
-out_sig:
-       send_sig(SIGXFSZ, current, 0);
-out_big:
-       return -EFBIG;
-out_busy:
-       return -ETXTBSY;
+out:
+       return err;
  }
  
  static int
diff --git a/fs/compat.c b/fs/compat.c

index 3aa4883..d576b55 100644 (file)
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -768,13 +768,13 @@ asmlinkage long compat_sys_mount(char __user * dev_name, char __user * dir_name,
                                  char __user * type, unsigned long flags,
                                  void __user * data)
  {
-       unsigned long type_page;
+       char *kernel_type;
         unsigned long data_page;
-       unsigned long dev_page;
+       char *kernel_dev;
         char *dir_page;
         int retval;
  
-       retval = copy_mount_options (type, &type_page);
+       retval = copy_mount_string(type, &kernel_type);
         if (retval < 0)
                 goto out;
  
@@ -783,38 +783,38 @@ asmlinkage long compat_sys_mount(char __user * dev_name, char __user * dir_name,
         if (IS_ERR(dir_page))
                 goto out1;
  
-       retval = copy_mount_options (dev_name, &dev_page);
+       retval = copy_mount_string(dev_name, &kernel_dev);
         if (retval < 0)
                 goto out2;
  
-       retval = copy_mount_options (data, &data_page);
+       retval = copy_mount_options(data, &data_page);
         if (retval < 0)
                 goto out3;
  
         retval = -EINVAL;
  
-       if (type_page && data_page) {
-               if (!strcmp((char *)type_page, SMBFS_NAME)) {
+       if (kernel_type && data_page) {
+               if (!strcmp(kernel_type, SMBFS_NAME)) {
                         do_smb_super_data_conv((void *)data_page);
-               } else if (!strcmp((char *)type_page, NCPFS_NAME)) {
+               } else if (!strcmp(kernel_type, NCPFS_NAME)) {
                         do_ncp_super_data_conv((void *)data_page);
-               } else if (!strcmp((char *)type_page, NFS4_NAME)) {
+               } else if (!strcmp(kernel_type, NFS4_NAME)) {
                         if (do_nfs4_super_data_conv((void *) data_page))
                                 goto out4;
                 }
         }
  
-       retval = do_mount((char*)dev_page, dir_page, (char*)type_page,
+       retval = do_mount(kernel_dev, dir_page, kernel_type,
                         flags, (void*)data_page);
  
   out4:
         free_page(data_page);
   out3:
-       free_page(dev_page);
+       kfree(kernel_dev);
   out2:
         putname(dir_page);
   out1:
-       free_page(type_page);
+       kfree(kernel_type);
   out:
         return retval;
  }
diff --git a/fs/exofs/super.c b/fs/exofs/super.c

index 5ab10c3..9f500de 100644 (file)
--- a/fs/exofs/super.c
+++ b/fs/exofs/super.c
@@ -214,7 +214,6 @@ int exofs_sync_fs(struct super_block *sb, int wait)
         }
  
         lock_super(sb);
-       lock_kernel();
         sbi = sb->s_fs_info;
         fscb->s_nextid = cpu_to_le64(sbi->s_nextid);
         fscb->s_numfiles = cpu_to_le32(sbi->s_numfiles);
@@ -245,7 +244,6 @@ int exofs_sync_fs(struct super_block *sb, int wait)
  out:
         if (or)
                 osd_end_request(or);
-       unlock_kernel();
         unlock_super(sb);
         kfree(fscb);
         return ret;
@@ -268,8 +266,6 @@ static void exofs_put_super(struct super_block *sb)
         int num_pend;
         struct exofs_sb_info *sbi = sb->s_fs_info;
  
-       lock_kernel();
-
         if (sb->s_dirt)
                 exofs_write_super(sb);
  
@@ -286,8 +282,6 @@ static void exofs_put_super(struct super_block *sb)
         osduld_put_device(sbi->s_dev);
         kfree(sb->s_fs_info);
         sb->s_fs_info = NULL;
-
-       unlock_kernel();
  }
  
  /*
diff --git a/fs/fat/inode.c b/fs/fat/inode.c

index 8970d8c..04629d1 100644 (file)
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -470,19 +470,11 @@ static void fat_put_super(struct super_block *sb)
  
         iput(sbi->fat_inode);
  
-       if (sbi->nls_disk) {
-               unload_nls(sbi->nls_disk);
-               sbi->nls_disk = NULL;
-               sbi->options.codepage = fat_default_codepage;
-       }
-       if (sbi->nls_io) {
-               unload_nls(sbi->nls_io);
-               sbi->nls_io = NULL;
-       }
-       if (sbi->options.iocharset != fat_default_iocharset) {
+       unload_nls(sbi->nls_disk);
+       unload_nls(sbi->nls_io);
+
+       if (sbi->options.iocharset != fat_default_iocharset)
                 kfree(sbi->options.iocharset);
-               sbi->options.iocharset = fat_default_iocharset;
-       }
  
         sb->s_fs_info = NULL;
         kfree(sbi);
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c

index e703654..992f6c9 100644 (file)
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -1276,14 +1276,9 @@ static int fuse_do_setattr(struct dentry *entry, struct iattr *attr,
                 return 0;
  
         if (attr->ia_valid & ATTR_SIZE) {
-               unsigned long limit;
-               if (IS_SWAPFILE(inode))
-                       return -ETXTBSY;
-               limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
-               if (limit != RLIM_INFINITY && attr->ia_size > (loff_t) limit) {
-                       send_sig(SIGXFSZ, current, 0);
-                       return -EFBIG;
-               }
+               err = inode_newsize_ok(inode, attr->ia_size);
+               if (err)
+                       return err;
                 is_truncate = true;
         }
  
@@ -1350,8 +1345,7 @@ static int fuse_do_setattr(struct dentry *entry, struct iattr *attr,
          * FUSE_NOWRITE, otherwise fuse_launder_page() would deadlock.
          */
         if (S_ISREG(inode->i_mode) && oldsize != outarg.attr.size) {
-               if (outarg.attr.size < oldsize)
-                       fuse_truncate(inode->i_mapping, outarg.attr.size);
+               truncate_pagecache(inode, oldsize, outarg.attr.size);
                 invalidate_inode_pages2(inode->i_mapping);
         }
  
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h

index fc9c79f..01cc462 100644 (file)
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -606,8 +606,6 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr,
  void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr,
                                    u64 attr_valid);
  
-void fuse_truncate(struct address_space *mapping, loff_t offset);
-
  /**
   * Initialize the client device
   */
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c

index 6da947d..1a822ce 100644 (file)
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -140,14 +140,6 @@ static int fuse_remount_fs(struct super_block *sb, int *flags, char *data)
         return 0;
  }
  
-void fuse_truncate(struct address_space *mapping, loff_t offset)
-{
-       /* See vmtruncate() */
-       unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
-       truncate_inode_pages(mapping, offset);
-       unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
-}
-
  void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr,
                                    u64 attr_valid)
  {
@@ -205,8 +197,7 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr,
         spin_unlock(&fc->lock);
  
         if (S_ISREG(inode->i_mode) && oldsize != attr->size) {
-               if (attr->size < oldsize)
-                       fuse_truncate(inode->i_mapping, attr->size);
+               truncate_pagecache(inode, oldsize, attr->size);
                 invalidate_inode_pages2(inode->i_mapping);
         }
  }
diff --git a/fs/hfs/mdb.c b/fs/hfs/mdb.c

index 7b6165f..8bbe03c 100644 (file)
--- a/fs/hfs/mdb.c
+++ b/fs/hfs/mdb.c
@@ -344,10 +344,8 @@ void hfs_mdb_put(struct super_block *sb)
         brelse(HFS_SB(sb)->mdb_bh);
         brelse(HFS_SB(sb)->alt_mdb_bh);
  
-       if (HFS_SB(sb)->nls_io)
-               unload_nls(HFS_SB(sb)->nls_io);
-       if (HFS_SB(sb)->nls_disk)
-               unload_nls(HFS_SB(sb)->nls_disk);
+       unload_nls(HFS_SB(sb)->nls_io);
+       unload_nls(HFS_SB(sb)->nls_disk);
  
         free_pages((unsigned long)HFS_SB(sb)->bitmap, PAGE_SIZE < 8192 ? 1 : 0);
         kfree(HFS_SB(sb));
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c

index c0759fe..43022f3 100644 (file)
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -229,8 +229,7 @@ static void hfsplus_put_super(struct super_block *sb)
         iput(HFSPLUS_SB(sb).alloc_file);
         iput(HFSPLUS_SB(sb).hidden_dir);
         brelse(HFSPLUS_SB(sb).s_vhbh);
-       if (HFSPLUS_SB(sb).nls)
-               unload_nls(HFSPLUS_SB(sb).nls);
+       unload_nls(HFSPLUS_SB(sb).nls);
         kfree(sb->s_fs_info);
         sb->s_fs_info = NULL;
  
@@ -464,8 +463,7 @@ out:
  
  cleanup:
         hfsplus_put_super(sb);
-       if (nls)
-               unload_nls(nls);
+       unload_nls(nls);
         return err;
  }
  
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c

index 1333354..87a1258 100644 (file)
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -380,36 +380,11 @@ static void hugetlbfs_delete_inode(struct inode *inode)
  
  static void hugetlbfs_forget_inode(struct inode *inode) __releases(inode_lock)
  {
-       struct super_block *sb = inode->i_sb;
-
-       if (!hlist_unhashed(&inode->i_hash)) {
-               if (!(inode->i_state & (I_DIRTY|I_SYNC)))
-                       list_move(&inode->i_list, &inode_unused);
-               inodes_stat.nr_unused++;
-               if (!sb || (sb->s_flags & MS_ACTIVE)) {
-                       spin_unlock(&inode_lock);
-                       return;
-               }
-               inode->i_state |= I_WILL_FREE;
-               spin_unlock(&inode_lock);
-               /*
-                * write_inode_now is a noop as we set BDI_CAP_NO_WRITEBACK
-                * in our backing_dev_info.
-                */
-               write_inode_now(inode, 1);
-               spin_lock(&inode_lock);
-               inode->i_state &= ~I_WILL_FREE;
-               inodes_stat.nr_unused--;
-               hlist_del_init(&inode->i_hash);
+       if (generic_detach_inode(inode)) {
+               truncate_hugepages(inode, 0);
+               clear_inode(inode);
+               destroy_inode(inode);
         }
-       list_del_init(&inode->i_list);
-       list_del_init(&inode->i_sb_list);
-       inode->i_state |= I_FREEING;
-       inodes_stat.nr_inodes--;
-       spin_unlock(&inode_lock);
-       truncate_hugepages(inode, 0);
-       clear_inode(inode);
-       destroy_inode(inode);
  }
  
  static void hugetlbfs_drop_inode(struct inode *inode)
diff --git a/fs/inode.c b/fs/inode.c

index 76582b0..4d8e3be 100644 (file)
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -1241,7 +1241,16 @@ void generic_delete_inode(struct inode *inode)
  }
  EXPORT_SYMBOL(generic_delete_inode);
  
-static void generic_forget_inode(struct inode *inode)
+/**
+ *     generic_detach_inode - remove inode from inode lists
+ *     @inode: inode to remove
+ *
+ *     Remove inode from inode lists, write it if it's dirty. This is just an
+ *     internal VFS helper exported for hugetlbfs. Do not use!
+ *
+ *     Returns 1 if inode should be completely destroyed.
+ */
+int generic_detach_inode(struct inode *inode)
  {
         struct super_block *sb = inode->i_sb;
  
@@ -1251,7 +1260,7 @@ static void generic_forget_inode(struct inode *inode)
                 inodes_stat.nr_unused++;
                 if (sb->s_flags & MS_ACTIVE) {
                         spin_unlock(&inode_lock);
-                       return;
+                       return 0;
                 }
                 WARN_ON(inode->i_state & I_NEW);
                 inode->i_state |= I_WILL_FREE;
@@ -1269,6 +1278,14 @@ static void generic_forget_inode(struct inode *inode)
         inode->i_state |= I_FREEING;
         inodes_stat.nr_inodes--;
         spin_unlock(&inode_lock);
+       return 1;
+}
+EXPORT_SYMBOL_GPL(generic_detach_inode);
+
+static void generic_forget_inode(struct inode *inode)
+{
+       if (!generic_detach_inode(inode))
+               return;
         if (inode->i_data.nrpages)
                 truncate_inode_pages(&inode->i_data, 0);
         clear_inode(inode);
@@ -1399,31 +1416,31 @@ void touch_atime(struct vfsmount *mnt, struct dentry *dentry)
         struct inode *inode = dentry->d_inode;
         struct timespec now;
  
-       if (mnt_want_write(mnt))
-               return;
         if (inode->i_flags & S_NOATIME)
-               goto out;
+               return;
         if (IS_NOATIME(inode))
-               goto out;
+               return;
         if ((inode->i_sb->s_flags & MS_NODIRATIME) && S_ISDIR(inode->i_mode))
-               goto out;
+               return;
  
         if (mnt->mnt_flags & MNT_NOATIME)
-               goto out;
+               return;
         if ((mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode))
-               goto out;
+               return;
  
         now = current_fs_time(inode->i_sb);
  
         if (!relatime_need_update(mnt, inode, now))
-               goto out;
+               return;
  
         if (timespec_equal(&inode->i_atime, &now))
-               goto out;
+               return;
+
+       if (mnt_want_write(mnt))
+               return;
  
         inode->i_atime = now;
         mark_inode_dirty_sync(inode);
-out:
         mnt_drop_write(mnt);
  }
  EXPORT_SYMBOL(touch_atime);
@@ -1444,34 +1461,37 @@ void file_update_time(struct file *file)
  {
         struct inode *inode = file->f_path.dentry->d_inode;
         struct timespec now;
-       int sync_it = 0;
-       int err;
+       enum { S_MTIME = 1, S_CTIME = 2, S_VERSION = 4 } sync_it = 0;
  
+       /* First try to exhaust all avenues to not sync */
         if (IS_NOCMTIME(inode))
                 return;
  
-       err = mnt_want_write_file(file);
-       if (err)
-               return;
-
         now = current_fs_time(inode->i_sb);
-       if (!timespec_equal(&inode->i_mtime, &now)) {
-               inode->i_mtime = now;
-               sync_it = 1;
-       }
+       if (!timespec_equal(&inode->i_mtime, &now))
+               sync_it = S_MTIME;
  
-       if (!timespec_equal(&inode->i_ctime, &now)) {
-               inode->i_ctime = now;
-               sync_it = 1;
-       }
+       if (!timespec_equal(&inode->i_ctime, &now))
+               sync_it |= S_CTIME;
  
-       if (IS_I_VERSION(inode)) {
-               inode_inc_iversion(inode);
-               sync_it = 1;
-       }
+       if (IS_I_VERSION(inode))
+               sync_it |= S_VERSION;
+
+       if (!sync_it)
+               return;
  
-       if (sync_it)
-               mark_inode_dirty_sync(inode);
+       /* Finally allowed to write? Takes lock. */
+       if (mnt_want_write_file(file))
+               return;
+
+       /* Only change inode inside the lock region */
+       if (sync_it & S_VERSION)
+               inode_inc_iversion(inode);
+       if (sync_it & S_CTIME)
+               inode->i_ctime = now;
+       if (sync_it & S_MTIME)
+               inode->i_mtime = now;
+       mark_inode_dirty_sync(inode);
         mnt_drop_write(file->f_path.mnt);
  }
  EXPORT_SYMBOL(file_update_time);
@@ -1599,7 +1619,8 @@ void init_special_inode(struct inode *inode, umode_t mode, dev_t rdev)
         else if (S_ISSOCK(mode))
                 inode->i_fop = &bad_sock_fops;
         else
-               printk(KERN_DEBUG "init_special_inode: bogus i_mode (%o)\n",
-                      mode);
+               printk(KERN_DEBUG "init_special_inode: bogus i_mode (%o) for"
+                                 " inode %s:%lu\n", mode, inode->i_sb->s_id,
+                                 inode->i_ino);
  }
  EXPORT_SYMBOL(init_special_inode);
diff --git a/fs/internal.h b/fs/internal.h

index d55ef56..515175b 100644 (file)
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -57,6 +57,7 @@ extern int check_unsafe_exec(struct linux_binprm *);
   * namespace.c
   */
  extern int copy_mount_options(const void __user *, unsigned long *);
+extern int copy_mount_string(const void __user *, char **);
  
  extern void free_vfsmnt(struct vfsmount *);
  extern struct vfsmount *alloc_vfsmnt(const char *);
diff --git a/fs/ioctl.c b/fs/ioctl.c

index 5612880..7b17a14 100644 (file)
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -162,20 +162,21 @@ EXPORT_SYMBOL(fiemap_check_flags);
  static int fiemap_check_ranges(struct super_block *sb,
                                u64 start, u64 len, u64 *new_len)
  {
+       u64 maxbytes = (u64) sb->s_maxbytes;
+
         *new_len = len;
  
         if (len == 0)
                 return -EINVAL;
  
-       if (start > sb->s_maxbytes)
+       if (start > maxbytes)
                 return -EFBIG;
  
         /*
          * Shrink request scope to what the fs can actually handle.
          */
-       if ((len > sb->s_maxbytes) ||
-           (sb->s_maxbytes - len) < start)
-               *new_len = sb->s_maxbytes - start;
+       if (len > maxbytes || (maxbytes - len) < start)
+               *new_len = maxbytes - start;
  
         return 0;
  }
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c

index 85f96bc..6b4dcd4 100644 (file)
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -46,10 +46,7 @@ static void isofs_put_super(struct super_block *sb)
  #ifdef CONFIG_JOLIET
         lock_kernel();
  
-       if (sbi->s_nls_iocharset) {
-               unload_nls(sbi->s_nls_iocharset);
-               sbi->s_nls_iocharset = NULL;
-       }
+       unload_nls(sbi->s_nls_iocharset);
  
         unlock_kernel();
  #endif
@@ -912,8 +909,7 @@ out_no_root:
                 printk(KERN_WARNING "%s: get root inode failed\n", __func__);
  out_no_inode:
  #ifdef CONFIG_JOLIET
-       if (sbi->s_nls_iocharset)
-               unload_nls(sbi->s_nls_iocharset);
+       unload_nls(sbi->s_nls_iocharset);
  #endif
         goto out_freesbi;
  out_no_read:
diff --git a/fs/jfs/super.c b/fs/jfs/super.c

index 37e6dcd..2234c73 100644 (file)
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -178,13 +178,11 @@ static void jfs_put_super(struct super_block *sb)
         rc = jfs_umount(sb);
         if (rc)
                 jfs_err("jfs_umount failed with return code %d", rc);
-       if (sbi->nls_tab)
-               unload_nls(sbi->nls_tab);
-       sbi->nls_tab = NULL;
+
+       unload_nls(sbi->nls_tab);
  
         truncate_inode_pages(sbi->direct_inode->i_mapping, 0);
         iput(sbi->direct_inode);
-       sbi->direct_inode = NULL;
  
         kfree(sbi);
  
@@ -347,8 +345,7 @@ static int parse_options(char *options, struct super_block *sb, s64 *newLVSize,
  
         if (nls_map != (void *) -1) {
                 /* Discard old (if remount) */
-               if (sbi->nls_tab)
-                       unload_nls(sbi->nls_tab);
+               unload_nls(sbi->nls_tab);
                 sbi->nls_tab = nls_map;
         }
         return 1;
diff --git a/fs/libfs.c b/fs/libfs.c

index dcec3d3..219576c 100644 (file)
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -527,14 +527,18 @@ ssize_t simple_read_from_buffer(void __user *to, size_t count, loff_t *ppos,
                                 const void *from, size_t available)
  {
         loff_t pos = *ppos;
+       size_t ret;
+
         if (pos < 0)
                 return -EINVAL;
-       if (pos >= available)
+       if (pos >= available || !count)
                 return 0;
         if (count > available - pos)
                 count = available - pos;
-       if (copy_to_user(to, from + pos, count))
+       ret = copy_to_user(to, from + pos, count);
+       if (ret == count)
                 return -EFAULT;
+       count -= ret;
         *ppos = pos + count;
         return count;
  }
@@ -735,10 +739,11 @@ ssize_t simple_attr_write(struct file *file, const char __user *buf,
         if (copy_from_user(attr->set_buf, buf, size))
                 goto out;
  
-       ret = len; /* claim we got the whole input */
         attr->set_buf[size] = '\0';
         val = simple_strtol(attr->set_buf, NULL, 0);
-       attr->set(attr->data, val);
+       ret = attr->set(attr->data, val);
+       if (ret == 0)
+               ret = len; /* on success, claim we got the whole input */
  out:
         mutex_unlock(&attr->mutex);
         return ret;
diff --git a/fs/namespace.c b/fs/namespace.c

index 7230787..bdc3cb4 100644 (file)
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1640,7 +1640,7 @@ static int do_new_mount(struct path *path, char *type, int flags,
  {
         struct vfsmount *mnt;
  
-       if (!type || !memchr(type, 0, PAGE_SIZE))
+       if (!type)
                 return -EINVAL;
  
         /* we need capabilities... */
@@ -1871,6 +1871,23 @@ int copy_mount_options(const void __user * data, unsigned long *where)
         return 0;
  }
  
+int copy_mount_string(const void __user *data, char **where)
+{
+       char *tmp;
+
+       if (!data) {
+               *where = NULL;
+               return 0;
+       }
+
+       tmp = strndup_user(data, PAGE_SIZE);
+       if (IS_ERR(tmp))
+               return PTR_ERR(tmp);
+
+       *where = tmp;
+       return 0;
+}
+
  /*
   * Flags is a 32-bit value that allows up to 31 non-fs dependent flags to
   * be given to the mount() call (ie: read-only, no-dev, no-suid etc).
@@ -1900,8 +1917,6 @@ long do_mount(char *dev_name, char *dir_name, char *type_page,
  
         if (!dir_name || !*dir_name || !memchr(dir_name, 0, PAGE_SIZE))
                 return -EINVAL;
-       if (dev_name && !memchr(dev_name, 0, PAGE_SIZE))
-               return -EINVAL;
  
         if (data_page)
                 ((char *)data_page)[PAGE_SIZE - 1] = 0;
@@ -2070,40 +2085,42 @@ EXPORT_SYMBOL(create_mnt_ns);
  SYSCALL_DEFINE5(mount, char __user *, dev_name, char __user *, dir_name,
                 char __user *, type, unsigned long, flags, void __user *, data)
  {
-       int retval;
+       int ret;
+       char *kernel_type;
+       char *kernel_dir;
+       char *kernel_dev;
         unsigned long data_page;
-       unsigned long type_page;
-       unsigned long dev_page;
-       char *dir_page;
  
-       retval = copy_mount_options(type, &type_page);
-       if (retval < 0)
-               return retval;
+       ret = copy_mount_string(type, &kernel_type);
+       if (ret < 0)
+               goto out_type;
  
-       dir_page = getname(dir_name);
-       retval = PTR_ERR(dir_page);
-       if (IS_ERR(dir_page))
-               goto out1;
+       kernel_dir = getname(dir_name);
+       if (IS_ERR(kernel_dir)) {
+               ret = PTR_ERR(kernel_dir);
+               goto out_dir;
+       }
  
-       retval = copy_mount_options(dev_name, &dev_page);
-       if (retval < 0)
-               goto out2;
+       ret = copy_mount_string(dev_name, &kernel_dev);
+       if (ret < 0)
+               goto out_dev;
  
-       retval = copy_mount_options(data, &data_page);
-       if (retval < 0)
-               goto out3;
+       ret = copy_mount_options(data, &data_page);
+       if (ret < 0)
+               goto out_data;
  
-       retval = do_mount((char *)dev_page, dir_page, (char *)type_page,
-                         flags, (void *)data_page);
-       free_page(data_page);
+       ret = do_mount(kernel_dev, kernel_dir, kernel_type, flags,
+               (void *) data_page);
  
-out3:
-       free_page(dev_page);
-out2:
-       putname(dir_page);
-out1:
-       free_page(type_page);
-       return retval;
+       free_page(data_page);
+out_data:
+       kfree(kernel_dev);
+out_dev:
+       putname(kernel_dir);
+out_dir:
+       kfree(kernel_type);
+out_type:
+       return ret;
  }
  
  /*
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c

index b99ce20..cf98da1 100644 (file)
--- a/fs/ncpfs/inode.c
+++ b/fs/ncpfs/inode.c
@@ -746,16 +746,8 @@ static void ncp_put_super(struct super_block *sb)
  
  #ifdef CONFIG_NCPFS_NLS
         /* unload the NLS charsets */
-       if (server->nls_vol)
-       {
-               unload_nls(server->nls_vol);
-               server->nls_vol = NULL;
-       }
-       if (server->nls_io)
-       {
-               unload_nls(server->nls_io);
-               server->nls_io = NULL;
-       }
+       unload_nls(server->nls_vol);
+       unload_nls(server->nls_io);
  #endif /* CONFIG_NCPFS_NLS */
  
         if (server->info_filp)
diff --git a/fs/ncpfs/ioctl.c b/fs/ncpfs/ioctl.c

index 53a7ed7..0d58caf 100644 (file)
--- a/fs/ncpfs/ioctl.c
+++ b/fs/ncpfs/ioctl.c
@@ -223,10 +223,8 @@ ncp_set_charsets(struct ncp_server* server, struct ncp_nls_ioctl __user *arg)
         oldset_io = server->nls_io;
         server->nls_io = iocharset;
  
-       if (oldset_cp)
-               unload_nls(oldset_cp);
-       if (oldset_io)
-               unload_nls(oldset_io);
+       unload_nls(oldset_cp);
+       unload_nls(oldset_io);
  
         return 0;
  }
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c

index 060022b..faa0918 100644 (file)
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -458,49 +458,21 @@ nfs_setattr(struct dentry *dentry, struct iattr *attr)
   */
  static int nfs_vmtruncate(struct inode * inode, loff_t offset)
  {
-       if (i_size_read(inode) < offset) {
-               unsigned long limit;
-
-               limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
-               if (limit != RLIM_INFINITY && offset > limit)
-                       goto out_sig;
-               if (offset > inode->i_sb->s_maxbytes)
-                       goto out_big;
-               spin_lock(&inode->i_lock);
-               i_size_write(inode, offset);
-               spin_unlock(&inode->i_lock);
-       } else {
-               struct address_space *mapping = inode->i_mapping;
+       loff_t oldsize;
+       int err;
  
-               /*
-                * truncation of in-use swapfiles is disallowed - it would
-                * cause subsequent swapout to scribble on the now-freed
-                * blocks.
-                */
-               if (IS_SWAPFILE(inode))
-                       return -ETXTBSY;
-               spin_lock(&inode->i_lock);
-               i_size_write(inode, offset);
-               spin_unlock(&inode->i_lock);
+       err = inode_newsize_ok(inode, offset);
+       if (err)
+               goto out;
  
-               /*
-                * unmap_mapping_range is called twice, first simply for
-                * efficiency so that truncate_inode_pages does fewer
-                * single-page unmaps.  However after this first call, and
-                * before truncate_inode_pages finishes, it is possible for
-                * private pages to be COWed, which remain after
-                * truncate_inode_pages finishes, hence the second
-                * unmap_mapping_range call must be made for correctness.
-                */
-               unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
-               truncate_inode_pages(mapping, offset);
-               unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
-       }
-       return 0;
-out_sig:
-       send_sig(SIGXFSZ, current, 0);
-out_big:
-       return -EFBIG;
+       spin_lock(&inode->i_lock);
+       oldsize = inode->i_size;
+       i_size_write(inode, offset);
+       spin_unlock(&inode->i_lock);
+
+       truncate_pagecache(inode, oldsize, offset);
+out:
+       return err;
  }
  
  /**
diff --git a/fs/nls/nls_base.c b/fs/nls/nls_base.c

index 477d37d..2224b4d 100644 (file)
--- a/fs/nls/nls_base.c
+++ b/fs/nls/nls_base.c
@@ -270,7 +270,8 @@ struct nls_table *load_nls(char *charset)
  
  void unload_nls(struct nls_table *nls)
  {
-       module_put(nls->owner);
+       if (nls)
+               module_put(nls->owner);
  }
  
  static const wchar_t charset2uni[256] = {
diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c

index abaaa1c..80b0477 100644 (file)
--- a/fs/ntfs/super.c
+++ b/fs/ntfs/super.c
@@ -201,8 +201,7 @@ use_utf8:
                                                 v, old_nls->charset);
                                 nls_map = old_nls;
                         } else /* nls_map */ {
-                               if (old_nls)
-                                       unload_nls(old_nls);
+                               unload_nls(old_nls);
                         }
                 } else if (!strcmp(p, "utf8")) {
                         bool val = false;
@@ -2427,10 +2426,9 @@ static void ntfs_put_super(struct super_block *sb)
                 ntfs_free(vol->upcase);
                 vol->upcase = NULL;
         }
-       if (vol->nls_map) {
-               unload_nls(vol->nls_map);
-               vol->nls_map = NULL;
-       }
+
+       unload_nls(vol->nls_map);
+
         sb->s_fs_info = NULL;
         kfree(vol);
  
diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c

index 11f0c06..32fae40 100644 (file)
--- a/fs/ramfs/file-nommu.c
+++ b/fs/ramfs/file-nommu.c
@@ -69,14 +69,11 @@ int ramfs_nommu_expand_for_mapping(struct inode *inode, size_t newsize)
         /* make various checks */
         order = get_order(newsize);
         if (unlikely(order >= MAX_ORDER))
-               goto too_big;
+               return -EFBIG;
  
-       limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
-       if (limit != RLIM_INFINITY && newsize > limit)
-               goto fsize_exceeded;
-
-       if (newsize > inode->i_sb->s_maxbytes)
-               goto too_big;
+       ret = inode_newsize_ok(inode, newsize);
+       if (ret)
+               return ret;
  
         i_size_write(inode, newsize);
  
@@ -118,12 +115,7 @@ int ramfs_nommu_expand_for_mapping(struct inode *inode, size_t newsize)
  
         return 0;
  
- fsize_exceeded:
-       send_sig(SIGXFSZ, current, 0);
- too_big:
-       return -EFBIG;
-
- add_error:
+add_error:
         while (loop < npages)
                 __free_page(pages + loop++);
         return ret;
diff --git a/fs/read_write.c b/fs/read_write.c

index 6c8c55d..3ac2898 100644 (file)
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -839,9 +839,6 @@ static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos,
                 max = min(in_inode->i_sb->s_maxbytes, out_inode->i_sb->s_maxbytes);
  
         pos = *ppos;
-       retval = -EINVAL;
-       if (unlikely(pos < 0))
-               goto fput_out;
         if (unlikely(pos + count > max)) {
                 retval = -EOVERFLOW;
                 if (pos >= max)
diff --git a/fs/seq_file.c b/fs/seq_file.c

index 6c95927..eae7d9d 100644 (file)
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -429,20 +429,21 @@ EXPORT_SYMBOL(mangle_path);
   */
  int seq_path(struct seq_file *m, struct path *path, char *esc)
  {
-       if (m->count < m->size) {
-               char *s = m->buf + m->count;
-               char *p = d_path(path, s, m->size - m->count);
+       char *buf;
+       size_t size = seq_get_buf(m, &buf);
+       int res = -1;
+
+       if (size) {
+               char *p = d_path(path, buf, size);
                 if (!IS_ERR(p)) {
-                       s = mangle_path(s, p, esc);
-                       if (s) {
-                               p = m->buf + m->count;
-                               m->count = s - m->buf;
-                               return s - p;
-                       }
+                       char *end = mangle_path(buf, p, esc);
+                       if (end)
+                               res = end - buf;
                 }
         }
-       m->count = m->size;
-       return -1;
+       seq_commit(m, res);
+
+       return res;
  }
  EXPORT_SYMBOL(seq_path);
  
@@ -454,26 +455,28 @@ EXPORT_SYMBOL(seq_path);
  int seq_path_root(struct seq_file *m, struct path *path, struct path *root,
                   char *esc)
  {
-       int err = -ENAMETOOLONG;
-       if (m->count < m->size) {
-               char *s = m->buf + m->count;
+       char *buf;
+       size_t size = seq_get_buf(m, &buf);
+       int res = -ENAMETOOLONG;
+
+       if (size) {
                 char *p;
  
                 spin_lock(&dcache_lock);
-               p = __d_path(path, root, s, m->size - m->count);
+               p = __d_path(path, root, buf, size);
                 spin_unlock(&dcache_lock);
-               err = PTR_ERR(p);
+               res = PTR_ERR(p);
                 if (!IS_ERR(p)) {
-                       s = mangle_path(s, p, esc);
-                       if (s) {
-                               p = m->buf + m->count;
-                               m->count = s - m->buf;
-                               return 0;
-                       }
+                       char *end = mangle_path(buf, p, esc);
+                       if (end)
+                               res = end - buf;
+                       else
+                               res = -ENAMETOOLONG;
                 }
         }
-       m->count = m->size;
-       return err;
+       seq_commit(m, res);
+
+       return res < 0 ? res : 0;
  }
  
  /*
@@ -481,20 +484,21 @@ int seq_path_root(struct seq_file *m, struct path *path, struct path *root,
   */
  int seq_dentry(struct seq_file *m, struct dentry *dentry, char *esc)
  {
-       if (m->count < m->size) {
-               char *s = m->buf + m->count;
-               char *p = dentry_path(dentry, s, m->size - m->count);
+       char *buf;
+       size_t size = seq_get_buf(m, &buf);
+       int res = -1;
+
+       if (size) {
+               char *p = dentry_path(dentry, buf, size);
                 if (!IS_ERR(p)) {
-                       s = mangle_path(s, p, esc);
-                       if (s) {
-                               p = m->buf + m->count;
-                               m->count = s - m->buf;
-                               return s - p;
-                       }
+                       char *end = mangle_path(buf, p, esc);
+                       if (end)
+                               res = end - buf;
                 }
         }
-       m->count = m->size;
-       return -1;
+       seq_commit(m, res);
+
+       return res;
  }
  
  int seq_bitmap(struct seq_file *m, const unsigned long *bits,
diff --git a/fs/smbfs/inode.c b/fs/smbfs/inode.c

index 1402d2d..1c4c8f0 100644 (file)
--- a/fs/smbfs/inode.c
+++ b/fs/smbfs/inode.c
@@ -459,14 +459,8 @@ smb_show_options(struct seq_file *s, struct vfsmount *m)
  static void
  smb_unload_nls(struct smb_sb_info *server)
  {
-       if (server->remote_nls) {
-               unload_nls(server->remote_nls);
-               server->remote_nls = NULL;
-       }
-       if (server->local_nls) {
-               unload_nls(server->local_nls);
-               server->local_nls = NULL;
-       }
+       unload_nls(server->remote_nls);
+       unload_nls(server->local_nls);
  }
  
  static void
diff --git a/fs/super.c b/fs/super.c

index 0e7207b..19eb70b 100644 (file)
--- a/fs/super.c
+++ b/fs/super.c
@@ -465,6 +465,48 @@ rescan:
  }
  
  EXPORT_SYMBOL(get_super);
+
+/**
+ * get_active_super - get an active reference to the superblock of a device
+ * @bdev: device to get the superblock for
+ *
+ * Scans the superblock list and finds the superblock of the file system
+ * mounted on the device given.  Returns the superblock with an active
+ * reference and s_umount held exclusively or %NULL if none was found.
+ */
+struct super_block *get_active_super(struct block_device *bdev)
+{
+       struct super_block *sb;
+
+       if (!bdev)
+               return NULL;
+
+       spin_lock(&sb_lock);
+       list_for_each_entry(sb, &super_blocks, s_list) {
+               if (sb->s_bdev != bdev)
+                       continue;
+
+               sb->s_count++;
+               spin_unlock(&sb_lock);
+               down_write(&sb->s_umount);
+               if (sb->s_root) {
+                       spin_lock(&sb_lock);
+                       if (sb->s_count > S_BIAS) {
+                               atomic_inc(&sb->s_active);
+                               sb->s_count--;
+                               spin_unlock(&sb_lock);
+                               return sb;
+                       }
+                       spin_unlock(&sb_lock);
+               }
+               up_write(&sb->s_umount);
+               put_super(sb);
+               yield();
+               spin_lock(&sb_lock);
+       }
+       spin_unlock(&sb_lock);
+       return NULL;
+}
   
  struct super_block * user_get_super(dev_t dev)
  {
@@ -527,11 +569,15 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force)
  {
         int retval;
         int remount_rw;
-       
+
+       if (sb->s_frozen != SB_UNFROZEN)
+               return -EBUSY;
+
  #ifdef CONFIG_BLOCK
         if (!(flags & MS_RDONLY) && bdev_read_only(sb->s_bdev))
                 return -EACCES;
  #endif
+
         if (flags & MS_RDONLY)
                 acct_auto_close(sb);
         shrink_dcache_sb(sb);
@@ -743,9 +789,14 @@ int get_sb_bdev(struct file_system_type *fs_type,
          * will protect the lockfs code from trying to start a snapshot
          * while we are mounting
          */
-       down(&bdev->bd_mount_sem);
+       mutex_lock(&bdev->bd_fsfreeze_mutex);
+       if (bdev->bd_fsfreeze_count > 0) {
+               mutex_unlock(&bdev->bd_fsfreeze_mutex);
+               error = -EBUSY;
+               goto error_bdev;
+       }
         s = sget(fs_type, test_bdev_super, set_bdev_super, bdev);
-       up(&bdev->bd_mount_sem);
+       mutex_unlock(&bdev->bd_fsfreeze_mutex);
         if (IS_ERR(s))
                 goto error_s;
  
@@ -892,6 +943,16 @@ vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void
         if (error)
                 goto out_sb;
  
+       /*
+        * filesystems should never set s_maxbytes larger than MAX_LFS_FILESIZE
+        * but s_maxbytes was an unsigned long long for many releases. Throw
+        * this warning for a little while to try and catch filesystems that
+        * violate this rule. This warning should be either removed or
+        * converted to a BUG() in 2.6.34.
+        */
+       WARN((mnt->mnt_sb->s_maxbytes < 0), "%s set sb->s_maxbytes to "
+               "negative value (%lld)\n", type->name, mnt->mnt_sb->s_maxbytes);
+
         mnt->mnt_mountpoint = mnt->mnt_root;
         mnt->mnt_parent = mnt;
         up_write(&mnt->mnt_sb->s_umount);
diff --git a/include/linux/fs.h b/include/linux/fs.h

index 78e95b8..2adaa25 100644 (file)
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -641,7 +641,6 @@ struct block_device {
         struct super_block *    bd_super;
         int                     bd_openers;
         struct mutex            bd_mutex;       /* open/close mutex */
-       struct semaphore        bd_mount_sem;
         struct list_head        bd_inodes;
         void *                  bd_holder;
         int                     bd_holders;
@@ -1316,7 +1315,7 @@ struct super_block {
         unsigned long           s_blocksize;
         unsigned char           s_blocksize_bits;
         unsigned char           s_dirt;
-       unsigned long long      s_maxbytes;     /* Max file size */
+       loff_t                  s_maxbytes;     /* Max file size */
         struct file_system_type *s_type;
         const struct super_operations   *s_op;
         const struct dquot_operations   *dq_op;
@@ -2157,6 +2156,7 @@ extern ino_t iunique(struct super_block *, ino_t);
  extern int inode_needs_sync(struct inode *inode);
  extern void generic_delete_inode(struct inode *inode);
  extern void generic_drop_inode(struct inode *inode);
+extern int generic_detach_inode(struct inode *inode);
  
  extern struct inode *ilookup5_nowait(struct super_block *sb,
                 unsigned long hashval, int (*test)(struct inode *, void *),
@@ -2335,6 +2335,7 @@ extern void get_filesystem(struct file_system_type *fs);
  extern void put_filesystem(struct file_system_type *fs);
  extern struct file_system_type *get_fs_type(const char *name);
  extern struct super_block *get_super(struct block_device *);
+extern struct super_block *get_active_super(struct block_device *bdev);
  extern struct super_block *user_get_super(dev_t);
  extern void drop_super(struct super_block *sb);
  
@@ -2382,7 +2383,8 @@ extern int buffer_migrate_page(struct address_space *,
  #define buffer_migrate_page NULL
  #endif
  
-extern int inode_change_ok(struct inode *, struct iattr *);
+extern int inode_change_ok(const struct inode *, struct iattr *);
+extern int inode_newsize_ok(const struct inode *, loff_t offset);
  extern int __must_check inode_setattr(struct inode *, struct iattr *);
  
  extern void file_update_time(struct file *file);
diff --git a/include/linux/mm.h b/include/linux/mm.h

index 6953a5a..df08551 100644 (file)
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -792,8 +792,9 @@ static inline void unmap_shared_mapping_range(struct address_space *mapping,
         unmap_mapping_range(mapping, holebegin, holelen, 0);
  }
  
-extern int vmtruncate(struct inode * inode, loff_t offset);
-extern int vmtruncate_range(struct inode * inode, loff_t offset, loff_t end);
+extern void truncate_pagecache(struct inode *inode, loff_t old, loff_t new);
+extern int vmtruncate(struct inode *inode, loff_t offset);
+extern int vmtruncate_range(struct inode *inode, loff_t offset, loff_t end);
  
  int truncate_inode_page(struct address_space *mapping, struct page *page);
  int generic_error_remove_page(struct address_space *mapping, struct page *page);
diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h

index 0c6a86b..8366d8f 100644 (file)
--- a/include/linux/seq_file.h
+++ b/include/linux/seq_file.h
@@ -35,6 +35,44 @@ struct seq_operations {
  
  #define SEQ_SKIP 1
  
+/**
+ * seq_get_buf - get buffer to write arbitrary data to
+ * @m: the seq_file handle
+ * @bufp: the beginning of the buffer is stored here
+ *
+ * Return the number of bytes available in the buffer, or zero if
+ * there's no space.
+ */
+static inline size_t seq_get_buf(struct seq_file *m, char **bufp)
+{
+       BUG_ON(m->count > m->size);
+       if (m->count < m->size)
+               *bufp = m->buf + m->count;
+       else
+               *bufp = NULL;
+
+       return m->size - m->count;
+}
+
+/**
+ * seq_commit - commit data to the buffer
+ * @m: the seq_file handle
+ * @num: the number of bytes to commit
+ *
+ * Commit @num bytes of data written to a buffer previously acquired
+ * by seq_get_buf.  To signal an error condition, or that the data
+ * didn't fit in the available space, pass a negative @num value.
+ */
+static inline void seq_commit(struct seq_file *m, int num)
+{
+       if (num < 0) {
+               m->count = m->size;
+       } else {
+               BUG_ON(m->count + num > m->size);
+               m->count += num;
+       }
+}
+
  char *mangle_path(char *s, char *p, char *esc);
  int seq_open(struct file *, const struct seq_operations *);
  ssize_t seq_read(struct file *, char __user *, size_t, loff_t *);
diff --git a/mm/filemap.c b/mm/filemap.c

index c1fc205..6c84e59 100644 (file)
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -58,7 +58,7 @@
  /*
   * Lock ordering:
   *
- *  ->i_mmap_lock              (vmtruncate)
+ *  ->i_mmap_lock              (truncate_pagecache)
   *    ->private_lock           (__free_pte->__set_page_dirty_buffers)
   *      ->swap_lock            (exclusive_swap_page, others)
   *        ->mapping->tree_lock
diff --git a/mm/memory.c b/mm/memory.c

index 987389a..7e91b5f 100644 (file)
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -297,7 +297,8 @@ void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *vma,
                 unsigned long addr = vma->vm_start;
  
                 /*
-                * Hide vma from rmap and vmtruncate before freeing pgtables
+                * Hide vma from rmap and truncate_pagecache before freeing
+                * pgtables
                  */
                 anon_vma_unlink(vma);
                 unlink_file_vma(vma);
@@ -2408,7 +2409,7 @@ restart:
   * @mapping: the address space containing mmaps to be unmapped.
   * @holebegin: byte in first page to unmap, relative to the start of
   * the underlying file.  This will be rounded down to a PAGE_SIZE
- * boundary.  Note that this is different from vmtruncate(), which
+ * boundary.  Note that this is different from truncate_pagecache(), which
   * must keep the partial page.  In contrast, we must get rid of
   * partial pages.
   * @holelen: size of prospective hole in bytes.  This will be rounded
@@ -2459,63 +2460,6 @@ void unmap_mapping_range(struct address_space *mapping,
  }
  EXPORT_SYMBOL(unmap_mapping_range);
  
-/**
- * vmtruncate - unmap mappings "freed" by truncate() syscall
- * @inode: inode of the file used
- * @offset: file offset to start truncating
- *
- * NOTE! We have to be ready to update the memory sharing
- * between the file and the memory map for a potential last
- * incomplete page.  Ugly, but necessary.
- */
-int vmtruncate(struct inode * inode, loff_t offset)
-{
-       if (inode->i_size < offset) {
-               unsigned long limit;
-
-               limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
-               if (limit != RLIM_INFINITY && offset > limit)
-                       goto out_sig;
-               if (offset > inode->i_sb->s_maxbytes)
-                       goto out_big;
-               i_size_write(inode, offset);
-       } else {
-               struct address_space *mapping = inode->i_mapping;
-
-               /*
-                * truncation of in-use swapfiles is disallowed - it would
-                * cause subsequent swapout to scribble on the now-freed
-                * blocks.
-                */
-               if (IS_SWAPFILE(inode))
-                       return -ETXTBSY;
-               i_size_write(inode, offset);
-
-               /*
-                * unmap_mapping_range is called twice, first simply for
-                * efficiency so that truncate_inode_pages does fewer
-                * single-page unmaps.  However after this first call, and
-                * before truncate_inode_pages finishes, it is possible for
-                * private pages to be COWed, which remain after
-                * truncate_inode_pages finishes, hence the second
-                * unmap_mapping_range call must be made for correctness.
-                */
-               unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
-               truncate_inode_pages(mapping, offset);
-               unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
-       }
-
-       if (inode->i_op->truncate)
-               inode->i_op->truncate(inode);
-       return 0;
-
-out_sig:
-       send_sig(SIGXFSZ, current, 0);
-out_big:
-       return -EFBIG;
-}
-EXPORT_SYMBOL(vmtruncate);
-
  int vmtruncate_range(struct inode *inode, loff_t offset, loff_t end)
  {
         struct address_space *mapping = inode->i_mapping;
diff --git a/mm/mremap.c b/mm/mremap.c

index 20a07db..97bff25 100644 (file)
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -86,8 +86,8 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
         if (vma->vm_file) {
                 /*
                  * Subtle point from Rajesh Venkatasubramanian: before
-                * moving file-based ptes, we must lock vmtruncate out,
-                * since it might clean the dst vma before the src vma,
+                * moving file-based ptes, we must lock truncate_pagecache
+                * out, since it might clean the dst vma before the src vma,
                  * and we propagate stale pages into the dst afterward.
                  */
                 mapping = vma->vm_file->f_mapping;
diff --git a/mm/nommu.c b/mm/nommu.c

index 8d48424..56a446f 100644 (file)
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -83,46 +83,6 @@ struct vm_operations_struct generic_file_vm_ops = {
  };
  
  /*
- * Handle all mappings that got truncated by a "truncate()"
- * system call.
- *
- * NOTE! We have to be ready to update the memory sharing
- * between the file and the memory map for a potential last
- * incomplete page.  Ugly, but necessary.
- */
-int vmtruncate(struct inode *inode, loff_t offset)
-{
-       struct address_space *mapping = inode->i_mapping;
-       unsigned long limit;
-
-       if (inode->i_size < offset)
-               goto do_expand;
-       i_size_write(inode, offset);
-
-       truncate_inode_pages(mapping, offset);
-       goto out_truncate;
-
-do_expand:
-       limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
-       if (limit != RLIM_INFINITY && offset > limit)
-               goto out_sig;
-       if (offset > inode->i_sb->s_maxbytes)
-               goto out;
-       i_size_write(inode, offset);
-
-out_truncate:
-       if (inode->i_op->truncate)
-               inode->i_op->truncate(inode);
-       return 0;
-out_sig:
-       send_sig(SIGXFSZ, current, 0);
-out:
-       return -EFBIG;
-}
-
-EXPORT_SYMBOL(vmtruncate);
-
-/*
   * Return the total memory allocated for this pointer, not
   * just what the caller asked for.
   *
diff --git a/mm/truncate.c b/mm/truncate.c

index a17b397..450cebd 100644 (file)
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -497,3 +497,67 @@ int invalidate_inode_pages2(struct address_space *mapping)
         return invalidate_inode_pages2_range(mapping, 0, -1);
  }
  EXPORT_SYMBOL_GPL(invalidate_inode_pages2);
+
+/**
+ * truncate_pagecache - unmap and remove pagecache that has been truncated
+ * @inode: inode
+ * @old: old file size (i_size before the truncation)
+ * @new: new file size (the i_size already written to the inode)
+ *
+ * inode's new i_size must already be written before truncate_pagecache
+ * is called.
+ *
+ * This function should typically be called before the filesystem
+ * releases resources associated with the freed range (eg. deallocates
+ * blocks). This way, pagecache will always stay logically coherent
+ * with on-disk format, and the filesystem would not have to deal with
+ * situations such as writepage being called for a page that has already
+ * had its underlying blocks deallocated.
+ */
+void truncate_pagecache(struct inode *inode, loff_t old, loff_t new)
+{
+       if (new < old) {
+               struct address_space *mapping = inode->i_mapping;
+
+               /*
+                * unmap_mapping_range is called twice, first simply for
+                * efficiency so that truncate_inode_pages does fewer
+                * single-page unmaps.  However after this first call, and
+                * before truncate_inode_pages finishes, it is possible for
+                * private pages to be COWed, which remain after
+                * truncate_inode_pages finishes, hence the second
+                * unmap_mapping_range call must be made for correctness.
+                */
+               unmap_mapping_range(mapping, new + PAGE_SIZE - 1, 0, 1);
+               truncate_inode_pages(mapping, new);
+               unmap_mapping_range(mapping, new + PAGE_SIZE - 1, 0, 1);
+       }
+}
+EXPORT_SYMBOL(truncate_pagecache);
+
+/**
+ * vmtruncate - unmap mappings "freed" by truncate() syscall
+ * @inode: inode of the file used
+ * @offset: new file size; pagecache and mappings beyond it are dropped
+ *
+ * NOTE! We have to be ready to update the memory sharing
+ * between the file and the memory map for a potential last
+ * incomplete page.  Ugly, but necessary.
+ */
+int vmtruncate(struct inode *inode, loff_t offset)
+{
+       loff_t oldsize;
+       int error;
+
+       error = inode_newsize_ok(inode, offset);
+       if (error)
+               return error;
+       oldsize = inode->i_size;
+       i_size_write(inode, offset);
+       truncate_pagecache(inode, oldsize, offset);
+       if (inode->i_op->truncate)
+               inode->i_op->truncate(inode);
+
+       return error;
+}
+EXPORT_SYMBOL(vmtruncate);
author	Linus Torvalds <torvalds@linux-foundation.org>
	Thu, 24 Sep 2009 15:32:11 +0000 (08:32 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Thu, 24 Sep 2009 15:32:11 +0000 (08:32 -0700)
Documentation/vm/locking		patch \| blob \| history
fs/attr.c		patch \| blob \| history
fs/befs/linuxvfs.c		patch \| blob \| history
fs/block_dev.c		patch \| blob \| history
fs/buffer.c		patch \| blob \| history
fs/cifs/cifsfs.c		patch \| blob \| history
fs/cifs/inode.c		patch \| blob \| history
fs/compat.c		patch \| blob \| history
fs/exofs/super.c		patch \| blob \| history
fs/fat/inode.c		patch \| blob \| history
fs/fuse/dir.c		patch \| blob \| history
fs/fuse/fuse_i.h		patch \| blob \| history
fs/fuse/inode.c		patch \| blob \| history
fs/hfs/mdb.c		patch \| blob \| history
fs/hfsplus/super.c		patch \| blob \| history
fs/hugetlbfs/inode.c		patch \| blob \| history
fs/inode.c		patch \| blob \| history
fs/internal.h		patch \| blob \| history
fs/ioctl.c		patch \| blob \| history
fs/isofs/inode.c		patch \| blob \| history
fs/jfs/super.c		patch \| blob \| history
fs/libfs.c		patch \| blob \| history
fs/namespace.c		patch \| blob \| history
fs/ncpfs/inode.c		patch \| blob \| history
fs/ncpfs/ioctl.c		patch \| blob \| history
fs/nfs/inode.c		patch \| blob \| history
fs/nls/nls_base.c		patch \| blob \| history
fs/ntfs/super.c		patch \| blob \| history
fs/ramfs/file-nommu.c		patch \| blob \| history
fs/read_write.c		patch \| blob \| history
fs/seq_file.c		patch \| blob \| history
fs/smbfs/inode.c		patch \| blob \| history
fs/super.c		patch \| blob \| history
include/linux/fs.h		patch \| blob \| history
include/linux/mm.h		patch \| blob \| history
include/linux/seq_file.h		patch \| blob \| history
mm/filemap.c		patch \| blob \| history
mm/memory.c		patch \| blob \| history
mm/mremap.c		patch \| blob \| history
mm/nommu.c		patch \| blob \| history
mm/truncate.c		patch \| blob \| history