From f03c65993b98eeb909a4012ce7833c5857d74755 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 14 Jan 2011 22:30:21 -0500 Subject: [PATCH] sanitize vfsmount refcounting changes Instead of splitting refcount between (per-cpu) mnt_count and (SMP-only) mnt_longrefs, make all references contribute to mnt_count again and keep track of how many are longterm ones. Accounting rules for longterm count: * 1 for each fs_struct.root.mnt * 1 for each fs_struct.pwd.mnt * 1 for having non-NULL ->mnt_ns * decrement to 0 happens only under vfsmount lock exclusive That allows nice common case for mntput() - since we can't drop the final reference until after mnt_longterm has reached 0 due to the rules above, mntput() can grab vfsmount lock shared and check mnt_longterm. If it turns out to be non-zero (which is the common case), we know that this is not the final mntput() and can just blindly decrement percpu mnt_count. Otherwise we grab vfsmount lock exclusive and do usual decrement-and-check of percpu mnt_count. For fs_struct.c we have mnt_make_longterm() and mnt_make_shortterm(); namespace.c uses the latter in places where we don't already hold vfsmount lock exclusive and opencodes a few remaining spots where we need to manipulate mnt_longterm. Note that we mostly revert the code outside of fs/namespace.c back to what we used to have; in particular, normal code doesn't need to care about two kinds of references, etc. And we get to keep the optimization Nick's variant had bought us... Signed-off-by: Al Viro --- drivers/mtd/mtdchar.c | 2 +- fs/anon_inodes.c | 2 +- fs/fs_struct.c | 35 ++++++++++----- fs/internal.h | 3 ++ fs/namei.c | 24 ----------- fs/namespace.c | 116 +++++++++++++++++++------------------------------- fs/pipe.c | 2 +- fs/super.c | 2 +- include/linux/mount.h | 4 +- include/linux/path.h | 2 - 10 files changed, 75 insertions(+), 117 deletions(-) diff --git a/drivers/mtd/mtdchar.c b/drivers/mtd/mtdchar.c index ee4bb33..9824057 100644 --- a/drivers/mtd/mtdchar.c +++ b/drivers/mtd/mtdchar.c @@ -1201,7 +1201,7 @@ err_unregister_chdev: static void __exit cleanup_mtdchar(void) { unregister_mtd_user(&mtdchar_notifier); - mntput_long(mtd_inode_mnt); + mntput(mtd_inode_mnt); unregister_filesystem(&mtd_inodefs_type); __unregister_chrdev(MTD_CHAR_MAJOR, 0, 1 << MINORBITS, "mtd"); } diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c index cbe57f3..c5567cb 100644 --- a/fs/anon_inodes.c +++ b/fs/anon_inodes.c @@ -233,7 +233,7 @@ static int __init anon_inode_init(void) return 0; err_mntput: - mntput_long(anon_inode_mnt); + mntput(anon_inode_mnt); err_unregister_filesystem: unregister_filesystem(&anon_inode_fs_type); err_exit: diff --git a/fs/fs_struct.c b/fs/fs_struct.c index 68ca487..78b519c 100644 --- a/fs/fs_struct.c +++ b/fs/fs_struct.c @@ -4,6 +4,19 @@ #include #include #include +#include "internal.h" + +static inline void path_get_longterm(struct path *path) +{ + path_get(path); + mnt_make_longterm(path->mnt); +} + +static inline void path_put_longterm(struct path *path) +{ + mnt_make_shortterm(path->mnt); + path_put(path); +} /* * Replace the fs->{rootmnt,root} with {mnt,dentry}. Put the old values. @@ -17,11 +30,11 @@ void set_fs_root(struct fs_struct *fs, struct path *path) write_seqcount_begin(&fs->seq); old_root = fs->root; fs->root = *path; - path_get_long(path); + path_get_longterm(path); write_seqcount_end(&fs->seq); spin_unlock(&fs->lock); if (old_root.dentry) - path_put_long(&old_root); + path_put_longterm(&old_root); } /* @@ -36,12 +49,12 @@ void set_fs_pwd(struct fs_struct *fs, struct path *path) write_seqcount_begin(&fs->seq); old_pwd = fs->pwd; fs->pwd = *path; - path_get_long(path); + path_get_longterm(path); write_seqcount_end(&fs->seq); spin_unlock(&fs->lock); if (old_pwd.dentry) - path_put_long(&old_pwd); + path_put_longterm(&old_pwd); } void chroot_fs_refs(struct path *old_root, struct path *new_root) @@ -59,13 +72,13 @@ void chroot_fs_refs(struct path *old_root, struct path *new_root) write_seqcount_begin(&fs->seq); if (fs->root.dentry == old_root->dentry && fs->root.mnt == old_root->mnt) { - path_get_long(new_root); + path_get_longterm(new_root); fs->root = *new_root; count++; } if (fs->pwd.dentry == old_root->dentry && fs->pwd.mnt == old_root->mnt) { - path_get_long(new_root); + path_get_longterm(new_root); fs->pwd = *new_root; count++; } @@ -76,13 +89,13 @@ void chroot_fs_refs(struct path *old_root, struct path *new_root) } while_each_thread(g, p); read_unlock(&tasklist_lock); while (count--) - path_put_long(old_root); + path_put_longterm(old_root); } void free_fs_struct(struct fs_struct *fs) { - path_put_long(&fs->root); - path_put_long(&fs->pwd); + path_put_longterm(&fs->root); + path_put_longterm(&fs->pwd); kmem_cache_free(fs_cachep, fs); } @@ -118,9 +131,9 @@ struct fs_struct *copy_fs_struct(struct fs_struct *old) spin_lock(&old->lock); fs->root = old->root; - path_get_long(&fs->root); + path_get_longterm(&fs->root); fs->pwd = old->pwd; - path_get_long(&fs->pwd); + path_get_longterm(&fs->pwd); spin_unlock(&old->lock); } return fs; diff --git a/fs/internal.h b/fs/internal.h index 4931060..12ccb86 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -73,6 +73,9 @@ extern struct vfsmount *copy_tree(struct vfsmount *, struct dentry *, int); extern int do_add_mount(struct vfsmount *, struct path *, int); extern void mnt_clear_expiry(struct vfsmount *); +extern void mnt_make_longterm(struct vfsmount *); +extern void mnt_make_shortterm(struct vfsmount *); + extern void __init mnt_init(void); DECLARE_BRLOCK(vfsmount_lock); diff --git a/fs/namei.c b/fs/namei.c index c2e3772..8f7b41a 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -368,18 +368,6 @@ void path_get(struct path *path) EXPORT_SYMBOL(path_get); /** - * path_get_long - get a long reference to a path - * @path: path to get the reference to - * - * Given a path increment the reference count to the dentry and the vfsmount. - */ -void path_get_long(struct path *path) -{ - mntget_long(path->mnt); - dget(path->dentry); -} - -/** * path_put - put a reference to a path * @path: path to put the reference to * @@ -393,18 +381,6 @@ void path_put(struct path *path) EXPORT_SYMBOL(path_put); /** - * path_put_long - put a long reference to a path - * @path: path to put the reference to - * - * Given a path decrement the reference count to the dentry and the vfsmount. - */ -void path_put_long(struct path *path) -{ - dput(path->dentry); - mntput_long(path->mnt); -} - -/** * nameidata_drop_rcu - drop this nameidata out of rcu-walk * @nd: nameidata pathwalk data to drop * Returns: 0 on success, -ECHILD on failure diff --git a/fs/namespace.c b/fs/namespace.c index d7fc05f..48809e2 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -183,7 +183,7 @@ static inline void mnt_dec_count(struct vfsmount *mnt) unsigned int mnt_get_count(struct vfsmount *mnt) { #ifdef CONFIG_SMP - unsigned int count = atomic_read(&mnt->mnt_longrefs); + unsigned int count = 0; int cpu; for_each_possible_cpu(cpu) { @@ -217,7 +217,7 @@ struct vfsmount *alloc_vfsmnt(const char *name) if (!mnt->mnt_pcp) goto out_free_devname; - atomic_set(&mnt->mnt_longrefs, 1); + this_cpu_add(mnt->mnt_pcp->mnt_count, 1); #else mnt->mnt_count = 1; mnt->mnt_writers = 0; @@ -624,8 +624,11 @@ static void commit_tree(struct vfsmount *mnt) BUG_ON(parent == mnt); list_add_tail(&head, &mnt->mnt_list); - list_for_each_entry(m, &head, mnt_list) + list_for_each_entry(m, &head, mnt_list) { m->mnt_ns = n; + atomic_inc(&m->mnt_longterm); + } + list_splice(&head, n->list.prev); list_add_tail(&mnt->mnt_hash, mount_hashtable + @@ -734,51 +737,30 @@ static inline void mntfree(struct vfsmount *mnt) deactivate_super(sb); } -#ifdef CONFIG_SMP -static inline void __mntput(struct vfsmount *mnt, int longrefs) +static void mntput_no_expire(struct vfsmount *mnt) { - if (!longrefs) { put_again: - br_read_lock(vfsmount_lock); - if (likely(atomic_read(&mnt->mnt_longrefs))) { - mnt_dec_count(mnt); - br_read_unlock(vfsmount_lock); - return; - } +#ifdef CONFIG_SMP + br_read_lock(vfsmount_lock); + if (likely(atomic_read(&mnt->mnt_longterm))) { + mnt_dec_count(mnt); br_read_unlock(vfsmount_lock); - } else { - BUG_ON(!atomic_read(&mnt->mnt_longrefs)); - if (atomic_add_unless(&mnt->mnt_longrefs, -1, 1)) - return; + return; } + br_read_unlock(vfsmount_lock); br_write_lock(vfsmount_lock); - if (!longrefs) - mnt_dec_count(mnt); - else - atomic_dec(&mnt->mnt_longrefs); + mnt_dec_count(mnt); if (mnt_get_count(mnt)) { br_write_unlock(vfsmount_lock); return; } - if (unlikely(mnt->mnt_pinned)) { - mnt_add_count(mnt, mnt->mnt_pinned + 1); - mnt->mnt_pinned = 0; - br_write_unlock(vfsmount_lock); - acct_auto_close_mnt(mnt); - goto put_again; - } - br_write_unlock(vfsmount_lock); - mntfree(mnt); -} #else -static inline void __mntput(struct vfsmount *mnt, int longrefs) -{ -put_again: mnt_dec_count(mnt); if (likely(mnt_get_count(mnt))) return; br_write_lock(vfsmount_lock); +#endif if (unlikely(mnt->mnt_pinned)) { mnt_add_count(mnt, mnt->mnt_pinned + 1); mnt->mnt_pinned = 0; @@ -789,12 +771,6 @@ put_again: br_write_unlock(vfsmount_lock); mntfree(mnt); } -#endif - -static void mntput_no_expire(struct vfsmount *mnt) -{ - __mntput(mnt, 0); -} void mntput(struct vfsmount *mnt) { @@ -802,7 +778,7 @@ void mntput(struct vfsmount *mnt) /* avoid cacheline pingpong, hope gcc doesn't get "smart" */ if (unlikely(mnt->mnt_expiry_mark)) mnt->mnt_expiry_mark = 0; - __mntput(mnt, 0); + mntput_no_expire(mnt); } } EXPORT_SYMBOL(mntput); @@ -815,33 +791,6 @@ struct vfsmount *mntget(struct vfsmount *mnt) } EXPORT_SYMBOL(mntget); -void mntput_long(struct vfsmount *mnt) -{ -#ifdef CONFIG_SMP - if (mnt) { - /* avoid cacheline pingpong, hope gcc doesn't get "smart" */ - if (unlikely(mnt->mnt_expiry_mark)) - mnt->mnt_expiry_mark = 0; - __mntput(mnt, 1); - } -#else - mntput(mnt); -#endif -} -EXPORT_SYMBOL(mntput_long); - -struct vfsmount *mntget_long(struct vfsmount *mnt) -{ -#ifdef CONFIG_SMP - if (mnt) - atomic_inc(&mnt->mnt_longrefs); - return mnt; -#else - return mntget(mnt); -#endif -} -EXPORT_SYMBOL(mntget_long); - void mnt_pin(struct vfsmount *mnt) { br_write_lock(vfsmount_lock); @@ -1216,7 +1165,7 @@ void release_mounts(struct list_head *head) dput(dentry); mntput(m); } - mntput_long(mnt); + mntput(mnt); } } @@ -1240,6 +1189,7 @@ void umount_tree(struct vfsmount *mnt, int propagate, struct list_head *kill) list_del_init(&p->mnt_list); __touch_mnt_namespace(p->mnt_ns); p->mnt_ns = NULL; + atomic_dec(&p->mnt_longterm); list_del_init(&p->mnt_child); if (p->mnt_parent != p) { p->mnt_parent->mnt_ghosts++; @@ -1969,7 +1919,7 @@ int do_add_mount(struct vfsmount *newmnt, struct path *path, int mnt_flags) unlock: up_write(&namespace_sem); - mntput_long(newmnt); + mntput(newmnt); return err; } @@ -2291,6 +2241,20 @@ static struct mnt_namespace *alloc_mnt_ns(void) return new_ns; } +void mnt_make_longterm(struct vfsmount *mnt) +{ + atomic_inc(&mnt->mnt_longterm); +} + +void mnt_make_shortterm(struct vfsmount *mnt) +{ + if (atomic_add_unless(&mnt->mnt_longterm, -1, 1)) + return; + br_write_lock(vfsmount_lock); + atomic_dec(&mnt->mnt_longterm); + br_write_unlock(vfsmount_lock); +} + /* * Allocate a new namespace structure and populate it with contents * copied from the namespace of the passed in task structure. @@ -2328,14 +2292,19 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns, q = new_ns->root; while (p) { q->mnt_ns = new_ns; + atomic_inc(&q->mnt_longterm); if (fs) { if (p == fs->root.mnt) { + fs->root.mnt = mntget(q); + atomic_inc(&q->mnt_longterm); + mnt_make_shortterm(p); rootmnt = p; - fs->root.mnt = mntget_long(q); } if (p == fs->pwd.mnt) { + fs->pwd.mnt = mntget(q); + atomic_inc(&q->mnt_longterm); + mnt_make_shortterm(p); pwdmnt = p; - fs->pwd.mnt = mntget_long(q); } } p = next_mnt(p, mnt_ns->root); @@ -2344,9 +2313,9 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns, up_write(&namespace_sem); if (rootmnt) - mntput_long(rootmnt); + mntput(rootmnt); if (pwdmnt) - mntput_long(pwdmnt); + mntput(pwdmnt); return new_ns; } @@ -2379,6 +2348,7 @@ struct mnt_namespace *create_mnt_ns(struct vfsmount *mnt) new_ns = alloc_mnt_ns(); if (!IS_ERR(new_ns)) { mnt->mnt_ns = new_ns; + atomic_inc(&mnt->mnt_longterm); new_ns->root = mnt; list_add(&new_ns->list, &new_ns->root->mnt_list); } diff --git a/fs/pipe.c b/fs/pipe.c index e2e95fb..89e9e19 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -1292,7 +1292,7 @@ static int __init init_pipe_fs(void) static void __exit exit_pipe_fs(void) { unregister_filesystem(&pipe_fs_type); - mntput_long(pipe_mnt); + mntput(pipe_mnt); } fs_initcall(init_pipe_fs); diff --git a/fs/super.c b/fs/super.c index 4f6a357..74e149e 100644 --- a/fs/super.c +++ b/fs/super.c @@ -1141,7 +1141,7 @@ static struct vfsmount *fs_set_subtype(struct vfsmount *mnt, const char *fstype) return mnt; err: - mntput_long(mnt); + mntput(mnt); return ERR_PTR(err); } diff --git a/include/linux/mount.h b/include/linux/mount.h index af4765e..604f122 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -60,7 +60,7 @@ struct vfsmount { struct super_block *mnt_sb; /* pointer to superblock */ #ifdef CONFIG_SMP struct mnt_pcp __percpu *mnt_pcp; - atomic_t mnt_longrefs; + atomic_t mnt_longterm; /* how many of the refs are longterm */ #else int mnt_count; int mnt_writers; @@ -96,8 +96,6 @@ extern int mnt_clone_write(struct vfsmount *mnt); extern void mnt_drop_write(struct vfsmount *mnt); extern void mntput(struct vfsmount *mnt); extern struct vfsmount *mntget(struct vfsmount *mnt); -extern void mntput_long(struct vfsmount *mnt); -extern struct vfsmount *mntget_long(struct vfsmount *mnt); extern void mnt_pin(struct vfsmount *mnt); extern void mnt_unpin(struct vfsmount *mnt); extern int __mnt_is_readonly(struct vfsmount *mnt); diff --git a/include/linux/path.h b/include/linux/path.h index a581e8c..edc98de 100644 --- a/include/linux/path.h +++ b/include/linux/path.h @@ -10,9 +10,7 @@ struct path { }; extern void path_get(struct path *); -extern void path_get_long(struct path *); extern void path_put(struct path *); -extern void path_put_long(struct path *); static inline int path_equal(const struct path *path1, const struct path *path2) { -- 2.7.4