From 1712ac8fda7d8bc4dc921f5777b7423aacad7263 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 22 Mar 2010 15:22:31 -0400 Subject: [PATCH] Saner locking around deactivate_super() Make sure that s_umount is acquired *before* we drop the final active reference; we still have the fast path (atomic_dec_unless) and we have gotten rid of the window between the moment when s_active hits zero and s_umount is acquired. Which simplifies the living hell out of grab_super() and inotify pin_to_kill() stuff. Signed-off-by: Al Viro --- fs/notify/inotify/inotify.c | 76 +++++++-------------------------------------- fs/super.c | 45 ++++++++++++--------------- 2 files changed, 30 insertions(+), 91 deletions(-) diff --git a/fs/notify/inotify/inotify.c b/fs/notify/inotify/inotify.c index 441ef13..27b75eb 100644 --- a/fs/notify/inotify/inotify.c +++ b/fs/notify/inotify/inotify.c @@ -511,34 +511,8 @@ EXPORT_SYMBOL_GPL(inotify_init_watch); * done. Cleanup is just deactivate_super(). However, that leaves a messy * case - what if we *are* racing with umount() and active references to * superblock can't be acquired anymore? We can bump ->s_count, grab - * ->s_umount, which will almost certainly wait until the superblock is shut - * down and the watch in question is pining for fjords. That's fine, but - * there is a problem - we might have hit the window between ->s_active - * getting to 0 (i.e. the moment when superblock is past the point of no return - * and is heading for shutdown) and the moment when deactivate_super() acquires - * ->s_umount. We could just do drop_super() yield() and retry, but that's - * rather antisocial and this stuff is luser-triggerable. OTOH, having grabbed - * ->s_umount and having found that we'd got there first (i.e. that ->s_root is - * non-NULL) we know that we won't race with inotify_umount_inodes(). So we - * could grab a reference to watch and do the rest as above, just with - * drop_super() instead of deactivate_super(), right? Wrong. We had to drop - * ih->mutex before we could grab ->s_umount. So the watch could've been gone - * already. - * - * That still can be dealt with - we need to save watch->wd, do idr_find() - * and compare its result with our pointer. If they match, we either have - * the damn thing still alive or we'd lost not one but two races at once, - * the watch had been killed and a new one got created with the same ->wd - * at the same address. That couldn't have happened in inotify_destroy(), - * but inotify_rm_wd() could run into that. Still, "new one got created" - * is not a problem - we have every right to kill it or leave it alone, - * whatever's more convenient. - * - * So we can use idr_find(...) == watch && watch->inode->i_sb == sb as - * "grab it and kill it" check. If it's been our original watch, we are - * fine, if it's a newcomer - nevermind, just pretend that we'd won the - * race and kill the fscker anyway; we are safe since we know that its - * superblock won't be going away. + * ->s_umount, which will wait until the superblock is shut down and the + * watch in question is pining for fjords. * * And yes, this is far beyond mere "not very pretty"; so's the entire * concept of inotify to start with. @@ -552,14 +526,10 @@ EXPORT_SYMBOL_GPL(inotify_init_watch); * Called with ih->mutex held, drops it. Possible return values: * 0 - nothing to do, it has died * 1 - remove it, drop the reference and deactivate_super() - * 2 - remove it, drop the reference and drop_super(); we tried hard to avoid - * that variant, since it involved a lot of PITA, but that's the best that - * could've been done. */ static int pin_to_kill(struct inotify_handle *ih, struct inotify_watch *watch) { struct super_block *sb = watch->inode->i_sb; - s32 wd = watch->wd; if (atomic_inc_not_zero(&sb->s_active)) { get_inotify_watch(watch); @@ -571,36 +541,16 @@ static int pin_to_kill(struct inotify_handle *ih, struct inotify_watch *watch) spin_unlock(&sb_lock); mutex_unlock(&ih->mutex); /* can't grab ->s_umount under it */ down_read(&sb->s_umount); - if (likely(!sb->s_root)) { - /* fs is already shut down; the watch is dead */ - drop_super(sb); - return 0; - } - /* raced with the final deactivate_super() */ - mutex_lock(&ih->mutex); - if (idr_find(&ih->idr, wd) != watch || watch->inode->i_sb != sb) { - /* the watch is dead */ - mutex_unlock(&ih->mutex); - drop_super(sb); - return 0; - } - /* still alive or freed and reused with the same sb and wd; kill */ - get_inotify_watch(watch); - mutex_unlock(&ih->mutex); - return 2; + /* fs is already shut down; the watch is dead */ + drop_super(sb); + return 0; } -static void unpin_and_kill(struct inotify_watch *watch, int how) +static void unpin_and_kill(struct inotify_watch *watch) { struct super_block *sb = watch->inode->i_sb; put_inotify_watch(watch); - switch (how) { - case 1: - deactivate_super(sb); - break; - case 2: - drop_super(sb); - } + deactivate_super(sb); } /** @@ -622,7 +572,6 @@ void inotify_destroy(struct inotify_handle *ih) struct list_head *watches; struct super_block *sb; struct inode *inode; - int how; mutex_lock(&ih->mutex); watches = &ih->watches; @@ -632,8 +581,7 @@ void inotify_destroy(struct inotify_handle *ih) } watch = list_first_entry(watches, struct inotify_watch, h_list); sb = watch->inode->i_sb; - how = pin_to_kill(ih, watch); - if (!how) + if (!pin_to_kill(ih, watch)) continue; inode = watch->inode; @@ -648,7 +596,7 @@ void inotify_destroy(struct inotify_handle *ih) mutex_unlock(&ih->mutex); mutex_unlock(&inode->inotify_mutex); - unpin_and_kill(watch, how); + unpin_and_kill(watch); } /* free this handle: the put matching the get in inotify_init() */ @@ -851,7 +799,6 @@ int inotify_rm_wd(struct inotify_handle *ih, u32 wd) struct inotify_watch *watch; struct super_block *sb; struct inode *inode; - int how; mutex_lock(&ih->mutex); watch = idr_find(&ih->idr, wd); @@ -860,8 +807,7 @@ int inotify_rm_wd(struct inotify_handle *ih, u32 wd) return -EINVAL; } sb = watch->inode->i_sb; - how = pin_to_kill(ih, watch); - if (!how) + if (!pin_to_kill(ih, watch)) return 0; inode = watch->inode; @@ -875,7 +821,7 @@ int inotify_rm_wd(struct inotify_handle *ih, u32 wd) mutex_unlock(&ih->mutex); mutex_unlock(&inode->inotify_mutex); - unpin_and_kill(watch, how); + unpin_and_kill(watch); return 0; } diff --git a/fs/super.c b/fs/super.c index bc734f8..157657b 100644 --- a/fs/super.c +++ b/fs/super.c @@ -178,53 +178,48 @@ void put_super(struct super_block *sb) /** - * deactivate_super - drop an active reference to superblock + * deactivate_locked_super - drop an active reference to superblock * @s: superblock to deactivate * - * Drops an active reference to superblock, acquiring a temprory one if - * there is no active references left. In that case we lock superblock, + * Drops an active reference to superblock, converting it into a temprory + * one if there is no other active references left. In that case we * tell fs driver to shut it down and drop the temporary reference we * had just acquired. + * + * Caller holds exclusive lock on superblock; that lock is released. */ -void deactivate_super(struct super_block *s) +void deactivate_locked_super(struct super_block *s) { struct file_system_type *fs = s->s_type; if (atomic_dec_and_test(&s->s_active)) { vfs_dq_off(s, 0); - down_write(&s->s_umount); fs->kill_sb(s); put_filesystem(fs); put_super(s); + } else { + up_write(&s->s_umount); } } -EXPORT_SYMBOL(deactivate_super); +EXPORT_SYMBOL(deactivate_locked_super); /** - * deactivate_locked_super - drop an active reference to superblock + * deactivate_super - drop an active reference to superblock * @s: superblock to deactivate * - * Equivalent of up_write(&s->s_umount); deactivate_super(s);, except that - * it does not unlock it until it's all over. As the result, it's safe to - * use to dispose of new superblock on ->get_sb() failure exits - nobody - * will see the sucker until it's all over. Equivalent using up_write + - * deactivate_super is safe for that purpose only if superblock is either - * safe to use or has NULL ->s_root when we unlock. + * Variant of deactivate_locked_super(), except that superblock is *not* + * locked by caller. If we are going to drop the final active reference, + * lock will be acquired prior to that. */ -void deactivate_locked_super(struct super_block *s) +void deactivate_super(struct super_block *s) { - struct file_system_type *fs = s->s_type; - if (atomic_dec_and_test(&s->s_active)) { - vfs_dq_off(s, 0); - fs->kill_sb(s); - put_filesystem(fs); - put_super(s); - } else { - up_write(&s->s_umount); + if (!atomic_add_unless(&s->s_active, -1, 1)) { + down_write(&s->s_umount); + deactivate_locked_super(s); } } -EXPORT_SYMBOL(deactivate_locked_super); +EXPORT_SYMBOL(deactivate_super); /** * grab_super - acquire an active reference @@ -247,12 +242,10 @@ static int grab_super(struct super_block *s) __releases(sb_lock) /* it's going away */ s->s_count++; spin_unlock(&sb_lock); - /* usually that'll be enough for it to die... */ + /* wait for it to die */ down_write(&s->s_umount); up_write(&s->s_umount); put_super(s); - /* ... but in case it wasn't, let's at least yield() */ - yield(); return 0; } -- 2.7.4