From 866ad9a747bbf5461739fcae6d0a41c8971bbe1d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 3 Apr 2013 19:07:30 -0400 Subject: [PATCH] procfs: preparations for remove_proc_entry() race fixes * leave ->proc_fops alone; make ->pde_users negative instead * trim pde_opener * move relevant code in fs/proc/inode.c Signed-off-by: Al Viro --- fs/proc/generic.c | 83 +--------------- fs/proc/inode.c | 248 ++++++++++++++++++++++++------------------------ fs/proc/internal.h | 7 +- include/linux/proc_fs.h | 11 +-- 4 files changed, 135 insertions(+), 214 deletions(-) diff --git a/fs/proc/generic.c b/fs/proc/generic.c index a6a1cb5..bec5832 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -39,7 +39,7 @@ static int proc_match(unsigned int len, const char *name, struct proc_dir_entry /* buffer size is one page but our output routines use some slack for overruns */ #define PROC_BLOCK_SIZE (PAGE_SIZE - 1024) -static ssize_t +ssize_t __proc_file_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos) { @@ -171,48 +171,6 @@ __proc_file_read(struct file *file, char __user *buf, size_t nbytes, return retval; } -static ssize_t -proc_file_read(struct file *file, char __user *buf, size_t nbytes, - loff_t *ppos) -{ - struct proc_dir_entry *pde = PDE(file_inode(file)); - ssize_t rv = -EIO; - - spin_lock(&pde->pde_unload_lock); - if (!pde->proc_fops) { - spin_unlock(&pde->pde_unload_lock); - return rv; - } - pde->pde_users++; - spin_unlock(&pde->pde_unload_lock); - - rv = __proc_file_read(file, buf, nbytes, ppos); - - pde_users_dec(pde); - return rv; -} - -static loff_t -proc_file_lseek(struct file *file, loff_t offset, int orig) -{ - loff_t retval = -EINVAL; - switch (orig) { - case 1: - offset += file->f_pos; - /* fallthrough */ - case 0: - if (offset < 0 || offset > MAX_NON_LFS) - break; - file->f_pos = retval = offset; - } - return retval; -} - -static const struct file_operations proc_file_operations = { - .llseek = proc_file_lseek, - .read = proc_file_read, -}; - static int proc_notify_change(struct dentry *dentry, struct iattr *iattr) { struct inode *inode = dentry->d_inode; @@ -722,41 +680,6 @@ void pde_put(struct proc_dir_entry *pde) free_proc_entry(pde); } -static void entry_rundown(struct proc_dir_entry *de) -{ - spin_lock(&de->pde_unload_lock); - /* - * Stop accepting new callers into module. If you're - * dynamically allocating ->proc_fops, save a pointer somewhere. - */ - de->proc_fops = NULL; - /* Wait until all existing callers into module are done. */ - if (de->pde_users > 0) { - DECLARE_COMPLETION_ONSTACK(c); - - if (!de->pde_unload_completion) - de->pde_unload_completion = &c; - - spin_unlock(&de->pde_unload_lock); - - wait_for_completion(de->pde_unload_completion); - - spin_lock(&de->pde_unload_lock); - } - - while (!list_empty(&de->pde_openers)) { - struct pde_opener *pdeo; - - pdeo = list_first_entry(&de->pde_openers, struct pde_opener, lh); - list_del(&pdeo->lh); - spin_unlock(&de->pde_unload_lock); - pdeo->release(pdeo->inode, pdeo->file); - kfree(pdeo); - spin_lock(&de->pde_unload_lock); - } - spin_unlock(&de->pde_unload_lock); -} - /* * Remove a /proc entry and free it if it's not currently in use. */ @@ -788,7 +711,7 @@ void remove_proc_entry(const char *name, struct proc_dir_entry *parent) return; } - entry_rundown(de); + proc_entry_rundown(de); if (S_ISDIR(de->mode)) parent->nlink--; @@ -837,7 +760,7 @@ int remove_proc_subtree(const char *name, struct proc_dir_entry *parent) } spin_unlock(&proc_subdir_lock); - entry_rundown(de); + proc_entry_rundown(de); next = de->parent; if (S_ISDIR(de->mode)) next->nlink--; diff --git a/fs/proc/inode.c b/fs/proc/inode.c index a4aaaee..0cd9d80 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -129,96 +129,138 @@ static const struct super_operations proc_sops = { .show_options = proc_show_options, }; +enum {BIAS = -1U<<31}; + +static inline int use_pde(struct proc_dir_entry *pde) +{ + int res = 1; + spin_lock(&pde->pde_unload_lock); + if (unlikely(pde->pde_users < 0)) + res = 0; + else + pde->pde_users++; + spin_unlock(&pde->pde_unload_lock); + return res; +} + static void __pde_users_dec(struct proc_dir_entry *pde) { - pde->pde_users--; - if (pde->pde_unload_completion && pde->pde_users == 0) + if (--pde->pde_users == BIAS) complete(pde->pde_unload_completion); } -void pde_users_dec(struct proc_dir_entry *pde) +static void unuse_pde(struct proc_dir_entry *pde) { spin_lock(&pde->pde_unload_lock); __pde_users_dec(pde); spin_unlock(&pde->pde_unload_lock); } -static loff_t proc_reg_llseek(struct file *file, loff_t offset, int whence) +void proc_entry_rundown(struct proc_dir_entry *de) { - struct proc_dir_entry *pde = PDE(file_inode(file)); - loff_t rv = -EINVAL; - loff_t (*llseek)(struct file *, loff_t, int); + spin_lock(&de->pde_unload_lock); + de->pde_users += BIAS; + /* Wait until all existing callers into module are done. */ + if (de->pde_users != BIAS) { + DECLARE_COMPLETION_ONSTACK(c); + de->pde_unload_completion = &c; + spin_unlock(&de->pde_unload_lock); - spin_lock(&pde->pde_unload_lock); - /* - * remove_proc_entry() is going to delete PDE (as part of module - * cleanup sequence). No new callers into module allowed. - */ - if (!pde->proc_fops) { - spin_unlock(&pde->pde_unload_lock); - return rv; + wait_for_completion(de->pde_unload_completion); + + spin_lock(&de->pde_unload_lock); } - /* - * Bump refcount so that remove_proc_entry will wail for ->llseek to - * complete. - */ - pde->pde_users++; - /* - * Save function pointer under lock, to protect against ->proc_fops - * NULL'ifying right after ->pde_unload_lock is dropped. - */ - llseek = pde->proc_fops->llseek; - spin_unlock(&pde->pde_unload_lock); - if (!llseek) - llseek = default_llseek; - rv = llseek(file, offset, whence); + while (!list_empty(&de->pde_openers)) { + struct pde_opener *pdeo; + struct file *file; - pde_users_dec(pde); - return rv; + pdeo = list_first_entry(&de->pde_openers, struct pde_opener, lh); + list_del(&pdeo->lh); + spin_unlock(&de->pde_unload_lock); + file = pdeo->file; + de->proc_fops->release(file_inode(file), file); + kfree(pdeo); + spin_lock(&de->pde_unload_lock); + } + spin_unlock(&de->pde_unload_lock); } -static ssize_t proc_reg_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) +/* ->read_proc() users - legacy crap */ +static ssize_t +proc_file_read(struct file *file, char __user *buf, size_t nbytes, + loff_t *ppos) { struct proc_dir_entry *pde = PDE(file_inode(file)); ssize_t rv = -EIO; - ssize_t (*read)(struct file *, char __user *, size_t, loff_t *); + if (use_pde(pde)) { + rv = __proc_file_read(file, buf, nbytes, ppos); + unuse_pde(pde); + } + return rv; +} - spin_lock(&pde->pde_unload_lock); - if (!pde->proc_fops) { - spin_unlock(&pde->pde_unload_lock); - return rv; +static loff_t +proc_file_lseek(struct file *file, loff_t offset, int orig) +{ + loff_t retval = -EINVAL; + switch (orig) { + case 1: + offset += file->f_pos; + /* fallthrough */ + case 0: + if (offset < 0 || offset > MAX_NON_LFS) + break; + file->f_pos = retval = offset; } - pde->pde_users++; - read = pde->proc_fops->read; - spin_unlock(&pde->pde_unload_lock); + return retval; +} - if (read) - rv = read(file, buf, count, ppos); +const struct file_operations proc_file_operations = { + .llseek = proc_file_lseek, + .read = proc_file_read, +}; - pde_users_dec(pde); +static loff_t proc_reg_llseek(struct file *file, loff_t offset, int whence) +{ + struct proc_dir_entry *pde = PDE(file_inode(file)); + loff_t rv = -EINVAL; + if (use_pde(pde)) { + loff_t (*llseek)(struct file *, loff_t, int); + llseek = pde->proc_fops->llseek; + if (!llseek) + llseek = default_llseek; + rv = llseek(file, offset, whence); + unuse_pde(pde); + } return rv; } -static ssize_t proc_reg_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) +static ssize_t proc_reg_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { + ssize_t (*read)(struct file *, char __user *, size_t, loff_t *); struct proc_dir_entry *pde = PDE(file_inode(file)); ssize_t rv = -EIO; - ssize_t (*write)(struct file *, const char __user *, size_t, loff_t *); - - spin_lock(&pde->pde_unload_lock); - if (!pde->proc_fops) { - spin_unlock(&pde->pde_unload_lock); - return rv; + if (use_pde(pde)) { + read = pde->proc_fops->read; + if (read) + rv = read(file, buf, count, ppos); + unuse_pde(pde); } - pde->pde_users++; - write = pde->proc_fops->write; - spin_unlock(&pde->pde_unload_lock); - - if (write) - rv = write(file, buf, count, ppos); + return rv; +} - pde_users_dec(pde); +static ssize_t proc_reg_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) +{ + ssize_t (*write)(struct file *, const char __user *, size_t, loff_t *); + struct proc_dir_entry *pde = PDE(file_inode(file)); + ssize_t rv = -EIO; + if (use_pde(pde)) { + write = pde->proc_fops->write; + if (write) + rv = write(file, buf, count, ppos); + unuse_pde(pde); + } return rv; } @@ -227,20 +269,12 @@ static unsigned int proc_reg_poll(struct file *file, struct poll_table_struct *p struct proc_dir_entry *pde = PDE(file_inode(file)); unsigned int rv = DEFAULT_POLLMASK; unsigned int (*poll)(struct file *, struct poll_table_struct *); - - spin_lock(&pde->pde_unload_lock); - if (!pde->proc_fops) { - spin_unlock(&pde->pde_unload_lock); - return rv; + if (use_pde(pde)) { + poll = pde->proc_fops->poll; + if (poll) + rv = poll(file, pts); + unuse_pde(pde); } - pde->pde_users++; - poll = pde->proc_fops->poll; - spin_unlock(&pde->pde_unload_lock); - - if (poll) - rv = poll(file, pts); - - pde_users_dec(pde); return rv; } @@ -249,20 +283,12 @@ static long proc_reg_unlocked_ioctl(struct file *file, unsigned int cmd, unsigne struct proc_dir_entry *pde = PDE(file_inode(file)); long rv = -ENOTTY; long (*ioctl)(struct file *, unsigned int, unsigned long); - - spin_lock(&pde->pde_unload_lock); - if (!pde->proc_fops) { - spin_unlock(&pde->pde_unload_lock); - return rv; + if (use_pde(pde)) { + ioctl = pde->proc_fops->unlocked_ioctl; + if (ioctl) + rv = ioctl(file, cmd, arg); + unuse_pde(pde); } - pde->pde_users++; - ioctl = pde->proc_fops->unlocked_ioctl; - spin_unlock(&pde->pde_unload_lock); - - if (ioctl) - rv = ioctl(file, cmd, arg); - - pde_users_dec(pde); return rv; } @@ -272,20 +298,12 @@ static long proc_reg_compat_ioctl(struct file *file, unsigned int cmd, unsigned struct proc_dir_entry *pde = PDE(file_inode(file)); long rv = -ENOTTY; long (*compat_ioctl)(struct file *, unsigned int, unsigned long); - - spin_lock(&pde->pde_unload_lock); - if (!pde->proc_fops) { - spin_unlock(&pde->pde_unload_lock); - return rv; + if (use_pde(pde)) { + compat_ioctl = pde->proc_fops->compat_ioctl; + if (compat_ioctl) + rv = compat_ioctl(file, cmd, arg); + unuse_pde(pde); } - pde->pde_users++; - compat_ioctl = pde->proc_fops->compat_ioctl; - spin_unlock(&pde->pde_unload_lock); - - if (compat_ioctl) - rv = compat_ioctl(file, cmd, arg); - - pde_users_dec(pde); return rv; } #endif @@ -295,20 +313,12 @@ static int proc_reg_mmap(struct file *file, struct vm_area_struct *vma) struct proc_dir_entry *pde = PDE(file_inode(file)); int rv = -EIO; int (*mmap)(struct file *, struct vm_area_struct *); - - spin_lock(&pde->pde_unload_lock); - if (!pde->proc_fops) { - spin_unlock(&pde->pde_unload_lock); - return rv; + if (use_pde(pde)) { + mmap = pde->proc_fops->mmap; + if (mmap) + rv = mmap(file, vma); + unuse_pde(pde); } - pde->pde_users++; - mmap = pde->proc_fops->mmap; - spin_unlock(&pde->pde_unload_lock); - - if (mmap) - rv = mmap(file, vma); - - pde_users_dec(pde); return rv; } @@ -334,16 +344,12 @@ static int proc_reg_open(struct inode *inode, struct file *file) if (!pdeo) return -ENOMEM; - spin_lock(&pde->pde_unload_lock); - if (!pde->proc_fops) { - spin_unlock(&pde->pde_unload_lock); + if (!use_pde(pde)) { kfree(pdeo); return -ENOENT; } - pde->pde_users++; open = pde->proc_fops->open; release = pde->proc_fops->release; - spin_unlock(&pde->pde_unload_lock); if (open) rv = open(inode, file); @@ -351,10 +357,8 @@ static int proc_reg_open(struct inode *inode, struct file *file) spin_lock(&pde->pde_unload_lock); if (rv == 0 && release) { /* To know what to release. */ - pdeo->inode = inode; pdeo->file = file; /* Strictly for "too late" ->release in proc_reg_release(). */ - pdeo->release = release; list_add(&pdeo->lh, &pde->pde_openers); } else kfree(pdeo); @@ -364,12 +368,12 @@ static int proc_reg_open(struct inode *inode, struct file *file) } static struct pde_opener *find_pde_opener(struct proc_dir_entry *pde, - struct inode *inode, struct file *file) + struct file *file) { struct pde_opener *pdeo; list_for_each_entry(pdeo, &pde->pde_openers, lh) { - if (pdeo->inode == inode && pdeo->file == file) + if (pdeo->file == file) return pdeo; } return NULL; @@ -383,8 +387,8 @@ static int proc_reg_release(struct inode *inode, struct file *file) struct pde_opener *pdeo; spin_lock(&pde->pde_unload_lock); - pdeo = find_pde_opener(pde, inode, file); - if (!pde->proc_fops) { + pdeo = find_pde_opener(pde, file); + if (pde->pde_users < 0) { /* * Can't simply exit, __fput() will think that everything is OK, * and move on to freeing struct file. remove_proc_entry() will @@ -396,7 +400,7 @@ static int proc_reg_release(struct inode *inode, struct file *file) if (pdeo) { list_del(&pdeo->lh); spin_unlock(&pde->pde_unload_lock); - rv = pdeo->release(inode, file); + rv = pde->proc_fops->release(inode, file); kfree(pdeo); } else spin_unlock(&pde->pde_unload_lock); @@ -413,7 +417,7 @@ static int proc_reg_release(struct inode *inode, struct file *file) if (release) rv = release(inode, file); - pde_users_dec(pde); + unuse_pde(pde); return rv; } diff --git a/fs/proc/internal.h b/fs/proc/internal.h index 9c93a53..c43d536 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -151,12 +151,13 @@ int proc_readdir_de(struct proc_dir_entry *de, struct file *filp, void *dirent, filldir_t filldir); struct pde_opener { - struct inode *inode; struct file *file; - int (*release)(struct inode *, struct file *); struct list_head lh; }; -void pde_users_dec(struct proc_dir_entry *pde); + +ssize_t __proc_file_read(struct file *, char __user *, size_t, loff_t *); +extern const struct file_operations proc_file_operations; +void proc_entry_rundown(struct proc_dir_entry *); extern spinlock_t proc_subdir_lock; diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index bcc0e10..947ae7eb 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -60,20 +60,13 @@ struct proc_dir_entry { kgid_t gid; loff_t size; const struct inode_operations *proc_iops; - /* - * NULL ->proc_fops means "PDE is going away RSN" or - * "PDE is just created". In either case, e.g. ->read_proc won't be - * called because it's too late or too early, respectively. - * - * If you're allocating ->proc_fops dynamically, save a pointer - * somewhere. - */ const struct file_operations *proc_fops; struct proc_dir_entry *next, *parent, *subdir; void *data; read_proc_t *read_proc; atomic_t count; /* use count */ - int pde_users; /* number of callers into module in progress */ + int pde_users; /* number of callers into module in progress; */ + /* negative -> it's going away RSN */ struct completion *pde_unload_completion; struct list_head pde_openers; /* who did ->open, but not ->release */ spinlock_t pde_unload_lock; /* proc_fops checks and pde_users bumps */ -- 2.7.4