simple_recursive_removal(): kernel-side rm -rf for ramfs-style filesystems
author Al Viro <viro@zeniv.linux.org.uk>
Mon, 18 Nov 2019 14:43:10 +0000 (09:43 -0500)
committer Al Viro <viro@zeniv.linux.org.uk>
Wed, 11 Dec 2019 03:29:58 +0000 (22:29 -0500)
Two requirements for a filesystem using it: no file creation in IS_DEADDIR
directories and no cross-directory renames whatsoever.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
fs/debugfs/inode.c
fs/libfs.c
fs/tracefs/inode.c
include/linux/debugfs.h
include/linux/fs.h
include/linux/tracefs.h
kernel/trace/trace.c
kernel/trace/trace_events.c
kernel/trace/trace_hwlat.c

index f4d8df5..bda942a 100644 (file)
@@ -332,7 +332,10 @@ static struct dentry *start_creating(const char *name, struct dentry *parent)
                parent = debugfs_mount->mnt_root;
 
        inode_lock(d_inode(parent));
-       dentry = lookup_one_len(name, parent, strlen(name));
+       if (unlikely(IS_DEADDIR(d_inode(parent))))
+               dentry = ERR_PTR(-ENOENT);
+       else
+               dentry = lookup_one_len(name, parent, strlen(name));
        if (!IS_ERR(dentry) && d_really_is_positive(dentry)) {
                if (d_is_dir(dentry))
                        pr_err("Directory '%s' with parent '%s' already present!\n",
@@ -681,62 +684,15 @@ static void __debugfs_file_removed(struct dentry *dentry)
                wait_for_completion(&fsd->active_users_drained);
 }
 
-static int __debugfs_remove(struct dentry *dentry, struct dentry *parent)
-{
-       int ret = 0;
-
-       if (simple_positive(dentry)) {
-               dget(dentry);
-               if (d_is_dir(dentry)) {
-                       ret = simple_rmdir(d_inode(parent), dentry);
-                       if (!ret)
-                               fsnotify_rmdir(d_inode(parent), dentry);
-               } else {
-                       simple_unlink(d_inode(parent), dentry);
-                       fsnotify_unlink(d_inode(parent), dentry);
-               }
-               if (!ret)
-                       d_delete(dentry);
-               if (d_is_reg(dentry))
-                       __debugfs_file_removed(dentry);
-               dput(dentry);
-       }
-       return ret;
-}
-
-/**
- * debugfs_remove - removes a file or directory from the debugfs filesystem
- * @dentry: a pointer to a the dentry of the file or directory to be
- *          removed.  If this parameter is NULL or an error value, nothing
- *          will be done.
- *
- * This function removes a file or directory in debugfs that was previously
- * created with a call to another debugfs function (like
- * debugfs_create_file() or variants thereof.)
- *
- * This function is required to be called in order for the file to be
- * removed, no automatic cleanup of files will happen when a module is
- * removed, you are responsible here.
- */
-void debugfs_remove(struct dentry *dentry)
+/*
+ * Per-dentry callback handed to simple_recursive_removal() by
+ * debugfs_remove().  For a regular file, __debugfs_file_removed() is
+ * called first; then one reference on the debugfs mount is released via
+ * simple_release_fs() (presumably balancing the pin taken when the entry
+ * was created -- confirm against the creation path).
+ */
+static void remove_one(struct dentry *victim)
 {
-       struct dentry *parent;
-       int ret;
-
-       if (IS_ERR_OR_NULL(dentry))
-               return;
-
-       parent = dentry->d_parent;
-       inode_lock(d_inode(parent));
-       ret = __debugfs_remove(dentry, parent);
-       inode_unlock(d_inode(parent));
-       if (!ret)
-               simple_release_fs(&debugfs_mount, &debugfs_mount_count);
+       if (d_is_reg(victim))
+               __debugfs_file_removed(victim);
+       simple_release_fs(&debugfs_mount, &debugfs_mount_count);
 }
-EXPORT_SYMBOL_GPL(debugfs_remove);
 
 /**
- * debugfs_remove_recursive - recursively removes a directory
+ * debugfs_remove - recursively removes a directory
  * @dentry: a pointer to a the dentry of the directory to be removed.  If this
  *          parameter is NULL or an error value, nothing will be done.
  *
@@ -748,65 +704,16 @@ EXPORT_SYMBOL_GPL(debugfs_remove);
  * removed, no automatic cleanup of files will happen when a module is
  * removed, you are responsible here.
  */
-void debugfs_remove_recursive(struct dentry *dentry)
+void debugfs_remove(struct dentry *dentry)
 {
-       struct dentry *child, *parent;
-
        if (IS_ERR_OR_NULL(dentry))
                return;
 
-       parent = dentry;
- down:
-       inode_lock(d_inode(parent));
- loop:
-       /*
-        * The parent->d_subdirs is protected by the d_lock. Outside that
-        * lock, the child can be unlinked and set to be freed which can
-        * use the d_u.d_child as the rcu head and corrupt this list.
-        */
-       spin_lock(&parent->d_lock);
-       list_for_each_entry(child, &parent->d_subdirs, d_child) {
-               if (!simple_positive(child))
-                       continue;
-
-               /* perhaps simple_empty(child) makes more sense */
-               if (!list_empty(&child->d_subdirs)) {
-                       spin_unlock(&parent->d_lock);
-                       inode_unlock(d_inode(parent));
-                       parent = child;
-                       goto down;
-               }
-
-               spin_unlock(&parent->d_lock);
-
-               if (!__debugfs_remove(child, parent))
-                       simple_release_fs(&debugfs_mount, &debugfs_mount_count);
-
-               /*
-                * The parent->d_lock protects agaist child from unlinking
-                * from d_subdirs. When releasing the parent->d_lock we can
-                * no longer trust that the next pointer is valid.
-                * Restart the loop. We'll skip this one with the
-                * simple_positive() check.
-                */
-               goto loop;
-       }
-       spin_unlock(&parent->d_lock);
-
-       inode_unlock(d_inode(parent));
-       child = parent;
-       parent = parent->d_parent;
-       inode_lock(d_inode(parent));
-
-       if (child != dentry)
-               /* go up */
-               goto loop;
-
-       if (!__debugfs_remove(child, parent))
-               simple_release_fs(&debugfs_mount, &debugfs_mount_count);
-       inode_unlock(d_inode(parent));
+       simple_pin_fs(&debug_fs_type, &debugfs_mount, &debugfs_mount_count);
+       simple_recursive_removal(dentry, remove_one);
+       simple_release_fs(&debugfs_mount, &debugfs_mount_count);
 }
-EXPORT_SYMBOL_GPL(debugfs_remove_recursive);
+EXPORT_SYMBOL_GPL(debugfs_remove);
 
 /**
  * debugfs_rename - rename a file/directory in the debugfs filesystem
index 1463b03..c686bd9 100644 (file)
@@ -19,6 +19,7 @@
 #include <linux/buffer_head.h> /* sync_mapping_buffers */
 #include <linux/fs_context.h>
 #include <linux/pseudo_fs.h>
+#include <linux/fsnotify.h>
 
 #include <linux/uaccess.h>
 
@@ -239,6 +240,75 @@ const struct inode_operations simple_dir_inode_operations = {
 };
 EXPORT_SYMBOL(simple_dir_inode_operations);
 
+static struct dentry *find_next_child(struct dentry *parent, struct dentry *prev)
+{      /*
+        * Return the next child of @parent that passes simple_positive(),
+        * searching parent->d_subdirs after @prev (or from the start of the
+        * list when @prev is NULL).  The returned child has a reference
+        * taken via dget_dlock(); NULL is returned when the end of the list
+        * is reached.  In every case @prev is passed to dput() before
+        * returning.
+        */
+       struct dentry *child = NULL;
+       struct list_head *p = prev ? &prev->d_child : &parent->d_subdirs; /* resume after prev, else start at the list head */
+
+       spin_lock(&parent->d_lock);     /* held for the whole walk of d_subdirs */
+       while ((p = p->next) != &parent->d_subdirs) {   /* stop once we are back at the list head */
+               struct dentry *d = container_of(p, struct dentry, d_child);
+               if (simple_positive(d)) {       /* first check, without d->d_lock */
+                       spin_lock_nested(&d->d_lock, DENTRY_D_LOCK_NESTED);
+                       if (simple_positive(d)) /* re-check now that d->d_lock is held */
+                               child = dget_dlock(d);
+                       spin_unlock(&d->d_lock);
+                       if (likely(child))
+                               break;
+               }
+       }
+       spin_unlock(&parent->d_lock);
+       dput(prev);     /* prev may be NULL here; dput() is expected to accept that -- confirm */
+       return child;
+}
+/**
+ * simple_recursive_removal - remove a dentry and everything below it
+ * @dentry: root of the subtree to remove
+ * @callback: optional (may be NULL); called once for every removed dentry
+ *            that still passes simple_positive(), with the inode of that
+ *            dentry's parent locked
+ *
+ * Walks the subtree depth-first: a dentry is only "killed" once
+ * find_next_child() reports no remaining positive children, so children
+ * are always handled before their parent.  Every directory entered is
+ * flagged S_DEAD under its inode lock; the stated requirement is that
+ * nothing creates files in an IS_DEADDIR directory.  The walk follows
+ * ->d_parent when ascending, which relies on the other stated
+ * requirement: no cross-directory renames.
+ *
+ * For each killed dentry: ctime is updated and the link count cleared,
+ * then, under the parent's inode lock, d_invalidate() is called, the
+ * matching fsnotify event is sent, @callback runs and one reference is
+ * dropped.  After @dentry itself has been handled, its parent's
+ * ctime/mtime are updated (and the parent's link count is dropped if
+ * @dentry is a directory) and the function returns.
+ */
+void simple_recursive_removal(struct dentry *dentry,
+                              void (*callback)(struct dentry *))
+{
+       struct dentry *this = dget(dentry);     /* reference dropped by dput(dentry) just before returning */
+       while (true) {
+               struct dentry *victim = NULL, *child;   /* victim == NULL: scan this's children from the start */
+               struct inode *inode = this->d_inode;
+
+               inode_lock(inode);
+               if (d_is_dir(this))
+                       inode->i_flags |= S_DEAD;       /* mark the directory dead while holding its lock */
+               while ((child = find_next_child(this, victim)) == NULL) {       /* also dput()s victim */
+                       // kill and ascend
+                       // update metadata while it's still locked
+                       inode->i_ctime = current_time(inode);
+                       clear_nlink(inode);
+                       inode_unlock(inode);
+                       victim = this;
+                       this = this->d_parent;  /* ascend: the parent becomes the current directory */
+                       inode = this->d_inode;
+                       inode_lock(inode);      /* parent's lock is held for the steps below */
+                       if (simple_positive(victim)) {  /* presumably skipped if someone else already removed it */
+                               d_invalidate(victim);   // avoid lost mounts
+                               if (d_is_dir(victim))
+                                       fsnotify_rmdir(inode, victim);
+                               else
+                                       fsnotify_unlink(inode, victim);
+                               if (callback)
+                                       callback(victim);
+                               dput(victim);           // unpin it
+                       }
+                       if (victim == dentry) { /* the root of the subtree has just been handled */
+                               inode->i_ctime = inode->i_mtime =
+                                       current_time(inode);
+                               if (d_is_dir(dentry))
+                                       drop_nlink(inode);      /* inode is the parent's here */
+                               inode_unlock(inode);
+                               dput(dentry);   /* pairs with dget() at the top */
+                               return;
+                       }
+               }
+               inode_unlock(inode);    /* a child was found: unlock this directory ... */
+               this = child;           /* ... and descend into the child */
+       }
+}
+EXPORT_SYMBOL(simple_recursive_removal);
+
 static const struct super_operations simple_super_operations = {
        .statfs         = simple_statfs,
 };
index 0caa151..0ee8c6d 100644 (file)
@@ -330,7 +330,10 @@ static struct dentry *start_creating(const char *name, struct dentry *parent)
                parent = tracefs_mount->mnt_root;
 
        inode_lock(parent->d_inode);
-       dentry = lookup_one_len(name, parent, strlen(name));
+       if (unlikely(IS_DEADDIR(parent->d_inode)))
+               dentry = ERR_PTR(-ENOENT);
+       else
+               dentry = lookup_one_len(name, parent, strlen(name));
        if (!IS_ERR(dentry) && dentry->d_inode) {
                dput(dentry);
                dentry = ERR_PTR(-EEXIST);
@@ -499,122 +502,27 @@ __init struct dentry *tracefs_create_instance_dir(const char *name,
        return dentry;
 }
 
-static int __tracefs_remove(struct dentry *dentry, struct dentry *parent)
+static void remove_one(struct dentry *victim)
 {
-       int ret = 0;
-
-       if (simple_positive(dentry)) {
-               if (dentry->d_inode) {
-                       dget(dentry);
-                       switch (dentry->d_inode->i_mode & S_IFMT) {
-                       case S_IFDIR:
-                               ret = simple_rmdir(parent->d_inode, dentry);
-                               if (!ret)
-                                       fsnotify_rmdir(parent->d_inode, dentry);
-                               break;
-                       default:
-                               simple_unlink(parent->d_inode, dentry);
-                               fsnotify_unlink(parent->d_inode, dentry);
-                               break;
-                       }
-                       if (!ret)
-                               d_delete(dentry);
-                       dput(dentry);
-               }
-       }
-       return ret;
-}
-
-/**
- * tracefs_remove - removes a file or directory from the tracefs filesystem
- * @dentry: a pointer to a the dentry of the file or directory to be
- *          removed.
- *
- * This function removes a file or directory in tracefs that was previously
- * created with a call to another tracefs function (like
- * tracefs_create_file() or variants thereof.)
- */
-void tracefs_remove(struct dentry *dentry)
-{
-       struct dentry *parent;
-       int ret;
-
-       if (IS_ERR_OR_NULL(dentry))
-               return;
-
-       parent = dentry->d_parent;
-       inode_lock(parent->d_inode);
-       ret = __tracefs_remove(dentry, parent);
-       inode_unlock(parent->d_inode);
-       if (!ret)
-               simple_release_fs(&tracefs_mount, &tracefs_mount_count);
+       simple_release_fs(&tracefs_mount, &tracefs_mount_count);
 }
 
 /**
- * tracefs_remove_recursive - recursively removes a directory
+ * tracefs_remove - recursively removes a directory
  * @dentry: a pointer to a the dentry of the directory to be removed.
  *
  * This function recursively removes a directory tree in tracefs that
  * was previously created with a call to another tracefs function
  * (like tracefs_create_file() or variants thereof.)
  */
-void tracefs_remove_recursive(struct dentry *dentry)
+void tracefs_remove(struct dentry *dentry)
 {
-       struct dentry *child, *parent;
-
        if (IS_ERR_OR_NULL(dentry))
                return;
 
-       parent = dentry;
- down:
-       inode_lock(parent->d_inode);
- loop:
-       /*
-        * The parent->d_subdirs is protected by the d_lock. Outside that
-        * lock, the child can be unlinked and set to be freed which can
-        * use the d_u.d_child as the rcu head and corrupt this list.
-        */
-       spin_lock(&parent->d_lock);
-       list_for_each_entry(child, &parent->d_subdirs, d_child) {
-               if (!simple_positive(child))
-                       continue;
-
-               /* perhaps simple_empty(child) makes more sense */
-               if (!list_empty(&child->d_subdirs)) {
-                       spin_unlock(&parent->d_lock);
-                       inode_unlock(parent->d_inode);
-                       parent = child;
-                       goto down;
-               }
-
-               spin_unlock(&parent->d_lock);
-
-               if (!__tracefs_remove(child, parent))
-                       simple_release_fs(&tracefs_mount, &tracefs_mount_count);
-
-               /*
-                * The parent->d_lock protects agaist child from unlinking
-                * from d_subdirs. When releasing the parent->d_lock we can
-                * no longer trust that the next pointer is valid.
-                * Restart the loop. We'll skip this one with the
-                * simple_positive() check.
-                */
-               goto loop;
-       }
-       spin_unlock(&parent->d_lock);
-
-       inode_unlock(parent->d_inode);
-       child = parent;
-       parent = parent->d_parent;
-       inode_lock(parent->d_inode);
-
-       if (child != dentry)
-               /* go up */
-               goto loop;
-
-       if (!__tracefs_remove(child, parent))
-               simple_release_fs(&tracefs_mount, &tracefs_mount_count);
-       inode_unlock(parent->d_inode);
+       simple_pin_fs(&trace_fs_type, &tracefs_mount, &tracefs_mount_count);
+       simple_recursive_removal(dentry, remove_one);
+       simple_release_fs(&tracefs_mount, &tracefs_mount_count);
 }
 
 /**
index bf9b6ca..3d013de 100644 (file)
@@ -83,7 +83,7 @@ struct dentry *debugfs_create_automount(const char *name,
                                        void *data);
 
 void debugfs_remove(struct dentry *dentry);
-void debugfs_remove_recursive(struct dentry *dentry);
+#define debugfs_remove_recursive debugfs_remove
 
 const struct file_operations *debugfs_real_fops(const struct file *filp);
 
index 98e0349..9a2be7c 100644 (file)
@@ -3303,6 +3303,8 @@ extern int simple_unlink(struct inode *, struct dentry *);
 extern int simple_rmdir(struct inode *, struct dentry *);
 extern int simple_rename(struct inode *, struct dentry *,
                         struct inode *, struct dentry *, unsigned int);
+extern void simple_recursive_removal(struct dentry *,
+                              void (*callback)(struct dentry *));
 extern int noop_fsync(struct file *, loff_t, loff_t, int);
 extern int noop_set_page_dirty(struct page *page);
 extern void noop_invalidatepage(struct page *page, unsigned int offset,
index 88d279c..9991244 100644 (file)
@@ -28,7 +28,6 @@ struct dentry *tracefs_create_file(const char *name, umode_t mode,
 struct dentry *tracefs_create_dir(const char *name, struct dentry *parent);
 
 void tracefs_remove(struct dentry *dentry);
-void tracefs_remove_recursive(struct dentry *dentry);
 
 struct dentry *tracefs_create_instance_dir(const char *name, struct dentry *parent,
                                           int (*mkdir)(const char *name),
index 23459d5..84a0046 100644 (file)
@@ -8496,7 +8496,7 @@ static struct trace_array *trace_array_create(const char *name)
 
        ret = event_trace_add_tracer(tr->dir, tr);
        if (ret) {
-               tracefs_remove_recursive(tr->dir);
+               tracefs_remove(tr->dir);
                goto out_free_tr;
        }
 
@@ -8605,7 +8605,7 @@ static int __remove_instance(struct trace_array *tr)
        event_trace_del_tracer(tr);
        ftrace_clear_pids(tr);
        ftrace_destroy_function_files(tr);
-       tracefs_remove_recursive(tr->dir);
+       tracefs_remove(tr->dir);
        free_trace_buffers(tr);
 
        for (i = 0; i < tr->nr_topts; i++) {
index c6de3ce..1faf3a1 100644 (file)
@@ -697,7 +697,7 @@ static void remove_subsystem(struct trace_subsystem_dir *dir)
                return;
 
        if (!--dir->nr_events) {
-               tracefs_remove_recursive(dir->entry);
+               tracefs_remove(dir->entry);
                list_del(&dir->list);
                __put_system_dir(dir);
        }
@@ -716,7 +716,7 @@ static void remove_event_file_dir(struct trace_event_file *file)
                }
                spin_unlock(&dir->d_lock);
 
-               tracefs_remove_recursive(dir);
+               tracefs_remove(dir);
        }
 
        list_del(&file->list);
@@ -3064,7 +3064,7 @@ int event_trace_del_tracer(struct trace_array *tr)
 
        down_write(&trace_event_sem);
        __trace_remove_event_dirs(tr);
-       tracefs_remove_recursive(tr->event_dir);
+       tracefs_remove(tr->event_dir);
        up_write(&trace_event_sem);
 
        tr->event_dir = NULL;
index 6638d63..402d022 100644 (file)
@@ -556,7 +556,7 @@ static int init_tracefs(void)
        return 0;
 
  err:
-       tracefs_remove_recursive(top_dir);
+       tracefs_remove(top_dir);
        return -ENOMEM;
 }