btrfs: fix race between quota disable and quota assign ioctls
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index db680f5..26110d9 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -940,6 +940,14 @@ int btrfs_quota_enable(struct btrfs_fs_info *fs_info)
        int ret = 0;
        int slot;
 
+       /*
+        * We need to have subvol_sem write locked, to prevent races between
+        * concurrent tasks trying to enable quotas, because we will unlock
+        * and relock qgroup_ioctl_lock before setting fs_info->quota_root
+        * and before setting BTRFS_FS_QUOTA_ENABLED.
+        */
+       lockdep_assert_held_write(&fs_info->subvol_sem);
+
        mutex_lock(&fs_info->qgroup_ioctl_lock);
        if (fs_info->quota_root)
                goto out;
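A note on the new assertion: lockdep_assert_held_write() only documents and checks the precondition; the write lock on subvol_sem is taken by the caller. A minimal sketch of the expected caller pattern, modeled on btrfs_ioctl_quota_ctl() in fs/btrfs/ioctl.c (simplified, error handling elided; the function name below is illustrative only):

        /* Illustrative caller: quotas are enabled/disabled with subvol_sem write locked. */
        static long quota_ctl_example(struct btrfs_fs_info *fs_info, u64 cmd)
        {
                int ret;

                down_write(&fs_info->subvol_sem);
                switch (cmd) {
                case BTRFS_QUOTA_CTL_ENABLE:
                        ret = btrfs_quota_enable(fs_info);
                        break;
                case BTRFS_QUOTA_CTL_DISABLE:
                        ret = btrfs_quota_disable(fs_info);
                        break;
                default:
                        ret = -EINVAL;
                        break;
                }
                up_write(&fs_info->subvol_sem);
                return ret;
        }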
@@ -1117,8 +1125,19 @@ out_add_root:
                goto out_free_path;
        }
 
+       mutex_unlock(&fs_info->qgroup_ioctl_lock);
+       /*
+        * Commit the transaction while not holding qgroup_ioctl_lock, to avoid
+        * a deadlock with tasks concurrently doing other qgroup operations, such
+        * as adding/removing qgroups or adding/deleting qgroup relations,
+        * because all qgroup operations first start or join a transaction and then
+        * lock the qgroup_ioctl_lock mutex.
+        * We are safe from a concurrent task trying to enable quotas by calling
+        * this function, since we are serialized by fs_info->subvol_sem.
+        */
        ret = btrfs_commit_transaction(trans);
        trans = NULL;
+       mutex_lock(&fs_info->qgroup_ioctl_lock);
        if (ret)
                goto out_free_path;
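The ordering described in the comment above can be pictured as the following timeline; it is illustrative only and not part of the patch:

        /*
         * Deadlock avoided by committing without qgroup_ioctl_lock held:
         *
         *   Task A: quota enable                Task B: other qgroup ioctl
         *   --------------------                --------------------------
         *   mutex_lock(qgroup_ioctl_lock)
         *                                       btrfs_start_transaction() (joins)
         *   btrfs_commit_transaction()
         *     waits for Task B to release
         *     its transaction handle
         *                                       mutex_lock(qgroup_ioctl_lock)
         *                                         blocks on Task A -> deadlock
         */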
 
@@ -1138,6 +1157,21 @@ out_add_root:
                fs_info->qgroup_rescan_running = true;
                btrfs_queue_work(fs_info->qgroup_rescan_workers,
                                 &fs_info->qgroup_rescan_work);
+       } else {
+               /*
+                * We have set both BTRFS_FS_QUOTA_ENABLED and
+                * BTRFS_QGROUP_STATUS_FLAG_ON, so we can only fail with
+                * -EINPROGRESS. That can happen because someone started the
+                * rescan worker by calling quota rescan ioctl before we
+        * attempted to initialize the rescan worker. Failure due to quotas
+        * being disabled in the meantime is not possible, because
+                * we are holding a write lock on fs_info->subvol_sem, which
+                * is also acquired when disabling quotas.
+        * Ignore such an error; any other error would require undoing
+        * everything we did in the transaction we just committed.
+                */
+               ASSERT(ret == -EINPROGRESS);
+               ret = 0;
        }
 
 out_free_path:
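The -EINPROGRESS asserted above originates in qgroup_rescan_init(), which refuses to start a second rescan while one is already flagged as running; a simplified, illustrative excerpt of those checks (the same function also gains an -EBUSY case in the last hunk of this diff):

        /* Simplified excerpt of the qgroup_rescan_init() checks (illustrative). */
        mutex_lock(&fs_info->qgroup_rescan_lock);
        if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN)
                ret = -EINPROGRESS;     /* a rescan is already running */
        else if (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON))
                ret = -EINVAL;          /* qgroups are not enabled */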
@@ -1166,12 +1200,34 @@ int btrfs_quota_disable(struct btrfs_fs_info *fs_info)
        struct btrfs_trans_handle *trans = NULL;
        int ret = 0;
 
+       /*
+        * We need to have subvol_sem write locked, to prevent races between
+        * concurrent tasks trying to disable quotas, because we will unlock
+        * and relock qgroup_ioctl_lock across BTRFS_FS_QUOTA_ENABLED changes.
+        */
+       lockdep_assert_held_write(&fs_info->subvol_sem);
+
        mutex_lock(&fs_info->qgroup_ioctl_lock);
        if (!fs_info->quota_root)
                goto out;
+
+       /*
+        * Unlock the qgroup_ioctl_lock mutex before waiting for the rescan worker to
+        * complete. Otherwise we can deadlock because btrfs_remove_qgroup() needs
+        * to lock that mutex while holding a transaction handle and the rescan
+        * worker needs to commit a transaction.
+        */
        mutex_unlock(&fs_info->qgroup_ioctl_lock);
 
        /*
+        * Request the qgroup rescan worker to complete and wait for it. This
+        * wait must happen before quota disable starts its transaction, since
+        * the worker's transaction commit may otherwise deadlock with it.
+        */
+       clear_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags);
+       btrfs_qgroup_wait_for_completion(fs_info, false);
+
+       /*
         * 1 For the root item
         *
         * We should also reserve enough items for the quota tree deletion in
@@ -1186,14 +1242,13 @@ int btrfs_quota_disable(struct btrfs_fs_info *fs_info)
        if (IS_ERR(trans)) {
                ret = PTR_ERR(trans);
                trans = NULL;
+               set_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags);
                goto out;
        }
 
        if (!fs_info->quota_root)
                goto out;
 
-       clear_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags);
-       btrfs_qgroup_wait_for_completion(fs_info, false);
        spin_lock(&fs_info->qgroup_lock);
        quota_root = fs_info->quota_root;
        fs_info->quota_root = NULL;
@@ -1219,7 +1274,8 @@ int btrfs_quota_disable(struct btrfs_fs_info *fs_info)
        btrfs_tree_lock(quota_root->node);
        btrfs_clean_tree_block(quota_root->node);
        btrfs_tree_unlock(quota_root->node);
-       btrfs_free_tree_block(trans, quota_root, quota_root->node, 0, 1);
+       btrfs_free_tree_block(trans, btrfs_root_id(quota_root),
+                             quota_root->node, 0, 1);
 
        btrfs_put_root(quota_root);
 
@@ -2696,13 +2752,22 @@ cleanup:
 }
 
 /*
- * called from commit_transaction. Writes all changed qgroups to disk.
+ * Writes all changed qgroups to disk.
+ * Called by the transaction commit path and the qgroup assign ioctl.
  */
 int btrfs_run_qgroups(struct btrfs_trans_handle *trans)
 {
        struct btrfs_fs_info *fs_info = trans->fs_info;
        int ret = 0;
 
+       /*
+        * In case we are called from the qgroup assign ioctl, assert that we
+        * are holding the qgroup_ioctl_lock, otherwise we can race with a quota
+        * disable operation (ioctl) and access a freed quota root.
+        */
+       if (trans->transaction->state != TRANS_STATE_COMMIT_DOING)
+               lockdep_assert_held(&fs_info->qgroup_ioctl_lock);
+
        if (!fs_info->quota_root)
                return ret;
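The ioctl side of this rule lives in fs/btrfs/ioctl.c: the qgroup assign ioctl is expected to hold qgroup_ioctl_lock across btrfs_run_qgroups() so a concurrent quota disable cannot free the quota root underneath it. A condensed, illustrative sketch of that call site:

        /* Sketch of the btrfs_ioctl_qgroup_assign() call site (simplified, illustrative). */
        mutex_lock(&fs_info->qgroup_ioctl_lock);
        err = btrfs_run_qgroups(trans);
        mutex_unlock(&fs_info->qgroup_ioctl_lock);
        if (err < 0)
                btrfs_handle_fs_error(fs_info, err,
                                      "failed to update qgroup status and info");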
 
@@ -2847,14 +2912,7 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, u64 srcid,
                dstgroup->rsv_rfer = inherit->lim.rsv_rfer;
                dstgroup->rsv_excl = inherit->lim.rsv_excl;
 
-               ret = update_qgroup_limit_item(trans, dstgroup);
-               if (ret) {
-                       fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
-                       btrfs_info(fs_info,
-                                  "unable to update quota limit for %llu",
-                                  dstgroup->qgroupid);
-                       goto unlock;
-               }
+               qgroup_dirty(fs_info, dstgroup);
        }
 
        if (srcid) {
@@ -3224,7 +3282,8 @@ out:
 static bool rescan_should_stop(struct btrfs_fs_info *fs_info)
 {
        return btrfs_fs_closing(fs_info) ||
-               test_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state);
+               test_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state) ||
+               !test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags);
 }
 
 static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
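rescan_should_stop() is polled by the worker loop modified in the following hunks; after this patch its simplified shape is roughly (illustrative):

        /* Simplified shape of the rescan worker loop (illustrative). */
        while (!err && !(stopped = rescan_should_stop(fs_info))) {
                trans = btrfs_start_transaction(fs_info->fs_root, 0);
                if (IS_ERR(trans)) {
                        err = PTR_ERR(trans);
                        break;
                }

                err = qgroup_rescan_leaf(trans, path);
                did_leaf_rescans = true;

                if (err > 0)
                        btrfs_commit_transaction(trans);
                else
                        btrfs_end_transaction(trans);
        }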
@@ -3236,6 +3295,7 @@ static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
        int err = -ENOMEM;
        int ret = 0;
        bool stopped = false;
+       bool did_leaf_rescans = false;
 
        path = btrfs_alloc_path();
        if (!path)
@@ -3254,11 +3314,10 @@ static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
                        err = PTR_ERR(trans);
                        break;
                }
-               if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) {
-                       err = -EINTR;
-               } else {
-                       err = qgroup_rescan_leaf(trans, path);
-               }
+
+               err = qgroup_rescan_leaf(trans, path);
+               did_leaf_rescans = true;
+
                if (err > 0)
                        btrfs_commit_transaction(trans);
                else
@@ -3272,22 +3331,29 @@ out:
        if (err > 0 &&
            fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT) {
                fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
-       } else if (err < 0) {
+       } else if (err < 0 || stopped) {
                fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
        }
        mutex_unlock(&fs_info->qgroup_rescan_lock);
 
        /*
-        * only update status, since the previous part has already updated the
-        * qgroup info.
+        * Only update status, since the previous part has already updated the
+        * qgroup info, and only if we did any actual work. This also prevents
+        * a race with a concurrent quota disable, which has already set
+        * fs_info->quota_root to NULL and cleared BTRFS_FS_QUOTA_ENABLED at
+        * btrfs_quota_disable().
         */
-       trans = btrfs_start_transaction(fs_info->quota_root, 1);
-       if (IS_ERR(trans)) {
-               err = PTR_ERR(trans);
+       if (did_leaf_rescans) {
+               trans = btrfs_start_transaction(fs_info->quota_root, 1);
+               if (IS_ERR(trans)) {
+                       err = PTR_ERR(trans);
+                       trans = NULL;
+                       btrfs_err(fs_info,
+                                 "fail to start transaction for status update: %d",
+                                 err);
+               }
+       } else {
                trans = NULL;
-               btrfs_err(fs_info,
-                         "fail to start transaction for status update: %d",
-                         err);
        }
 
        mutex_lock(&fs_info->qgroup_rescan_lock);
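The race closed by the did_leaf_rescans check above can be sketched as the following timeline (illustrative, not part of the patch):

        /*
         *   rescan worker                        quota disable
         *   -------------                        -------------
         *                                        clear BTRFS_FS_QUOTA_ENABLED
         *                                        fs_info->quota_root = NULL
         *   rescan_should_stop() -> true
         *     (loop never ran, no leaf rescans)
         *   btrfs_start_transaction(quota_root)
         *     dereferences a NULL quota root
         *
         * With the check, a worker that did no leaf rescans skips the status
         * update transaction and never touches quota_root in that window.
         */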
@@ -3360,6 +3426,9 @@ qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid,
                        btrfs_warn(fs_info,
                        "qgroup rescan init failed, qgroup is not enabled");
                        ret = -EINVAL;
+               } else if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) {
+                       /* Quota disable is in progress */
+                       ret = -EBUSY;
                }
 
                if (ret) {
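For context, qgroup_rescan_init() is reached from the rescan ioctl via btrfs_qgroup_rescan(), so a rescan requested while a quota disable is in flight now fails cleanly with EBUSY instead of racing with it. A minimal, illustrative sketch of that entry point (the function name below is hypothetical and the tail of the real function is elided):

        int rescan_entry_example(struct btrfs_fs_info *fs_info)
        {
                int ret;

                ret = qgroup_rescan_init(fs_info, 0, 1);
                if (ret)        /* -EINVAL, -EINPROGRESS or the new -EBUSY */
                        return ret;

                /* zero rescan tracking and queue fs_info->qgroup_rescan_work */
                return 0;
        }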