#include <linux/semaphore.h>
#include <linux/uuid.h>
#include <linux/list_sort.h>
+#include <linux/namei.h>
#include "misc.h"
#include "ctree.h"
#include "extent_map.h"
return ret;
}
-static bool device_path_matched(const char *path, struct btrfs_device *device)
+/*
+ * Check if the device in the path matches the device in the given struct device.
+ *
+ * Returns:
+ * true If it is the same device.
+ * false If it is not the same device or on error.
+ */
+static bool device_matched(const struct btrfs_device *device, const char *path)
{
- int found;
+ char *device_name;
+ dev_t dev_old;
+ dev_t dev_new;
+ int ret;
+
+ /*
+ * If we are looking for a device with the matching dev_t, then skip
+ * device without a name (a missing device).
+ */
+ if (!device->name)
+ return false;
+
+ device_name = kzalloc(BTRFS_PATH_NAME_MAX, GFP_KERNEL);
+ if (!device_name)
+ return false;
rcu_read_lock();
- found = strcmp(rcu_str_deref(device->name), path);
+ scnprintf(device_name, BTRFS_PATH_NAME_MAX, "%s", rcu_str_deref(device->name));
rcu_read_unlock();
- return found == 0;
+ ret = lookup_bdev(device_name, &dev_old);
+ kfree(device_name);
+ if (ret)
+ return false;
+
+ ret = lookup_bdev(path, &dev_new);
+ if (ret)
+ return false;
+
+ if (dev_old == dev_new)
+ return true;
+
+ return false;
}
/*
&fs_devices->devices, dev_list) {
if (skip_device && skip_device == device)
continue;
- if (path && !device->name)
- continue;
- if (path && !device_path_matched(path, device))
+ if (path && !device_matched(device, path))
continue;
if (fs_devices->opened) {
/* for an already deleted device return 0 */
list_for_each_entry(seed_dev, &fs_devices->seed_list, seed_list)
__btrfs_free_extra_devids(seed_dev, &latest_dev);
- fs_devices->latest_bdev = latest_dev->bdev;
+ fs_devices->latest_dev = latest_dev;
mutex_unlock(&uuid_mutex);
}
if (device->devid == BTRFS_DEV_REPLACE_DEVID)
clear_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state);
- if (test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state))
+ if (test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state)) {
+ clear_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state);
fs_devices->missing_devices--;
+ }
btrfs_close_bdev(device);
if (device->bdev) {
return -EINVAL;
fs_devices->opened = 1;
- fs_devices->latest_bdev = latest_dev->bdev;
+ fs_devices->latest_dev = latest_dev;
fs_devices->total_rw_bytes = 0;
fs_devices->chunk_alloc_policy = BTRFS_CHUNK_ALLOC_REGULAR;
fs_devices->read_policy = BTRFS_READ_POLICY_PID;
bytenr_orig = btrfs_sb_offset(0);
ret = btrfs_sb_log_location_bdev(bdev, 0, READ, &bytenr);
- if (ret)
- return ERR_PTR(ret);
+ if (ret) {
+ device = ERR_PTR(ret);
+ goto error_bdev_put;
+ }
disk_super = btrfs_read_disk_super(bdev, bytenr, bytenr_orig);
if (IS_ERR(disk_super)) {
key.type = BTRFS_DEV_ITEM_KEY;
key.offset = device->devid;
+ btrfs_reserve_chunk_metadata(trans, true);
ret = btrfs_insert_empty_item(trans, trans->fs_info->chunk_root, path,
&key, sizeof(*dev_item));
+ btrfs_trans_release_chunk_metadata(trans);
if (ret)
goto out;
/*
* Function to update ctime/mtime for a given device path.
* Mainly used for ctime/mtime based probe like libblkid.
+ *
+ * We don't care about errors here, this is just to be kind to userspace.
*/
-static void update_dev_time(struct block_device *bdev)
+static void update_dev_time(const char *device_path)
{
- struct inode *inode = bdev->bd_inode;
+ struct path path;
struct timespec64 now;
+ int ret;
- /* Shouldn't happen but just in case. */
- if (!inode)
+ ret = kern_path(device_path, LOOKUP_FOLLOW, &path);
+ if (ret)
return;
- now = current_time(inode);
- generic_update_time(inode, &now, S_MTIME | S_CTIME);
+ now = current_time(d_inode(path.dentry));
+ inode_update_time(d_inode(path.dentry), &now, S_MTIME | S_CTIME);
+ path_put(&path);
}
static int btrfs_rm_dev_item(struct btrfs_device *device)
key.type = BTRFS_DEV_ITEM_KEY;
key.offset = device->devid;
+ btrfs_reserve_chunk_metadata(trans, false);
ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
+ btrfs_trans_release_chunk_metadata(trans);
if (ret) {
if (ret > 0)
ret = -ENOENT;
}
/*
- * Helper function to check if the given device is part of s_bdev / latest_bdev
+ * Helper function to check if the given device is part of s_bdev / latest_dev
* and replace it with the provided or the next active device, in the context
* where this function called, there should be always be another device (or
* this_dev) which is active.
(fs_info->sb->s_bdev == device->bdev))
fs_info->sb->s_bdev = next_device->bdev;
- if (fs_info->fs_devices->latest_bdev == device->bdev)
- fs_info->fs_devices->latest_bdev = next_device->bdev;
+ if (fs_info->fs_devices->latest_dev->bdev == device->bdev)
+ fs_info->fs_devices->latest_dev = next_device;
}
/*
btrfs_kobject_uevent(bdev, KOBJ_CHANGE);
/* Update ctime/mtime for device path for libblkid */
- update_dev_time(bdev);
+ update_dev_time(device_path);
}
int btrfs_rm_device(struct btrfs_fs_info *fs_info, const char *device_path,
u64 num_devices;
int ret = 0;
- mutex_lock(&uuid_mutex);
-
+ /*
+ * The device list in fs_devices is accessed without locks (neither
+ * uuid_mutex nor device_list_mutex) as it won't change on a mounted
+ * filesystem and another device rm cannot run.
+ */
num_devices = btrfs_num_devices(fs_info);
ret = btrfs_check_raid_min_devices(fs_info, num_devices - 1);
mutex_unlock(&fs_info->chunk_mutex);
}
- mutex_unlock(&uuid_mutex);
ret = btrfs_shrink_device(device, 0);
if (!ret)
btrfs_reada_remove_dev(device);
- mutex_lock(&uuid_mutex);
if (ret)
goto error_undo;
}
out:
- mutex_unlock(&uuid_mutex);
return ret;
error_undo:
mutex_unlock(&fs_devices->device_list_mutex);
- /*
- * The update_dev_time() with in btrfs_scratch_superblocks()
- * may lead to a call to btrfs_show_devname() which will try
- * to hold device_list_mutex. And here this device
- * is already out of device list, so we don't have to hold
- * the device_list_mutex lock.
- */
btrfs_scratch_superblocks(tgtdev->fs_info, tgtdev->bdev,
tgtdev->name->str);
key.type = BTRFS_DEV_ITEM_KEY;
while (1) {
+ btrfs_reserve_chunk_metadata(trans, false);
ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
+ btrfs_trans_release_chunk_metadata(trans);
if (ret < 0)
goto error;
device->fs_info = fs_info;
device->bdev = bdev;
- ret = btrfs_get_dev_zone_info(device);
+ ret = btrfs_get_dev_zone_info(device, false);
if (ret)
goto error_free_device;
btrfs_abort_transaction(trans, ret);
goto error_trans;
}
+ btrfs_assign_next_active_device(fs_info->fs_devices->latest_dev,
+ device);
}
device->fs_devices = fs_devices;
btrfs_forget_devices(device_path);
/* Update ctime/mtime for blkid or udev */
- update_dev_time(bdev);
+ update_dev_time(device_path);
return ret;
struct btrfs_super_block *super_copy = fs_info->super_copy;
u64 old_total;
u64 diff;
+ int ret;
if (!test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state))
return -EACCES;
&trans->transaction->dev_update_list);
mutex_unlock(&fs_info->chunk_mutex);
- return btrfs_update_device(trans, device);
+ btrfs_reserve_chunk_metadata(trans, false);
+ ret = btrfs_update_device(trans, device);
+ btrfs_trans_release_chunk_metadata(trans);
+
+ return ret;
}
static int btrfs_free_chunk(struct btrfs_trans_handle *trans, u64 chunk_offset)
const u64 sys_flags = btrfs_system_alloc_profile(fs_info);
struct btrfs_block_group *sys_bg;
- sys_bg = btrfs_alloc_chunk(trans, sys_flags);
+ sys_bg = btrfs_create_chunk(trans, sys_flags);
if (IS_ERR(sys_bg)) {
ret = PTR_ERR(sys_bg);
btrfs_abort_transaction(trans, ret);
struct btrfs_fs_info *fs_info = data;
int ret = 0;
+ sb_start_write(fs_info->sb);
mutex_lock(&fs_info->balance_mutex);
if (fs_info->balance_ctl)
ret = btrfs_balance(fs_info, fs_info->balance_ctl, NULL);
mutex_unlock(&fs_info->balance_mutex);
+ sb_end_write(fs_info->sb);
return ret;
}
round_down(old_total - diff, fs_info->sectorsize));
mutex_unlock(&fs_info->chunk_mutex);
+ btrfs_reserve_chunk_metadata(trans, false);
/* Now btrfs_update_device() will change the on-disk size. */
ret = btrfs_update_device(trans, device);
+ btrfs_trans_release_chunk_metadata(trans);
if (ret < 0) {
btrfs_abort_transaction(trans, ret);
btrfs_end_transaction(trans);
}
/*
- * Structure used internally for __btrfs_alloc_chunk() function.
+ * Structure used internally for btrfs_create_chunk() function.
* Wraps needed parameters.
*/
struct alloc_chunk_ctl {
return block_group;
}
-struct btrfs_block_group *btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
+struct btrfs_block_group *btrfs_create_chunk(struct btrfs_trans_handle *trans,
u64 type)
{
struct btrfs_fs_info *info = trans->fs_info;
*/
alloc_profile = btrfs_metadata_alloc_profile(fs_info);
- meta_bg = btrfs_alloc_chunk(trans, alloc_profile);
+ meta_bg = btrfs_create_chunk(trans, alloc_profile);
if (IS_ERR(meta_bg))
return PTR_ERR(meta_bg);
alloc_profile = btrfs_system_alloc_profile(fs_info);
- sys_bg = btrfs_alloc_chunk(trans, alloc_profile);
+ sys_bg = btrfs_create_chunk(trans, alloc_profile);
if (IS_ERR(sys_bg))
return PTR_ERR(sys_bg);
fs_info->fs_devices->total_rw_bytes = 0;
/*
+ * Lockdep complains about possible circular locking dependency between
+ * a disk's open_mutex (struct gendisk.open_mutex), the rw semaphores
+ * used for freeze procection of a fs (struct super_block.s_writers),
+ * which we take when starting a transaction, and extent buffers of the
+ * chunk tree if we call read_one_dev() while holding a lock on an
+ * extent buffer of the chunk tree. Since we are mounting the filesystem
+ * and at this point there can't be any concurrent task modifying the
+ * chunk tree, to keep it simple, just skip locking on the chunk tree.
+ */
+ ASSERT(!test_bit(BTRFS_FS_OPEN, &fs_info->flags));
+ path->skip_locking = 1;
+
+ /*
* Read all device items, and then all the chunk items. All
* device items are found before any chunk item (their object id
* is smaller than the lowest possible object id for a chunk
goto error;
break;
}
- /*
- * The nodes on level 1 are not locked but we don't need to do
- * that during mount time as nothing else can access the tree
- */
node = path->nodes[1];
if (node) {
if (last_ra_node != node->start) {
* requirement for chunk allocation, see the comment on
* top of btrfs_chunk_alloc() for details.
*/
- ASSERT(!test_bit(BTRFS_FS_OPEN, &fs_info->flags));
chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
ret = read_one_chunk(&found_key, leaf, chunk);
if (ret)
* do another round of validation checks.
*/
if (total_dev != fs_info->fs_devices->total_devices) {
- btrfs_err(fs_info,
- "super_num_devices %llu mismatch with num_devices %llu found here",
+ btrfs_warn(fs_info,
+"super block num_devices %llu mismatch with DEV_ITEM count %llu, will be repaired on next transaction commit",
btrfs_super_num_devices(fs_info->super_copy),
total_dev);
- ret = -EINVAL;
- goto error;
+ fs_info->fs_devices->total_devices = total_dev;
+ btrfs_set_super_num_devices(fs_info->super_copy, total_dev);
}
if (btrfs_super_total_bytes(fs_info->super_copy) <
fs_info->fs_devices->total_rw_bytes) {
target = cache->start;
btrfs_put_block_group(cache);
+ sb_start_write(fs_info->sb);
if (!btrfs_exclop_start(fs_info, BTRFS_EXCLOP_BALANCE)) {
btrfs_info(fs_info,
"zoned: skip relocating block group %llu to repair: EBUSY",
target);
+ sb_end_write(fs_info->sb);
return -EBUSY;
}
btrfs_put_block_group(cache);
mutex_unlock(&fs_info->reclaim_bgs_lock);
btrfs_exclop_finish(fs_info);
+ sb_end_write(fs_info->sb);
return ret;
}