+
+/*
+ * Read-modify-write helper for a single stripe buffer.
+ *
+ * Calls read_whole_eb() on @eb (presumably loading the stripe's current
+ * contents -- confirm against read_whole_eb()), then overwrites the part of
+ * @eb->data whose byte range [start, start + len) intersects @orig_eb with
+ * the corresponding bytes of @orig_eb->data.
+ *
+ * Returns the non-zero value reported by read_whole_eb(), otherwise 0.
+ * 0 is also returned, without copying, when the two ranges do not intersect.
+ */
+static int rmw_eb(struct btrfs_fs_info *info,
+		  struct extent_buffer *eb, struct extent_buffer *orig_eb)
+{
+	unsigned long src_off = 0;
+	unsigned long dst_off = 0;
+	unsigned long nbytes = eb->len;
+	int ret;
+
+	ret = read_whole_eb(info, eb, 0);
+	if (ret)
+		return ret;
+
+	/* Nothing to merge unless the two byte ranges intersect */
+	if (!(eb->start + eb->len > orig_eb->start &&
+	      eb->start < orig_eb->start + orig_eb->len))
+		return 0;
+
+	/*
+	 * The buffer that starts later is entered at offset 0; the other one
+	 * is entered at the distance between the two start addresses. This
+	 * covers orig_eb starting before, inside or exactly at the stripe.
+	 */
+	if (eb->start > orig_eb->start)
+		src_off = eb->start - orig_eb->start;
+	else if (orig_eb->start > eb->start)
+		dst_off = orig_eb->start - eb->start;
+
+	/* Clamp the length to what remains in the source and destination */
+	if (nbytes > orig_eb->len - src_off)
+		nbytes = orig_eb->len - src_off;
+	if (nbytes > eb->len - dst_off)
+		nbytes = eb->len - dst_off;
+
+	memcpy(eb->data + dst_off, orig_eb->data + src_off, nbytes);
+	return 0;
+}
+
+/*
+ * Build one newly allocated extent buffer per data stripe, filled from
+ * @orig_eb.
+ *
+ * Entries of @raid_map are walked from index 0 up to (not including) the first
+ * entry that is >= BTRFS_RAID5_P_STRIPE; each of those is treated as the
+ * start of a data stripe of @stripe_len bytes. A stripe that lies completely
+ * inside @orig_eb is copied straight from @orig_eb->data; any other stripe
+ * goes through rmw_eb().
+ *
+ * On success the new buffers are stored in @ebs[0..nr_data) and ownership
+ * passes to the caller (each one is released with free()). On failure every
+ * buffer allocated here is freed and @ebs is left untouched, so the caller
+ * never sees a pointer to freed memory.
+ *
+ * Returns 0 on success, -ENOMEM on allocation failure, or the non-zero value
+ * returned by rmw_eb().
+ */
+static int split_eb_for_raid56(struct btrfs_fs_info *info,
+			       struct extent_buffer *orig_eb,
+			       struct extent_buffer **ebs,
+			       u64 stripe_len, u64 *raid_map,
+			       int num_stripes)
+{
+	struct extent_buffer **tmp_ebs;
+	u64 start = orig_eb->start;
+	u64 this_eb_start;
+	int nr_data;
+	int i;
+	int ret = 0;
+
+	/* Zeroed, so the cleanup loop can free() every slot unconditionally */
+	tmp_ebs = calloc(num_stripes, sizeof(*tmp_ebs));
+	if (!tmp_ebs)
+		return -ENOMEM;
+
+	/* Alloc memory in a row for data stripes */
+	for (i = 0; i < num_stripes; i++) {
+		if (raid_map[i] >= BTRFS_RAID5_P_STRIPE)
+			break;
+
+		tmp_ebs[i] = calloc(1, sizeof(**tmp_ebs) + stripe_len);
+		if (!tmp_ebs[i]) {
+			ret = -ENOMEM;
+			goto clean_up;
+		}
+	}
+	/* Index of the first parity entry == number of data stripes */
+	nr_data = i;
+
+	for (i = 0; i < nr_data; i++) {
+		struct extent_buffer *eb = tmp_ebs[i];
+
+		eb->start = raid_map[i];
+		eb->len = stripe_len;
+		eb->refs = 1;
+		eb->flags = 0;
+		/* Device fd and physical offset are filled in by the caller */
+		eb->fd = -1;
+		eb->dev_bytenr = (u64)-1;
+
+		this_eb_start = raid_map[i];
+
+		if (start > this_eb_start ||
+		    start + orig_eb->len < this_eb_start + stripe_len) {
+			/* Stripe is not fully covered by orig_eb */
+			ret = rmw_eb(info, eb, orig_eb);
+			if (ret)
+				goto clean_up;
+		} else {
+			memcpy(eb->data, orig_eb->data + eb->start - start,
+			       stripe_len);
+		}
+	}
+
+	/*
+	 * Hand the buffers over only once all of them are complete, so a
+	 * failure above cannot leave freed pointers behind in @ebs.
+	 */
+	for (i = 0; i < nr_data; i++)
+		ebs[i] = tmp_ebs[i];
+
+	free(tmp_ebs);
+	return ret;
+clean_up:
+	for (i = 0; i < num_stripes; i++)
+		free(tmp_ebs[i]);
+	free(tmp_ebs);
+	return ret;
+}
+
+/*
+ * Write @eb to a RAID5/6 stripe set together with freshly computed parity.
+ *
+ * @eb is split into per-data-stripe buffers by split_eb_for_raid56(), a
+ * buffer is allocated for each P/Q entry of @raid_map, parity is generated
+ * with raid5_gen_result() or raid6_gen_syndrome(), and every buffer is then
+ * passed to write_extent_to_disk().
+ *
+ * @multi supplies the stripe count and, per stripe, the physical offset and
+ * device fd to write to. @raid_map[i] is either the start of data stripe i
+ * or one of BTRFS_RAID5_P_STRIPE / BTRFS_RAID6_Q_STRIPE. P (and Q) are placed
+ * in the last slot(s) of the buffer array, so this relies on the parity
+ * entries being the last entries of @raid_map.
+ *
+ * All temporary buffers are released before returning, on every path.
+ *
+ * Returns -ENOMEM on allocation failure, -EINVAL if a data buffer does not
+ * start at its @raid_map entry or no P stripe is present, a negative value
+ * from raid5_gen_result() / write_extent_to_disk() on their failure,
+ * otherwise the (non-negative) result of the last write_extent_to_disk().
+ */
+int write_raid56_with_parity(struct btrfs_fs_info *info,
+			     struct extent_buffer *eb,
+			     struct btrfs_multi_bio *multi,
+			     u64 stripe_len, u64 *raid_map)
+{
+	struct extent_buffer **ebs, *p_eb = NULL, *q_eb = NULL;
+	int i;
+	int ret;
+	int alloc_size = eb->len;
+	void **pointers;
+
+	/*
+	 * ebs must start out zeroed: the cleanup loop frees every slot, and
+	 * the parity slots are only filled in after all allocations succeeded.
+	 */
+	ebs = calloc(multi->num_stripes, sizeof(*ebs));
+	pointers = malloc(sizeof(*pointers) * multi->num_stripes);
+	if (!ebs || !pointers) {
+		free(ebs);
+		free(pointers);
+		return -ENOMEM;
+	}
+
+	if (stripe_len > alloc_size)
+		alloc_size = stripe_len;
+
+	ret = split_eb_for_raid56(info, eb, ebs, stripe_len, raid_map,
+				  multi->num_stripes);
+	if (ret)
+		goto out;
+
+	for (i = 0; i < multi->num_stripes; i++) {
+		struct extent_buffer *new_eb;
+
+		if (raid_map[i] < BTRFS_RAID5_P_STRIPE) {
+			/* Data stripe: point the split buffer at its device */
+			ebs[i]->dev_bytenr = multi->stripes[i].physical;
+			ebs[i]->fd = multi->stripes[i].dev->fd;
+			multi->stripes[i].dev->total_ios++;
+			if (ebs[i]->start != raid_map[i]) {
+				ret = -EINVAL;
+				goto out_free_split;
+			}
+			continue;
+		}
+
+		/* Parity stripe: allocate a buffer the generator will fill */
+		new_eb = malloc(sizeof(*eb) + alloc_size);
+		if (!new_eb) {
+			ret = -ENOMEM;
+			goto out_free_split;
+		}
+		new_eb->dev_bytenr = multi->stripes[i].physical;
+		new_eb->fd = multi->stripes[i].dev->fd;
+		multi->stripes[i].dev->total_ios++;
+		new_eb->len = stripe_len;
+
+		if (raid_map[i] == BTRFS_RAID5_P_STRIPE)
+			p_eb = new_eb;
+		else if (raid_map[i] == BTRFS_RAID6_Q_STRIPE)
+			q_eb = new_eb;
+	}
+
+	/* Without a P buffer the pointer table below would contain NULL */
+	if (!p_eb) {
+		ret = -EINVAL;
+		goto out_free_split;
+	}
+
+	if (q_eb) {
+		ebs[multi->num_stripes - 2] = p_eb;
+		ebs[multi->num_stripes - 1] = q_eb;
+		/* Now owned by ebs[]; avoid a second free() during cleanup */
+		p_eb = NULL;
+		q_eb = NULL;
+
+		for (i = 0; i < multi->num_stripes; i++)
+			pointers[i] = ebs[i]->data;
+
+		raid6_gen_syndrome(multi->num_stripes, stripe_len, pointers);
+	} else {
+		ebs[multi->num_stripes - 1] = p_eb;
+		/* Now owned by ebs[]; avoid a second free() during cleanup */
+		p_eb = NULL;
+
+		for (i = 0; i < multi->num_stripes; i++)
+			pointers[i] = ebs[i]->data;
+		ret = raid5_gen_result(multi->num_stripes, stripe_len,
+				       multi->num_stripes - 1, pointers);
+		if (ret < 0)
+			goto out_free_split;
+	}
+
+	for (i = 0; i < multi->num_stripes; i++) {
+		ret = write_extent_to_disk(ebs[i]);
+		if (ret < 0)
+			goto out_free_split;
+	}
+
+out_free_split:
+	for (i = 0; i < multi->num_stripes; i++) {
+		if (ebs[i] != eb)
+			free(ebs[i]);
+	}
+	/* Parity buffers that never made it into ebs[] (NULL otherwise) */
+	free(p_eb);
+	free(q_eb);
+out:
+	free(ebs);
+	free(pointers);
+
+	return ret;
+}
+
+/*
+ * Compute the per-device stripe length of a chunk from its chunk item.
+ *
+ * The chunk length is divided by a divisor that depends on the profile:
+ * 1 for single/RAID1/DUP, the stripe count for RAID0, the stripe count minus
+ * one for RAID5 and minus two for RAID6, and stripes / sub_stripes for RAID10.
+ *
+ * The chunk item is not validated here: callers must have run
+ * btrfs_check_chunk_valid() on it first. An unknown profile hits BUG_ON().
+ */
+u64 btrfs_stripe_length(struct btrfs_fs_info *fs_info,
+			struct extent_buffer *leaf,
+			struct btrfs_chunk *chunk)
+{
+	u32 num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
+	u64 profile = btrfs_chunk_type(leaf, chunk) &
+		      BTRFS_BLOCK_GROUP_PROFILE_MASK;
+	u64 chunk_len = btrfs_chunk_length(leaf, chunk);
+	/* How many stripes the chunk length is spread across */
+	u32 divisor = 1;
+
+	switch (profile) {
+	case 0: /* Single profile */
+	case BTRFS_BLOCK_GROUP_RAID1:
+	case BTRFS_BLOCK_GROUP_DUP:
+		/* Each stripe holds the whole chunk */
+		break;
+	case BTRFS_BLOCK_GROUP_RAID0:
+		divisor = num_stripes;
+		break;
+	case BTRFS_BLOCK_GROUP_RAID5:
+		divisor = num_stripes - 1;
+		break;
+	case BTRFS_BLOCK_GROUP_RAID6:
+		divisor = num_stripes - 2;
+		break;
+	case BTRFS_BLOCK_GROUP_RAID10:
+		divisor = num_stripes / btrfs_chunk_sub_stripes(leaf, chunk);
+		break;
+	default:
+		/* Invalid chunk profile found */
+		BUG_ON(1);
+	}
+
+	return chunk_len / divisor;
+}
+
+/*
+ * Round @device's total_bytes down to a multiple of the sector size and store
+ * the new value in the device's DEV_ITEM in the chunk tree.
+ *
+ * The in-memory device->total_bytes is rounded down before the transaction
+ * is started and is treated as the correct size from then on.
+ *
+ * Return 0 if the size of @device is already aligned (nothing is touched)
+ * Return >0 if the size was unaligned and has been fixed
+ * Return <0 if starting the transaction, finding the DEV_ITEM or committing
+ * failed
+ */
+int btrfs_fix_device_size(struct btrfs_fs_info *fs_info,
+			  struct btrfs_device *device)
+{
+	struct btrfs_root *chunk_root = fs_info->chunk_root;
+	u64 old_bytes = device->total_bytes;
+	struct btrfs_trans_handle *trans;
+	struct extent_buffer *leaf;
+	struct btrfs_dev_item *di;
+	struct btrfs_path path;
+	struct btrfs_key key;
+	int ret;
+
+	if (IS_ALIGNED(old_bytes, fs_info->sectorsize))
+		return 0;
+
+	/* Align the in-memory total_bytes first, and use it as correct size */
+	device->total_bytes = round_down(device->total_bytes,
+					 fs_info->sectorsize);
+
+	/* Key of this device's item in the chunk tree */
+	key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
+	key.type = BTRFS_DEV_ITEM_KEY;
+	key.offset = device->devid;
+
+	trans = btrfs_start_transaction(chunk_root, 1);
+	if (IS_ERR(trans)) {
+		ret = PTR_ERR(trans);
+		error("error starting transaction: %d (%s)",
+		      ret, strerror(-ret));
+		return ret;
+	}
+
+	btrfs_init_path(&path);
+	ret = btrfs_search_slot(trans, chunk_root, &key, &path, 0, 1);
+	if (ret) {
+		if (ret < 0) {
+			error("failed to search chunk root: %d (%s)",
+			      ret, strerror(-ret));
+		} else {
+			/* Positive result: no item with exactly this key */
+			error("failed to find DEV_ITEM for devid %llu",
+			      device->devid);
+			ret = -ENOENT;
+		}
+		/* We haven't modified anything, it's OK to commit current trans */
+		btrfs_commit_transaction(trans, chunk_root);
+		btrfs_release_path(&path);
+		return ret;
+	}
+
+	leaf = path.nodes[0];
+	di = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dev_item);
+	btrfs_set_device_total_bytes(leaf, di, device->total_bytes);
+	btrfs_mark_buffer_dirty(leaf);
+
+	ret = btrfs_commit_transaction(trans, chunk_root);
+	if (ret < 0) {
+		error("failed to commit current transaction: %d (%s)",
+		      ret, strerror(-ret));
+		btrfs_release_path(&path);
+		return ret;
+	}
+	btrfs_release_path(&path);
+
+	printf("Fixed device size for devid %llu, old size: %llu new size: %llu\n",
+	       device->devid, old_bytes, device->total_bytes);
+	return 1;
+}
+
+/*
+ * Make the super block's total_bytes equal to the sum of total_bytes over
+ * all devices of the filesystem.
+ *
+ * Every device's total_bytes must already be sector aligned; callers are
+ * expected to align them before calling this function.
+ *
+ * Return 0 if super block total_bytes already matches the sum
+ * Return >0 if it mismatched and was fixed without problem
+ * Return <0 if a device is unaligned (-EUCLEAN) or the transaction used to
+ * write the super blocks could not be started or committed
+ */
+int btrfs_fix_super_size(struct btrfs_fs_info *fs_info)
+{
+	struct list_head *dev_list = &fs_info->fs_devices->devices;
+	struct btrfs_root *tree_root = fs_info->tree_root;
+	u64 old_bytes = btrfs_super_total_bytes(fs_info->super_copy);
+	struct btrfs_trans_handle *trans;
+	struct btrfs_device *device;
+	u64 sum = 0;
+	int ret;
+
+	list_for_each_entry(device, dev_list, dev_list) {
+		if (IS_ALIGNED(device->total_bytes, fs_info->sectorsize)) {
+			sum += device->total_bytes;
+			continue;
+		}
+		/* The caller was supposed to align every device beforehand */
+		error("device %llu total_bytes %llu not aligned to %u",
+		      device->devid, device->total_bytes,
+		      fs_info->sectorsize);
+		return -EUCLEAN;
+	}
+
+	if (sum == old_bytes)
+		return 0;
+
+	btrfs_set_super_total_bytes(fs_info->super_copy, sum);
+
+	/* Commit transaction to update all super blocks */
+	trans = btrfs_start_transaction(tree_root, 1);
+	if (IS_ERR(trans)) {
+		ret = PTR_ERR(trans);
+		error("error starting transaction: %d (%s)",
+		      ret, strerror(-ret));
+		return ret;
+	}
+
+	ret = btrfs_commit_transaction(trans, tree_root);
+	if (ret < 0) {
+		error("failed to commit current transaction: %d (%s)",
+		      ret, strerror(-ret));
+		return ret;
+	}
+
+	printf("Fixed super total bytes, old size: %llu new size: %llu\n",
+	       old_bytes, sum);
+	return 1;
+}
+
+/*
+ * Fix unaligned device sizes and a mismatching super block total_bytes.
+ *
+ * Refuses to run when a seed device is present, when the device list is
+ * empty, or when any device has no open fd or is not writeable. Otherwise
+ * btrfs_fix_device_size() is run on every device, followed by
+ * btrfs_fix_super_size().
+ *
+ * Return 0 if all devices and super block sizes are good
+ * Return >0 if any device/super size problem was found, but fixed
+ * Return <0 if something wrong happened during fixing; this includes a
+ * failure of btrfs_fix_super_size() after device items were already fixed
+ */
+int btrfs_fix_device_and_super_size(struct btrfs_fs_info *fs_info)
+{
+	struct btrfs_device *device;
+	struct list_head *dev_list = &fs_info->fs_devices->devices;
+	bool have_bad_value = false;
+	int ret;
+
+	/* Seed device is not supported yet */
+	if (fs_info->fs_devices->seed) {
+		error("fixing device size with seed device is not supported yet");
+		return -EOPNOTSUPP;
+	}
+
+	/* All devices must be set up before repairing */
+	if (list_empty(dev_list)) {
+		error("no device found");
+		return -ENODEV;
+	}
+	list_for_each_entry(device, dev_list, dev_list) {
+		if (device->fd == -1 || !device->writeable) {
+			error("devid %llu is missing or not writeable",
+			      device->devid);
+			error(
+	"fixing device size needs all device(s) to be present and writeable");
+			return -ENODEV;
+		}
+	}
+
+	/* Repair total_bytes of each device */
+	list_for_each_entry(device, dev_list, dev_list) {
+		ret = btrfs_fix_device_size(fs_info, device);
+		if (ret < 0)
+			return ret;
+		if (ret > 0)
+			have_bad_value = true;
+	}
+
+	/* Repair super total_byte */
+	ret = btrfs_fix_super_size(fs_info);
+	/*
+	 * Propagate the failure instead of reporting success: the super block
+	 * may still disagree with the device items at this point.
+	 */
+	if (ret < 0)
+		return ret;
+	if (ret > 0)
+		have_bad_value = true;
+
+	if (have_bad_value) {
+		printf(
+	"Fixed unaligned/mismatched total_bytes for super block and device items\n");
+		ret = 1;
+	} else {
+		printf("No device size related problem found\n");
+		ret = 0;
+	}
+	return ret;
+}