Btrfs-progs: add feature to get minimum size for resizing a fs/device
[platform/upstream/btrfs-progs.git] / cmds-filesystem.c
index b93bb33..b44a655 100644 (file)
@@ -375,7 +375,7 @@ static void splice_device_list(struct list_head *seed_devices,
 }
 
 static void print_devices(struct btrfs_fs_devices *fs_devices,
-                         u64 *devs_found)
+                         u64 *devs_found, unsigned unit_mode)
 {
        struct btrfs_device *device;
        struct btrfs_fs_devices *cur_fs;
@@ -393,14 +393,16 @@ static void print_devices(struct btrfs_fs_devices *fs_devices,
        list_for_each_entry(device, all_devices, dev_list) {
                printf("\tdevid %4llu size %s used %s path %s\n",
                       (unsigned long long)device->devid,
-                      pretty_size(device->total_bytes),
-                      pretty_size(device->bytes_used), device->name);
+                      pretty_size_mode(device->total_bytes, unit_mode),
+                      pretty_size_mode(device->bytes_used, unit_mode),
+                      device->name);
 
                (*devs_found)++;
        }
 }
 
-static void print_one_uuid(struct btrfs_fs_devices *fs_devices)
+static void print_one_uuid(struct btrfs_fs_devices *fs_devices,
+                          unsigned unit_mode)
 {
        char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
        struct btrfs_device *device;
@@ -421,9 +423,9 @@ static void print_one_uuid(struct btrfs_fs_devices *fs_devices)
        total = device->total_devs;
        printf(" uuid: %s\n\tTotal devices %llu FS bytes used %s\n", uuidbuf,
               (unsigned long long)total,
-              pretty_size(device->super_bytes_used));
+              pretty_size_mode(device->super_bytes_used, unit_mode));
 
-       print_devices(fs_devices, &devs_found);
+       print_devices(fs_devices, &devs_found, unit_mode);
 
        if (devs_found < total) {
                printf("\t*** Some devices missing\n");
@@ -445,7 +447,7 @@ static u64 calc_used_bytes(struct btrfs_ioctl_space_args *si)
 static int print_one_fs(struct btrfs_ioctl_fs_info_args *fs_info,
                struct btrfs_ioctl_dev_info_args *dev_info,
                struct btrfs_ioctl_space_args *space_info,
-               char *label, char *path)
+               char *label, char *path, unsigned unit_mode)
 {
        int i;
        int fd;
@@ -468,7 +470,8 @@ static int print_one_fs(struct btrfs_ioctl_fs_info_args *fs_info,
 
        printf(" uuid: %s\n\tTotal devices %llu FS bytes used %s\n", uuidbuf,
                        fs_info->num_devices,
-                       pretty_size(calc_used_bytes(space_info)));
+                       pretty_size_mode(calc_used_bytes(space_info),
+                                        unit_mode));
 
        for (i = 0; i < fs_info->num_devices; i++) {
                char *canonical_path;
@@ -485,8 +488,8 @@ static int print_one_fs(struct btrfs_ioctl_fs_info_args *fs_info,
                canonical_path = canonicalize_path((char *)tmp_dev_info->path);
                printf("\tdevid %4llu size %s used %s path %s\n",
                        tmp_dev_info->devid,
-                       pretty_size(tmp_dev_info->total_bytes),
-                       pretty_size(tmp_dev_info->bytes_used),
+                       pretty_size_mode(tmp_dev_info->total_bytes, unit_mode),
+                       pretty_size_mode(tmp_dev_info->bytes_used, unit_mode),
                        canonical_path);
 
                free(canonical_path);
@@ -498,7 +501,7 @@ static int print_one_fs(struct btrfs_ioctl_fs_info_args *fs_info,
        return 0;
 }
 
-static int btrfs_scan_kernel(void *search)
+static int btrfs_scan_kernel(void *search, unsigned unit_mode)
 {
        int ret = 0, fd;
        int found = 0;
@@ -537,7 +540,8 @@ static int btrfs_scan_kernel(void *search)
                fd = open(mnt->mnt_dir, O_RDONLY);
                if ((fd != -1) && !get_df(fd, &space_info_arg)) {
                        print_one_fs(&fs_info_arg, dev_info_arg,
-                                       space_info_arg, label, mnt->mnt_dir);
+                                    space_info_arg, label, mnt->mnt_dir,
+                                    unit_mode);
                        kfree(space_info_arg);
                        memset(label, 0, sizeof(label));
                        found = 1;
@@ -816,6 +820,14 @@ static const char * const cmd_show_usage[] = {
        "Show the structure of a filesystem",
        "-d|--all-devices   show only disks under /dev containing btrfs filesystem",
        "-m|--mounted       show only mounted btrfs",
+       "--raw              raw numbers in bytes",
+       "--human-readable   human friendly numbers, base 1024 (default)",
+       "--iec              use 1024 as a base (KiB, MiB, GiB, TiB)",
+       "--si               use 1000 as a base (kB, MB, GB, TB)",
+       "--kbytes           show sizes in KiB, or kB with --si",
+       "--mbytes           show sizes in MiB, or MB with --si",
+       "--gbytes           show sizes in GiB, or GB with --si",
+       "--tbytes           show sizes in TiB, or TB with --si",
        "If no argument is given, structure of all present filesystems is shown.",
        NULL
 };
@@ -829,10 +841,11 @@ static int cmd_show(int argc, char **argv)
        /* default, search both kernel and udev */
        int where = -1;
        int type = 0;
-       char mp[BTRFS_PATH_NAME_MAX + 1];
+       char mp[PATH_MAX];
        char path[PATH_MAX];
        __u8 fsid[BTRFS_FSID_SIZE];
        char uuid_buf[BTRFS_UUID_UNPARSED_SIZE];
+       unsigned unit_mode = UNITS_DEFAULT;
        int found = 0;
 
        while (1) {
@@ -840,6 +853,15 @@ static int cmd_show(int argc, char **argv)
                static const struct option long_options[] = {
                        { "all-devices", no_argument, NULL, 'd'},
                        { "mounted", no_argument, NULL, 'm'},
+                       { "raw", no_argument, NULL, GETOPT_VAL_RAW},
+                       { "kbytes", no_argument, NULL, GETOPT_VAL_KBYTES},
+                       { "mbytes", no_argument, NULL, GETOPT_VAL_MBYTES},
+                       { "gbytes", no_argument, NULL, GETOPT_VAL_GBYTES},
+                       { "tbytes", no_argument, NULL, GETOPT_VAL_TBYTES},
+                       { "si", no_argument, NULL, GETOPT_VAL_SI},
+                       { "iec", no_argument, NULL, GETOPT_VAL_IEC},
+                       { "human-readable", no_argument, NULL,
+                               GETOPT_VAL_HUMAN_READABLE},
                        { NULL, 0, NULL, 0 }
                };
 
@@ -853,6 +875,30 @@ static int cmd_show(int argc, char **argv)
                case 'm':
                        where = BTRFS_SCAN_MOUNTED;
                        break;
+               case GETOPT_VAL_RAW:
+                       units_set_mode(&unit_mode, UNITS_RAW);
+                       break;
+               case GETOPT_VAL_KBYTES:
+                       units_set_base(&unit_mode, UNITS_KBYTES);
+                       break;
+               case GETOPT_VAL_MBYTES:
+                       units_set_base(&unit_mode, UNITS_MBYTES);
+                       break;
+               case GETOPT_VAL_GBYTES:
+                       units_set_base(&unit_mode, UNITS_GBYTES);
+                       break;
+               case GETOPT_VAL_TBYTES:
+                       units_set_base(&unit_mode, UNITS_TBYTES);
+                       break;
+               case GETOPT_VAL_SI:
+                       units_set_mode(&unit_mode, UNITS_DECIMAL);
+                       break;
+               case GETOPT_VAL_IEC:
+                       units_set_mode(&unit_mode, UNITS_BINARY);
+                       break;
+               case GETOPT_VAL_HUMAN_READABLE:
+                       units_set_mode(&unit_mode, UNITS_HUMAN_BINARY);
+                       break;
                default:
                        usage(cmd_show_usage);
                }
@@ -908,7 +954,7 @@ static int cmd_show(int argc, char **argv)
                goto devs_only;
 
        /* show mounted btrfs */
-       ret = btrfs_scan_kernel(search);
+       ret = btrfs_scan_kernel(search, unit_mode);
        if (search && !ret) {
                /* since search is found we are done */
                goto out;
@@ -945,7 +991,7 @@ devs_only:
        }
 
        list_for_each_entry(fs_devices, &all_uuids, list)
-               print_one_uuid(fs_devices);
+               print_one_uuid(fs_devices, unit_mode);
 
        if (search && !found)
                ret = 1;
@@ -1019,7 +1065,7 @@ static const char * const cmd_defrag_usage[] = {
        "-f             flush data to disk immediately after defragmenting",
        "-s start       defragment only from byte onward",
        "-l len         defragment only up to len bytes",
-       "-t size        minimal size of file to be considered for defragmenting",
+       "-t size        target extent size hint",
        NULL
 };
 
@@ -1081,7 +1127,7 @@ static int cmd_defrag(int argc, char **argv)
        int flush = 0;
        u64 start = 0;
        u64 len = (u64)-1;
-       u32 thresh = 0;
+       u64 thresh = 0;
        int i;
        int recursive = 0;
        int ret = 0;
@@ -1124,6 +1170,11 @@ static int cmd_defrag(int argc, char **argv)
                        break;
                case 't':
                        thresh = parse_size(optarg);
+                       if (thresh > (u32)-1) {
+                               fprintf(stderr,
+                       "WARNING: target extent size %llu too big, trimmed to %u",
+                                       thresh, (u32)-1);
+                       }
                        defrag_global_fancy_ioctl = 1;
                        break;
                case 'r':
@@ -1140,7 +1191,7 @@ static int cmd_defrag(int argc, char **argv)
        memset(&defrag_global_range, 0, sizeof(range));
        defrag_global_range.start = start;
        defrag_global_range.len = len;
-       defrag_global_range.extent_thresh = thresh;
+       defrag_global_range.extent_thresh = (u32)thresh;
        if (compress_type) {
                defrag_global_range.flags |= BTRFS_DEFRAG_RANGE_COMPRESS;
                defrag_global_range.compress_type = compress_type;
@@ -1220,14 +1271,264 @@ static int cmd_defrag(int argc, char **argv)
 }
 
 static const char * const cmd_resize_usage[] = {
-       "btrfs filesystem resize [devid:][+/-]<newsize>[kKmMgGtTpPeE]|[devid:]max <path>",
+       "btrfs filesystem resize [devid:][+/-]<newsize>[kKmMgGtTpPeE]|[devid:]max|[devid:]get_min_size <path>",
        "Resize a filesystem",
        "If 'max' is passed, the filesystem will occupy all available space",
        "on the device 'devid'.",
+       "If 'get_min_size' is passed, return the minimum size the device can",
+       "be shrunk to.",
        "[kK] means KiB, which denotes 1KiB = 1024B, 1MiB = 1024KiB, etc.",
        NULL
 };
 
+struct dev_extent_elem {       /* a byte range on a device; the same type
+                                * describes both extents and holes */
+       u64 start;              /* first byte offset of the range */
+       /* inclusive end */
+       u64 end;
+       struct list_head list;  /* links the element into its owning list */
+};
+
+/*
+ * Allocate a range element covering [start, end] (end inclusive) and link
+ * it into @list: at the tail when @append is non-zero, at the head
+ * otherwise.
+ *
+ * Returns 0 on success or -ENOMEM when the allocation fails, in which case
+ * @list is left untouched.
+ */
+static int add_dev_extent(struct list_head *list,
+                         const u64 start, const u64 end,
+                         const int append)
+{
+       struct dev_extent_elem *elem = malloc(sizeof(*elem));
+
+       if (!elem)
+               return -ENOMEM;
+
+       elem->end = end;
+       elem->start = start;
+
+       /* Head insertion is the non-append case; handle it first. */
+       if (!append) {
+               list_add(&elem->list, list);
+               return 0;
+       }
+
+       list_add_tail(&elem->list, list);
+       return 0;
+}
+
+/*
+ * Unlink and free every dev_extent_elem on @list, leaving the list empty.
+ * Calling it on an already empty list does nothing.
+ */
+static void free_dev_extent_list(struct list_head *list)
+{
+       struct dev_extent_elem *victim;
+
+       for (;;) {
+               if (list_empty(list))
+                       return;
+
+               /* Always take the current head until nothing is left. */
+               victim = list_first_entry(list, struct dev_extent_elem, list);
+               list_del(&victim->list);
+               free(victim);
+       }
+}
+
+/*
+ * Tell whether any superblock copy lies inside the inclusive byte range
+ * [start, end].  The offsets checked are btrfs_sb_offset(i) for every i
+ * below BTRFS_SUPER_MIRROR_MAX.
+ *
+ * Returns 1 if at least one offset falls in the range, 0 otherwise.
+ */
+static int hole_includes_sb_mirror(const u64 start, const u64 end)
+{
+       int mirror = 0;
+
+       while (mirror < BTRFS_SUPER_MIRROR_MAX) {
+               const u64 sb_pos = btrfs_sb_offset(mirror);
+
+               if (start <= sb_pos && sb_pos <= end)
+                       return 1;
+
+               mirror++;
+       }
+
+       return 0;
+}
+/*
+ * Refine the caller's estimate of the smallest size a device can shrink to.
+ *
+ * @extents:  device extents, sorted by descending end offset; every extent
+ *            for which a hole is found is unlinked and freed here
+ * @holes:    free ranges between extents; a hole that receives an extent is
+ *            shortened from its start, or unlinked and freed when fully used
+ * @min_size: in/out size in bytes; on entry the caller's initial estimate
+ *
+ * The walk stops at the first extent that already ends at or below
+ * *min_size, or at the first extent that fits in no hole.  In the latter
+ * case *min_size is set to that extent's end + 1 before the scratch-space
+ * allowance (if any extent was placed) is added.
+ */
+static void adjust_dev_min_size(struct list_head *extents,
+                               struct list_head *holes,
+                               u64 *min_size)
+{
+       /*
+        * If relocation of the block group of a device extent must happen (see
+        * below) scratch space is used for the relocation. So track here the
+        * size of the largest device extent that has to be relocated. We track
+        * only the largest and not the sum of the sizes of all relocated block
+        * groups because after each block group is relocated the running
+        * transaction is committed so that pinned space is released.
+        */
+       u64 scratch_space = 0;
+
+       /*
+        * List of device extents is sorted by descending order of the extent's
+        * end offset. If some extent goes beyond the computed minimum size,
+        * which initially matches the sum of the lengths of all extents,
+        * we need to check if the extent can be relocated to a hole in the
+        * device between [0, *min_size[ (which is what the resize ioctl does).
+        */
+       while (!list_empty(extents)) {
+               struct dev_extent_elem *e;
+               struct dev_extent_elem *h;
+               int found = 0;
+               u64 extent_len;
+               u64 hole_len = 0;
+
+               e = list_first_entry(extents, struct dev_extent_elem, list);
+               if (e->end <= *min_size)
+                       break;
+
+               /*
+                * Our extent goes beyond the computed *min_size. See if we can
+                * find a hole large enough to relocate it to. If not we must stop
+                * and set *min_size to the end of the extent.
+                */
+               extent_len = e->end - e->start + 1;
+               list_for_each_entry(h, holes, list) {
+                       hole_len = h->end - h->start + 1;
+                       if (hole_len >= extent_len) {   /* first hole that fits */
+                               found = 1;
+                               break;
+                       }
+               }
+
+               if (!found) {
+                       *min_size = e->end + 1;
+                       break;
+               }
+
+               /*
+                * If the hole found contains the location for a superblock
+                * mirror, we are pessimistic and require allocating one
+                * more extent of the same size. This is because the block
+                * group could be in the worst case used by a single extent
+                * with a size >= (block_group.length - superblock.size).
+                */
+               if (hole_includes_sb_mirror(h->start,
+                                           h->start + extent_len - 1))
+                       *min_size += extent_len;
+
+               if (hole_len > extent_len) {
+                       h->start += extent_len; /* extent takes the front of the hole */
+               } else {
+                       list_del(&h->list);
+                       free(h);
+               }
+
+               list_del(&e->list);
+               free(e);
+
+               if (extent_len > scratch_space)
+                       scratch_space = extent_len;
+       }
+
+       if (scratch_space) {
+               *min_size += scratch_space;
+               /*
+                * Chunk allocation requires inserting/updating items in the
+                * chunk tree, so often this can lead to the need of allocating
+                * a new system chunk too, which has a maximum size of 32Mb.
+                */
+               *min_size += 32 * 1024 * 1024;
+       }
+}
+/*
+ * Print an estimate of the minimum size one device of the filesystem open
+ * at @fd can be shrunk to, as "<bytes> bytes (<pretty size>)".
+ *
+ * @fd:        descriptor the tree search ioctl is issued on; it is closed
+ *             (together with @dirstream) before returning, on every path
+ * @dirstream: handed to close_file_or_dir() along with @fd
+ * @amount:    "get_min_size" or "<devid>:get_min_size"; devid stays 1 when
+ *             @amount contains no ':'
+ *
+ * All dev extent items of the device are collected (newest first) while
+ * their lengths are summed; only gaps between consecutive extents are
+ * recorded as holes, space before the first extent is not.  The estimate is
+ * then refined by adjust_dev_min_size().
+ *
+ * Returns 0 on success, 1 when @amount cannot be parsed, the ioctl fails or
+ * a list element cannot be allocated.
+ *
+ * NOTE(review): the sscanf() check only proves a leading number was read;
+ * text after the ':' is not validated here.
+ */
+static int get_min_size(int fd, DIR *dirstream, const char *amount)
+{
+       int ret = 1;
+       char *p = strstr(amount, ":");  /* non-NULL if amount contains ':' */
+       u64 devid = 1;
+       /*
+        * Device allocation starts at 1Mb or at the value passed through the
+        * mount option alloc_start if it's bigger than 1Mb. The alloc_start
+        * option is used for debugging and testing only, and recently the
+        * possibility of deprecating/removing it has been discussed, so we
+        * ignore it here.
+        */
+       u64 min_size = 1 * 1024 * 1024ull;
+       struct btrfs_ioctl_search_args args;
+       struct btrfs_ioctl_search_key *sk = &args.key;
+       u64 last_pos = (u64)-1; /* end of previous extent; (u64)-1 = none yet */
+       LIST_HEAD(extents);
+       LIST_HEAD(holes);
+
+       if (p && sscanf(amount, "%llu:get_min_size", &devid) != 1) {
+               fprintf(stderr, "Invalid parameter: %s\n", amount);
+               goto out;
+       }
+
+       memset(&args, 0, sizeof(args));
+       sk->tree_id = BTRFS_DEV_TREE_OBJECTID;
+       sk->min_objectid = devid;
+       sk->max_objectid = devid;
+       sk->max_type = BTRFS_DEV_EXTENT_KEY;
+       sk->min_type = BTRFS_DEV_EXTENT_KEY;
+       sk->min_offset = 0;
+       sk->max_offset = (u64)-1;
+       sk->min_transid = 0;
+       sk->max_transid = (u64)-1;
+       sk->nr_items = 4096;
+
+       while (1) {
+               int i;
+               struct btrfs_ioctl_search_header *sh;
+               unsigned long off = 0;
+
+               ret = ioctl(fd, BTRFS_IOC_TREE_SEARCH, &args);
+               if (ret < 0) {
+                       fprintf(stderr,
+                               "Error invoking tree search ioctl: %s\n",
+                               strerror(errno));
+                       ret = 1;
+                       goto out;
+               }
+
+               if (sk->nr_items == 0)
+                       break;
+
+               for (i = 0; i < sk->nr_items; i++) {
+                       struct btrfs_dev_extent *extent;
+                       u64 len;
+
+                       sh = (struct btrfs_ioctl_search_header *)(args.buf +
+                                                                 off);
+                       off += sizeof(*sh);
+                       extent = (struct btrfs_dev_extent *)(args.buf + off);
+                       off += sh->len;
+
+                       sk->min_objectid = sh->objectid;
+                       sk->min_type = sh->type;
+                       sk->min_offset = sh->offset + 1; /* next search resumes after this key */
+
+                       if (sh->objectid != devid ||
+                           sh->type != BTRFS_DEV_EXTENT_KEY)
+                               continue;
+
+                       len = btrfs_stack_dev_extent_length(extent);
+                       min_size += len; /* baseline: sum of all extent lengths */
+                       ret = add_dev_extent(&extents, sh->offset,
+                                            sh->offset + len - 1, 0); /* 0: insert at head */
+
+                       if (!ret && last_pos != (u64)-1 &&
+                           last_pos != sh->offset)
+                               ret = add_dev_extent(&holes, last_pos,
+                                                    sh->offset - 1, 1); /* 1: append at tail */
+                       if (ret) {
+                               fprintf(stderr, "Error: %s\n", strerror(-ret));
+                               ret = 1;
+                               goto out;
+                       }
+
+                       last_pos = sh->offset + len;
+               }
+
+               if (sk->min_type != BTRFS_DEV_EXTENT_KEY ||
+                   sk->min_objectid != devid)
+                       break;
+       }
+
+       adjust_dev_min_size(&extents, &holes, &min_size);
+       printf("%llu bytes (%s)\n", min_size, pretty_size(min_size));
+       ret = 0;
+out:
+       close_file_or_dir(fd, dirstream);
+       free_dev_extent_list(&extents);
+       free_dev_extent_list(&holes);
+
+       return ret;
+}
+
 static int cmd_resize(int argc, char **argv)
 {
        struct btrfs_ioctl_vol_args     args;
@@ -1269,7 +1570,11 @@ static int cmd_resize(int argc, char **argv)
                return 1;
        }
 
+       if (strstr(amount, "get_min_size"))
+               return get_min_size(fd, dirstream, amount);
+
        printf("Resize '%s' of '%s'\n", path, amount);
+       memset(&args, 0, sizeof(args));
        strncpy_null(args.name, amount);
        res = ioctl(fd, BTRFS_IOC_RESIZE, &args);
        e = errno;
@@ -1321,13 +1626,16 @@ static int cmd_label(int argc, char **argv)
        }
 }
 
+static const char filesystem_cmd_group_info[] =
+"overall filesystem tasks and information";
+
 const struct cmd_group filesystem_cmd_group = {
-       filesystem_cmd_group_usage, NULL, {
+       filesystem_cmd_group_usage, filesystem_cmd_group_info, {
                { "df", cmd_filesystem_df, cmd_filesystem_df_usage, NULL, 0 },
                { "show", cmd_show, cmd_show_usage, NULL, 0 },
                { "sync", cmd_sync, cmd_sync_usage, NULL, 0 },
                { "defragment", cmd_defrag, cmd_defrag_usage, NULL, 0 },
-               { "balance", cmd_balance, NULL, &balance_cmd_group, 1 },
+               { "balance", cmd_balance, NULL, &balance_cmd_group, CMD_HIDDEN },
                { "resize", cmd_resize, cmd_resize_usage, NULL, 0 },
                { "label", cmd_label, cmd_label_usage, NULL, 0 },
                { "usage", cmd_filesystem_usage,