Btrfs-progs: add feature to get minimum size for resizing a fs/device
[platform/upstream/btrfs-progs.git] / cmds-filesystem.c
index 800aa4d..b44a655 100644 (file)
@@ -1271,14 +1271,264 @@ static int cmd_defrag(int argc, char **argv)
 }
 
 static const char * const cmd_resize_usage[] = {
-       "btrfs filesystem resize [devid:][+/-]<newsize>[kKmMgGtTpPeE]|[devid:]max <path>",
+       "btrfs filesystem resize [devid:][+/-]<newsize>[kKmMgGtTpPeE]|[devid:]max|[devid:]get_min_size <path>",
        "Resize a filesystem",
        "If 'max' is passed, the filesystem will occupy all available space",
        "on the device 'devid'.",
+       "If 'get_min_size' is passed, return the minimum size the device can",
+       "be shrunk to.",
        "[kK] means KiB, which denotes 1KiB = 1024B, 1MiB = 1024KiB, etc.",
        NULL
 };
 
+struct dev_extent_elem {       /* a device byte range: used for extents and for holes */
+       u64 start;              /* first byte offset of the range */
+       /* inclusive end */
+       u64 end;
+       struct list_head list;  /* links the element into its owning list */
+};
+
+/*
+ * Allocate a new element describing the byte range [start, end] (end is
+ * inclusive) and link it into @list: at the tail when @append is non-zero,
+ * at the head otherwise.
+ *
+ * Returns 0 on success or -ENOMEM if the allocation fails.
+ */
+static int add_dev_extent(struct list_head *list,
+                         const u64 start, const u64 end,
+                         const int append)
+{
+       struct dev_extent_elem *elem = malloc(sizeof(*elem));
+
+       if (elem == NULL)
+               return -ENOMEM;
+
+       elem->start = start;
+       elem->end = end;
+
+       /* Head insertion unless the caller explicitly asked to append. */
+       if (!append)
+               list_add(&elem->list, list);
+       else
+               list_add_tail(&elem->list, list);
+
+       return 0;
+}
+
+/* Unlink and free every element of @list, leaving the list empty. */
+static void free_dev_extent_list(struct list_head *list)
+{
+       struct dev_extent_elem *elem;
+
+       /* Keep popping the head element until nothing is left. */
+       while (!list_empty(list)) {
+               elem = list_first_entry(list, struct dev_extent_elem, list);
+               list_del(&elem->list);
+               free(elem);
+       }
+}
+
+/*
+ * Tell whether the start offset of any superblock mirror falls inside the
+ * inclusive byte range [start, end].
+ *
+ * Returns 1 if at least one mirror offset is within the range, 0 otherwise.
+ */
+static int hole_includes_sb_mirror(const u64 start, const u64 end)
+{
+       int mirror;
+
+       for (mirror = 0; mirror < BTRFS_SUPER_MIRROR_MAX; mirror++) {
+               const u64 sb_start = btrfs_sb_offset(mirror);
+
+               if (start <= sb_start && sb_start <= end)
+                       return 1;
+       }
+
+       return 0;
+}
+
+/*
+ * Refine the estimated minimum device size by accounting for the device
+ * extents that lie beyond the estimate and would need to be relocated.
+ *
+ * @extents:  device extents as inclusive [start, end] ranges, expected to be
+ *            sorted by descending end offset. Extents accounted as relocated
+ *            are removed from the list and freed.
+ * @holes:    free ranges between device extents. A hole picked as relocation
+ *            target is shrunk from its start, or removed and freed when it
+ *            is fully consumed.
+ * @min_size: in: initial estimate in bytes; out: adjusted estimate.
+ */
+static void adjust_dev_min_size(struct list_head *extents,
+                               struct list_head *holes,
+                               u64 *min_size)
+{
+       /*
+        * If relocation of the block group of a device extent must happen (see
+        * below) scratch space is used for the relocation. So track here the
+        * size of the largest device extent that has to be relocated. We track
+        * only the largest and not the sum of the sizes of all relocated block
+        * groups because after each block group is relocated the running
+        * transaction is committed so that pinned space is released.
+        */
+       u64 scratch_space = 0;
+
+       /*
+        * List of device extents is sorted by descending order of the extent's
+        * end offset. If some extent goes beyond the computed minimum size,
+        * which initially matches the sum of the lengths of all extents,
+        * we need to check if the extent can be relocated to a hole in the
+        * device between [0, *min_size[ (which is what the resize ioctl does).
+        */
+       while (!list_empty(extents)) {
+               struct dev_extent_elem *e;
+               struct dev_extent_elem *h;
+               int found = 0;
+               u64 extent_len;
+               u64 hole_len = 0;
+
+               e = list_first_entry(extents, struct dev_extent_elem, list);
+               /*
+                * e->end is an inclusive offset while *min_size is an
+                * exclusive bound, so the extent lies within [0, *min_size[
+                * only if its last byte is strictly below *min_size.
+                */
+               if (e->end < *min_size)
+                       break;
+
+               /*
+                * Our extent goes beyond the computed *min_size. See if we can
+                * find a hole large enough to relocate it to. If not we must stop
+                * and set *min_size to the end of the extent.
+                *
+                * NOTE(review): only the hole's length is checked here, not
+                * whether the hole itself lies below *min_size.
+                */
+               extent_len = e->end - e->start + 1;
+               list_for_each_entry(h, holes, list) {
+                       hole_len = h->end - h->start + 1;
+                       if (hole_len >= extent_len) {
+                               found = 1;
+                               break;
+                       }
+               }
+
+               if (!found) {
+                       *min_size = e->end + 1;
+                       break;
+               }
+
+               /*
+                * If the hole found contains the location for a superblock
+                * mirror, we are pessimistic and require allocating one
+                * more extent of the same size. This is because the block
+                * group could be in the worst case used by a single extent
+                * with a size >= (block_group.length - superblock.size).
+                */
+               if (hole_includes_sb_mirror(h->start,
+                                           h->start + extent_len - 1))
+                       *min_size += extent_len;
+
+               /* Consume the front of the hole, or drop it if used up. */
+               if (hole_len > extent_len) {
+                       h->start += extent_len;
+               } else {
+                       list_del(&h->list);
+                       free(h);
+               }
+
+               list_del(&e->list);
+               free(e);
+
+               if (extent_len > scratch_space)
+                       scratch_space = extent_len;
+       }
+
+       if (scratch_space) {
+               *min_size += scratch_space;
+               /*
+                * Chunk allocation requires inserting/updating items in the
+                * chunk tree, so often this can lead to the need of allocating
+                * a new system chunk too, which has a maximum size of 32MiB.
+                */
+               *min_size += 32 * 1024 * 1024;
+       }
+}
+
+/*
+ * Compute and print an estimate of the minimum size, in bytes, that a device
+ * of the filesystem can be shrunk to.
+ *
+ * @fd:        open descriptor used for the tree search ioctl. It is always
+ *             closed (together with @dirstream) before returning.
+ * @dirstream: handed to close_file_or_dir() along with @fd.
+ * @amount:    either "get_min_size" (device id 1 is used) or
+ *             "<devid>:get_min_size".
+ *
+ * Returns 0 on success, 1 on an invalid @amount or on any failure.
+ */
+static int get_min_size(int fd, DIR *dirstream, const char *amount)
+{
+       int ret = 1;
+       char *p = strstr(amount, ":");
+       u64 devid = 1;
+       /*
+        * Device allocations starts at 1MiB or at the value passed through the
+        * mount option alloc_start if it's bigger than 1MiB. The alloc_start
+        * option is used for debugging and testing only, and recently the
+        * possibility of deprecating/removing it has been discussed, so we
+        * ignore it here.
+        */
+       u64 min_size = 1 * 1024 * 1024ull;
+       struct btrfs_ioctl_search_args args;
+       struct btrfs_ioctl_search_key *sk = &args.key;
+       u64 last_pos = (u64)-1;
+       LIST_HEAD(extents);
+       LIST_HEAD(holes);
+
+       if (p) {
+               unsigned long long id = 0;
+               int consumed = 0;
+
+               /*
+                * sscanf() returns 1 as soon as the number is converted, even
+                * when the literal suffix does not match. %n is only stored
+                * once everything before it matched, so use it to validate
+                * the suffix and to reject trailing garbage.
+                */
+               if (sscanf(amount, "%llu:get_min_size%n", &id, &consumed) != 1 ||
+                   consumed <= 0 || amount[consumed] != '\0') {
+                       fprintf(stderr, "Invalid parameter: %s\n", amount);
+                       goto out;
+               }
+               devid = id;
+       } else if (strcmp(amount, "get_min_size") != 0) {
+               fprintf(stderr, "Invalid parameter: %s\n", amount);
+               goto out;
+       }
+
+       /* Search the device tree for all dev extent items of this device. */
+       memset(&args, 0, sizeof(args));
+       sk->tree_id = BTRFS_DEV_TREE_OBJECTID;
+       sk->min_objectid = devid;
+       sk->max_objectid = devid;
+       sk->max_type = BTRFS_DEV_EXTENT_KEY;
+       sk->min_type = BTRFS_DEV_EXTENT_KEY;
+       sk->min_offset = 0;
+       sk->max_offset = (u64)-1;
+       sk->min_transid = 0;
+       sk->max_transid = (u64)-1;
+       sk->nr_items = 4096;
+
+       while (1) {
+               int i;
+               struct btrfs_ioctl_search_header *sh;
+               unsigned long off = 0;
+
+               ret = ioctl(fd, BTRFS_IOC_TREE_SEARCH, &args);
+               if (ret < 0) {
+                       fprintf(stderr,
+                               "Error invoking tree search ioctl: %s\n",
+                               strerror(errno));
+                       ret = 1;
+                       goto out;
+               }
+
+               if (sk->nr_items == 0)
+                       break;
+
+               for (i = 0; i < sk->nr_items; i++) {
+                       struct btrfs_dev_extent *extent;
+                       u64 len;
+
+                       /* Each result is a search header followed by the item. */
+                       sh = (struct btrfs_ioctl_search_header *)(args.buf +
+                                                                 off);
+                       off += sizeof(*sh);
+                       extent = (struct btrfs_dev_extent *)(args.buf + off);
+                       off += sh->len;
+
+                       /* Resume the next search right after this key. */
+                       sk->min_objectid = sh->objectid;
+                       sk->min_type = sh->type;
+                       sk->min_offset = sh->offset + 1;
+
+                       if (sh->objectid != devid ||
+                           sh->type != BTRFS_DEV_EXTENT_KEY)
+                               continue;
+
+                       len = btrfs_stack_dev_extent_length(extent);
+                       min_size += len;
+                       /* Prepend, so the list ends up in descending order. */
+                       ret = add_dev_extent(&extents, sh->offset,
+                                            sh->offset + len - 1, 0);
+
+                       /* A gap since the previous extent's end is a hole. */
+                       if (!ret && last_pos != (u64)-1 &&
+                           last_pos != sh->offset)
+                               ret = add_dev_extent(&holes, last_pos,
+                                                    sh->offset - 1, 1);
+                       if (ret) {
+                               fprintf(stderr, "Error: %s\n", strerror(-ret));
+                               ret = 1;
+                               goto out;
+                       }
+
+                       last_pos = sh->offset + len;
+               }
+
+               if (sk->min_type != BTRFS_DEV_EXTENT_KEY ||
+                   sk->min_objectid != devid)
+                       break;
+
+               /*
+                * nr_items now holds the number of items the last search
+                * returned; request a full batch again so the batch size
+                * does not keep shrinking from one call to the next.
+                */
+               sk->nr_items = 4096;
+       }
+
+       adjust_dev_min_size(&extents, &holes, &min_size);
+       printf("%llu bytes (%s)\n", (unsigned long long)min_size,
+              pretty_size(min_size));
+       ret = 0;
+out:
+       close_file_or_dir(fd, dirstream);
+       free_dev_extent_list(&extents);
+       free_dev_extent_list(&holes);
+
+       return ret;
+}
+
 static int cmd_resize(int argc, char **argv)
 {
        struct btrfs_ioctl_vol_args     args;
@@ -1320,6 +1570,9 @@ static int cmd_resize(int argc, char **argv)
                return 1;
        }
 
+       if (strstr(amount, "get_min_size"))
+               return get_min_size(fd, dirstream, amount);
+
        printf("Resize '%s' of '%s'\n", path, amount);
        memset(&args, 0, sizeof(args));
        strncpy_null(args.name, amount);