btrfs-progs: mkfs: add uuid and otime to ROOT_ITEM of, FS_TREE
[platform/upstream/btrfs-progs.git] / free-space-cache.c
index 35edac2..9b83a71 100644 (file)
 #include "extent_io.h"
 #include "crc32c.h"
 #include "bitops.h"
+#include "internal.h"
+#include "utils.h"
 
-#define CACHE_SECTORSIZE       4096
-#define BITS_PER_BITMAP                (CACHE_SECTORSIZE * 8)
-#define MAX_CACHE_BYTES_PER_GIG        (32 * 1024)
+/*
+ * Kernel always uses PAGE_CACHE_SIZE for sectorsize, but we don't have
+ * anything like that in userspace and have to get the value from the
+ * filesystem
+ */
+#define BITS_PER_BITMAP(sectorsize)            ((sectorsize) * 8)
+#define MAX_CACHE_BYTES_PER_GIG        SZ_32K
 
 static int link_free_space(struct btrfs_free_space_ctl *ctl,
                           struct btrfs_free_space *info);
@@ -48,7 +54,7 @@ static int io_ctl_init(struct io_ctl *io_ctl, u64 size, u64 ino,
                       struct btrfs_root *root)
 {
        memset(io_ctl, 0, sizeof(struct io_ctl));
-       io_ctl->num_pages = (size + CACHE_SECTORSIZE - 1) / CACHE_SECTORSIZE;
+       io_ctl->num_pages = DIV_ROUND_UP(size, root->fs_info->sectorsize);
        io_ctl->buffer = kzalloc(size, GFP_NOFS);
        if (!io_ctl->buffer)
                return -ENOMEM;
@@ -75,11 +81,12 @@ static void io_ctl_unmap_page(struct io_ctl *io_ctl)
 static void io_ctl_map_page(struct io_ctl *io_ctl, int clear)
 {
        BUG_ON(io_ctl->index >= io_ctl->num_pages);
-       io_ctl->cur = io_ctl->buffer + (io_ctl->index++ * CACHE_SECTORSIZE);
+       io_ctl->cur = io_ctl->buffer + (io_ctl->index++ *
+                                       io_ctl->root->fs_info->sectorsize);
        io_ctl->orig = io_ctl->cur;
-       io_ctl->size = CACHE_SECTORSIZE;
+       io_ctl->size = io_ctl->root->fs_info->sectorsize;
        if (clear)
-               memset(io_ctl->cur, 0, CACHE_SECTORSIZE);
+               memset(io_ctl->cur, 0, io_ctl->root->fs_info->sectorsize);
 }
 
 static void io_ctl_drop_pages(struct io_ctl *io_ctl)
@@ -103,7 +110,8 @@ static int io_ctl_prepare_pages(struct io_ctl *io_ctl, struct btrfs_root *root,
 
        ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
        if (ret) {
-               printf("Couldn't find file extent item for free space inode"
+               fprintf(stderr,
+                      "Couldn't find file extent item for free space inode"
                       " %Lu\n", ino);
                btrfs_release_path(path);
                return -EINVAL;
@@ -134,7 +142,7 @@ static int io_ctl_prepare_pages(struct io_ctl *io_ctl, struct btrfs_root *root,
                                    struct btrfs_file_extent_item);
                if (btrfs_file_extent_type(path->nodes[0], fi) !=
                    BTRFS_FILE_EXTENT_REG) {
-                       printf("Not the file extent type we wanted\n");
+                       fprintf(stderr, "Not the file extent type we wanted\n");
                        ret = -EINVAL;
                        break;
                }
@@ -203,8 +211,9 @@ static int io_ctl_check_crc(struct io_ctl *io_ctl, int index)
        val = *tmp;
 
        io_ctl_map_page(io_ctl, 0);
-       crc = crc32c(crc, io_ctl->orig + offset, CACHE_SECTORSIZE - offset);
-       btrfs_csum_final(crc, (char *)&crc);
+       crc = crc32c(crc, io_ctl->orig + offset,
+                       io_ctl->root->fs_info->sectorsize - offset);
+       btrfs_csum_final(crc, (u8 *)&crc);
        if (val != crc) {
                printk("btrfs: csum mismatch on free space cache\n");
                io_ctl_unmap_page(io_ctl);
@@ -250,7 +259,7 @@ static int io_ctl_read_bitmap(struct io_ctl *io_ctl,
        if (ret)
                return ret;
 
-       memcpy(entry->bitmap, io_ctl->cur, CACHE_SECTORSIZE);
+       memcpy(entry->bitmap, io_ctl->cur, io_ctl->root->fs_info->sectorsize);
        io_ctl_unmap_page(io_ctl);
 
        return 0;
@@ -291,8 +300,6 @@ static int __load_free_space_cache(struct btrfs_root *root,
                return 0;
        }
 
-       ret = -1;
-
        leaf = path->nodes[0];
        header = btrfs_item_ptr(leaf, path->slots[0],
                                struct btrfs_free_space_header);
@@ -305,15 +312,23 @@ static int __load_free_space_cache(struct btrfs_root *root,
 
        ret = btrfs_search_slot(NULL, root, &inode_location, path, 0, 0);
        if (ret) {
-               printf("Couldn't find free space inode %d\n", ret);
+               fprintf(stderr, "Couldn't find free space inode %d\n", ret);
                return 0;
        }
 
        leaf = path->nodes[0];
        inode_item = btrfs_item_ptr(leaf, path->slots[0],
                                    struct btrfs_inode_item);
+
+       inode_size = btrfs_inode_size(leaf, inode_item);
+       if (!inode_size || !btrfs_inode_generation(leaf, inode_item)) {
+               btrfs_release_path(path);
+               return 0;
+       }
+
        if (btrfs_inode_generation(leaf, inode_item) != generation) {
-               printf("free space inode generation (%llu) did not match "
+               fprintf(stderr,
+                      "free space inode generation (%llu) did not match "
                       "free space cache generation (%llu)\n",
                       (unsigned long long)btrfs_inode_generation(leaf,
                                                                  inode_item),
@@ -322,10 +337,7 @@ static int __load_free_space_cache(struct btrfs_root *root,
                return 0;
        }
 
-       inode_size = btrfs_inode_size(leaf, inode_item);
        btrfs_release_path(path);
-       if (inode_size == 0)
-               return 0;
 
        if (!num_entries)
                return 0;
@@ -366,14 +378,15 @@ static int __load_free_space_cache(struct btrfs_root *root,
                if (type == BTRFS_FREE_SPACE_EXTENT) {
                        ret = link_free_space(ctl, e);
                        if (ret) {
-                               printf("Duplicate entries in free space cache, dumping");
+                               fprintf(stderr,
+                                      "Duplicate entries in free space cache\n");
                                free(e);
                                goto free_cache;
                        }
                } else {
                        BUG_ON(!num_bitmaps);
                        num_bitmaps--;
-                       e->bitmap = kzalloc(CACHE_SECTORSIZE, GFP_NOFS);
+                       e->bitmap = kzalloc(ctl->sectorsize, GFP_NOFS);
                        if (!e->bitmap) {
                                free(e);
                                goto free_cache;
@@ -381,7 +394,8 @@ static int __load_free_space_cache(struct btrfs_root *root,
                        ret = link_free_space(ctl, e);
                        ctl->total_bitmaps++;
                        if (ret) {
-                               printf("Duplicate entries in free space cache, dumping");
+                               fprintf(stderr,
+                                      "Duplicate entries in free space cache\n");
                                free(e->bitmap);
                                free(e);
                                goto free_cache;
@@ -422,7 +436,10 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info,
 {
        struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
        struct btrfs_path *path;
+       u64 used = btrfs_block_group_used(&block_group->item);
        int ret = 0;
+       u64 bg_free;
+       s64 diff;
 
        path = btrfs_alloc_path();
        if (!path)
@@ -432,11 +449,37 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info,
                                      block_group->key.objectid);
        btrfs_free_path(path);
 
+       bg_free = block_group->key.offset - used - block_group->bytes_super;
+       diff = ctl->free_space - bg_free;
+       if (ret == 1 && diff) {
+               fprintf(stderr,
+                      "block group %llu has wrong amount of free space, free space cache has %llu block group has %llu\n",
+                      block_group->key.objectid, ctl->free_space, bg_free);
+               __btrfs_remove_free_space_cache(ctl);
+               /*
+                * Due to btrfs_reserve_extent() can happen out of a
+                * transaction, but all btrfs_release_extent() happens inside
+                * a transaction, so under heavy race it's possible that free
+                * space cache has less free space, and both kernel just discard
+                * such cache. But if we find some case where free space cache
+                * has more free space, this means under certain case such
+                * cache can be loaded and cause double allocate.
+                *
+                * Detect such possibility here.
+                */
+               if (diff > 0)
+                       error(
+"free space cache has more free space than block group item, this could leads to serious corruption, please contact btrfs developers");
+               ret = -1;
+       }
+
        if (ret < 0) {
-               ret = 0;
+               if (diff <= 0)
+                       ret = 0;
 
-               printf("failed to load free space cache for block group %llu",
-                       block_group->key.objectid);
+               fprintf(stderr,
+                      "failed to load free space cache for block group %llu\n",
+                      block_group->key.objectid);
        }
 
        return ret;
@@ -455,21 +498,6 @@ static inline unsigned long bytes_to_bits(u64 bytes, u32 unit)
        return (unsigned long)(bytes / unit);
 }
 
-static inline u64 offset_to_bitmap(struct btrfs_free_space_ctl *ctl,
-                                  u64 offset)
-{
-       u64 bitmap_start;
-       u64 bytes_per_bitmap;
-
-       bytes_per_bitmap = BITS_PER_BITMAP * ctl->unit;
-       bitmap_start = offset - ctl->start;
-       bitmap_start = bitmap_start / bytes_per_bitmap;
-       bitmap_start *= bytes_per_bitmap;
-       bitmap_start += ctl->start;
-
-       return bitmap_start;
-}
-
 static int tree_insert_offset(struct rb_root *root, u64 offset,
                              struct rb_node *node, int bitmap)
 {
@@ -530,6 +558,7 @@ tree_search_offset(struct btrfs_free_space_ctl *ctl,
 {
        struct rb_node *n = ctl->free_space_offset.rb_node;
        struct btrfs_free_space *entry, *prev = NULL;
+       u32 sectorsize = ctl->sectorsize;
 
        /* find entry that is closest to the 'offset' */
        while (1) {
@@ -614,7 +643,7 @@ tree_search_offset(struct btrfs_free_space_ctl *ctl,
                            prev->offset + prev->bytes > offset)
                                return prev;
                }
-               if (entry->offset + BITS_PER_BITMAP * ctl->unit > offset)
+               if (entry->offset + BITS_PER_BITMAP(sectorsize) * ctl->unit > offset)
                        return entry;
        } else if (entry->offset + entry->bytes > offset)
                return entry;
@@ -624,7 +653,7 @@ tree_search_offset(struct btrfs_free_space_ctl *ctl,
 
        while (1) {
                if (entry->bitmap) {
-                       if (entry->offset + BITS_PER_BITMAP *
+                       if (entry->offset + BITS_PER_BITMAP(sectorsize) *
                            ctl->unit > offset)
                                break;
                } else {
@@ -671,14 +700,15 @@ static int search_bitmap(struct btrfs_free_space_ctl *ctl,
        unsigned long found_bits = 0;
        unsigned long bits, i;
        unsigned long next_zero;
+       u32 sectorsize = ctl->sectorsize;
 
        i = offset_to_bit(bitmap_info->offset, ctl->unit,
                          max_t(u64, *offset, bitmap_info->offset));
        bits = bytes_to_bits(*bytes, ctl->unit);
 
-       for_each_set_bit_from(i, bitmap_info->bitmap, BITS_PER_BITMAP) {
+       for_each_set_bit_from(i, bitmap_info->bitmap, BITS_PER_BITMAP(sectorsize)) {
                next_zero = find_next_zero_bit(bitmap_info->bitmap,
-                                              BITS_PER_BITMAP, i);
+                                              BITS_PER_BITMAP(sectorsize), i);
                if ((next_zero - i) >= bits) {
                        found_bits = next_zero - i;
                        break;
@@ -765,6 +795,7 @@ int btrfs_init_free_space_ctl(struct btrfs_block_group_cache *block_group,
        if (!ctl)
                return -ENOMEM;
 
+       ctl->sectorsize = sectorsize;
        ctl->unit = sectorsize;
        ctl->start = block_group->key.objectid;
        ctl->private = block_group;
@@ -781,8 +812,7 @@ void __btrfs_remove_free_space_cache(struct btrfs_free_space_ctl *ctl)
        while ((node = rb_last(&ctl->free_space_offset)) != NULL) {
                info = rb_entry(node, struct btrfs_free_space, offset_index);
                unlink_free_space(ctl, info);
-               if (info->bitmap)
-                       free(info->bitmap);
+               free(info->bitmap);
                free(info);
        }
 }
@@ -826,6 +856,7 @@ static void merge_space_tree(struct btrfs_free_space_ctl *ctl)
        struct btrfs_free_space *e, *prev = NULL;
        struct rb_node *n;
        int ret;
+       u32 sectorsize = ctl->sectorsize;
 
 again:
        prev = NULL;
@@ -835,7 +866,7 @@ again:
                        u64 offset = e->offset, bytes = ctl->unit;
                        u64 end;
 
-                       end = e->offset + (u64)(BITS_PER_BITMAP * ctl->unit);
+                       end = e->offset + (u64)(BITS_PER_BITMAP(sectorsize) * ctl->unit);
 
                        unlink_free_space(ctl, e);
                        while (!(search_bitmap(ctl, e, &offset, &bytes))) {
@@ -865,3 +896,129 @@ next:
                prev = e;
        }
 }
+
+int btrfs_clear_free_space_cache(struct btrfs_fs_info *fs_info,
+                                struct btrfs_block_group_cache *bg)
+{
+       struct btrfs_trans_handle *trans;
+       struct btrfs_root *tree_root = fs_info->tree_root;
+       struct btrfs_path path;
+       struct btrfs_key key;
+       struct btrfs_disk_key location;
+       struct btrfs_free_space_header *sc_header;
+       struct extent_buffer *node;
+       u64 ino;
+       int slot;
+       int ret;
+
+       trans = btrfs_start_transaction(tree_root, 1);
+       if (IS_ERR(trans))
+               return PTR_ERR(trans);
+
+       btrfs_init_path(&path);
+
+       key.objectid = BTRFS_FREE_SPACE_OBJECTID;
+       key.type = 0;
+       key.offset = bg->key.objectid;
+
+       ret = btrfs_search_slot(trans, tree_root, &key, &path, -1, 1);
+       if (ret > 0) {
+               ret = 0;
+               goto out;
+       }
+       if (ret < 0)
+               goto out;
+
+       node = path.nodes[0];
+       slot = path.slots[0];
+       sc_header = btrfs_item_ptr(node, slot, struct btrfs_free_space_header);
+       btrfs_free_space_key(node, sc_header, &location);
+       ino = btrfs_disk_key_objectid(&location);
+
+       /* Delete the free space header, as we have the ino to continue */
+       ret = btrfs_del_item(trans, tree_root, &path);
+       if (ret < 0) {
+               error("failed to remove free space header for block group %llu: %d",
+                     bg->key.objectid, ret);
+               goto out;
+       }
+       btrfs_release_path(&path);
+
+       /* Iterate from the end of the free space cache inode */
+       key.objectid = ino;
+       key.type = BTRFS_EXTENT_DATA_KEY;
+       key.offset = (u64)-1;
+       ret = btrfs_search_slot(trans, tree_root, &key, &path, -1, 1);
+       if (ret < 0) {
+               error("failed to locate free space cache extent for block group %llu: %d",
+                     bg->key.objectid, ret);
+               goto out;
+       }
+       while (1) {
+               struct btrfs_file_extent_item *fi;
+               u64 disk_bytenr;
+               u64 disk_num_bytes;
+
+               ret = btrfs_previous_item(tree_root, &path, ino,
+                                         BTRFS_EXTENT_DATA_KEY);
+               if (ret > 0) {
+                       ret = 0;
+                       break;
+               }
+               if (ret < 0) {
+                       error(
+       "failed to locate free space cache extent for block group %llu: %d",
+                               bg->key.objectid, ret);
+                       goto out;
+               }
+               node = path.nodes[0];
+               slot = path.slots[0];
+               btrfs_item_key_to_cpu(node, &key, slot);
+               fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
+               disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
+               disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
+
+               ret = btrfs_free_extent(trans, tree_root, disk_bytenr,
+                                       disk_num_bytes, 0, tree_root->objectid,
+                                       ino, key.offset);
+               if (ret < 0) {
+                       error("failed to remove backref for disk bytenr %llu: %d",
+                             disk_bytenr, ret);
+                       goto out;
+               }
+               ret = btrfs_del_item(trans, tree_root, &path);
+               if (ret < 0) {
+                       error(
+       "failed to remove free space extent data for ino %llu offset %llu: %d",
+                             ino, key.offset, ret);
+                       goto out;
+               }
+       }
+       btrfs_release_path(&path);
+
+       /* Now delete free space cache inode item */
+       key.objectid = ino;
+       key.type = BTRFS_INODE_ITEM_KEY;
+       key.offset = 0;
+
+       ret = btrfs_search_slot(trans, tree_root, &key, &path, -1, 1);
+       if (ret > 0)
+               warning("free space inode %llu not found, ignore", ino);
+       if (ret < 0) {
+               error(
+       "failed to locate free space cache inode %llu for block group %llu: %d",
+                     ino, bg->key.objectid, ret);
+               goto out;
+       }
+       ret = btrfs_del_item(trans, tree_root, &path);
+       if (ret < 0) {
+               error(
+       "failed to delete free space cache inode %llu for block group %llu: %d",
+                     ino, bg->key.objectid, ret);
+       }
+out:
+       btrfs_release_path(&path);
+       if (!ret)
+               btrfs_commit_transaction(trans, tree_root);
+       return ret;
+}