2 * Copyright (C) 2008 Red Hat. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 02111-1307, USA.
19 #include "kerncompat.h"
21 #include "free-space-cache.h"
22 #include "transaction.h"
24 #include "extent_io.h"
31 * Kernel always uses PAGE_CACHE_SIZE for sectorsize, but we don't have
32 * anything like that in userspace and have to get the value from the
35 #define BITS_PER_BITMAP(sectorsize) ((sectorsize) * 8)
/*
 * BITS_PER_BITMAP: number of bits in one free-space bitmap; bitmap buffers
 * in this file are allocated and copied as one sector (sectorsize bytes).
 *
 * NOTE(review): MAX_CACHE_BYTES_PER_GIG is not referenced in the part of
 * this file reviewed here -- confirm it is still needed.
 */
36 #define MAX_CACHE_BYTES_PER_GIG SZ_32K
/*
 * Forward declarations: both helpers are defined further down in this file
 * but are already called from __load_free_space_cache().
 */
38 static int link_free_space(struct btrfs_free_space_ctl *ctl,
39 struct btrfs_free_space *info);
40 static void merge_space_tree(struct btrfs_free_space_ctl *ctl);
/* Root used to reach fs_info->sectorsize when mapping and checking pages. */
45 struct btrfs_root *root;
/*
 * Set by io_ctl_init() for every inode except BTRFS_FREE_INO_OBJECTID;
 * io_ctl_check_generation() and io_ctl_check_crc() branch on it.
 */
50 unsigned check_crcs:1;
/*
 * Prepare @io_ctl for reading a free space cache file of @size bytes.
 *
 * Zeroes the control structure, derives the page count by rounding @size up
 * to whole sectors of @root's filesystem, allocates a zeroed buffer of @size
 * bytes and records the total size.  CRC checking is enabled for every inode
 * except BTRFS_FREE_INO_OBJECTID.
 *
 * NOTE(review): the buffer holds @size bytes while pages are addressed in
 * sectorsize steps -- confirm @size is always sector aligned, otherwise the
 * last page extends past the allocation.
 */
53 static int io_ctl_init(struct io_ctl *io_ctl, u64 size, u64 ino,
54 struct btrfs_root *root)
56 memset(io_ctl, 0, sizeof(struct io_ctl));
/* Round up so a partially filled trailing sector still counts as a page. */
57 io_ctl->num_pages = (size + root->fs_info->sectorsize - 1) /
58 root->fs_info->sectorsize;
59 io_ctl->buffer = kzalloc(size, GFP_NOFS);
62 io_ctl->total_size = size;
64 if (ino != BTRFS_FREE_INO_OBJECTID)
65 io_ctl->check_crcs = 1;
/* Release the data buffer that io_ctl_init() allocated for @io_ctl. */
69 static void io_ctl_free(struct io_ctl *io_ctl)
71 kfree(io_ctl->buffer);
/*
 * Counterpart of io_ctl_map_page().  Callers in this file invoke it once the
 * current page has been consumed and on error paths (generation or checksum
 * mismatch).
 */
74 static void io_ctl_unmap_page(struct io_ctl *io_ctl)
/*
 * Point the cursor at the next sector-sized page of the buffer.
 *
 * Sets cur and orig to the start of page io_ctl->index (post-incrementing
 * the index) and resets the remaining size to one sector.  Running past
 * num_pages is treated as a fatal bug.
 *
 * NOTE(review): presumably the memset that zeroes the page is only done
 * when @clear is non-zero -- confirm.
 */
82 static void io_ctl_map_page(struct io_ctl *io_ctl, int clear)
84 BUG_ON(io_ctl->index >= io_ctl->num_pages);
85 io_ctl->cur = io_ctl->buffer + (io_ctl->index++ *
86 io_ctl->root->fs_info->sectorsize);
87 io_ctl->orig = io_ctl->cur;
88 io_ctl->size = io_ctl->root->fs_info->sectorsize;
90 memset(io_ctl->cur, 0, io_ctl->root->fs_info->sectorsize);
/* Drop the current page mapping; this forwards to io_ctl_unmap_page(). */
93 static void io_ctl_drop_pages(struct io_ctl *io_ctl)
95 io_ctl_unmap_page(io_ctl);
/*
 * Read the on-disk contents of free space inode @ino into io_ctl->buffer.
 *
 * Searches @root for the inode's EXTENT_DATA items and walks them (moving to
 * the next leaf when the current one is exhausted).  Each item must belong
 * to @ino, be an EXTENT_DATA key and describe a regular file extent; its
 * data is read from disk_bytenr + extent offset into the buffer at the
 * item's file offset (key.offset).  The loop runs while total_read is below
 * io_ctl->total_size.  @path is released before returning.
 *
 * NOTE(review): key.offset and the extent length come from disk and no
 * check against the buffer size is visible here -- confirm a corrupted item
 * cannot make the read overflow io_ctl->buffer.
 */
98 static int io_ctl_prepare_pages(struct io_ctl *io_ctl, struct btrfs_root *root,
99 struct btrfs_path *path, u64 ino)
101 struct extent_buffer *leaf;
102 struct btrfs_file_extent_item *fi;
103 struct btrfs_key key;
109 key.type = BTRFS_EXTENT_DATA_KEY;
112 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
115 "Couldn't find file extent item for free space inode"
117 btrfs_release_path(path);
/* Copy extent after extent until the whole cache file is in memory. */
121 while (total_read < io_ctl->total_size) {
122 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
123 ret = btrfs_next_leaf(root, path);
129 leaf = path->nodes[0];
131 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
132 if (key.objectid != ino) {
137 if (key.type != BTRFS_EXTENT_DATA_KEY) {
142 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
143 struct btrfs_file_extent_item);
144 if (btrfs_file_extent_type(path->nodes[0], fi) !=
145 BTRFS_FILE_EXTENT_REG) {
146 fprintf(stderr, "Not the file extent type we wanted\n");
/* Data starts at the extent's disk address plus its offset. */
151 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi) +
152 btrfs_file_extent_offset(leaf, fi);
153 len = btrfs_file_extent_num_bytes(leaf, fi);
154 ret = read_data_from_disk(root->fs_info,
155 io_ctl->buffer + key.offset, bytenr,
163 btrfs_release_path(path);
/*
 * Verify that the generation stored in the cache file equals @generation.
 *
 * The cursor is first moved past the area at the front of the first page:
 * the crc array (one u32 per page) when crcs are checked, otherwise a single
 * 64-bit chunk.  The little-endian generation found there is compared with
 * @generation; on mismatch a message is printed and the page is unmapped,
 * otherwise the cursor is advanced past the generation field.
 *
 * NOTE(review): the comparison converts with le64_to_cpu() but the message
 * prints the raw *gen, so on a big-endian host the logged value would be
 * byte-swapped -- confirm and convert if so.
 */
167 static int io_ctl_check_generation(struct io_ctl *io_ctl, u64 generation)
172 * Skip the crc area. If we don't check crcs then we just have a 64bit
173 * chunk at the front of the first page.
175 if (io_ctl->check_crcs) {
176 io_ctl->cur += sizeof(u32) * io_ctl->num_pages;
177 io_ctl->size -= sizeof(u64) +
178 (sizeof(u32) * io_ctl->num_pages);
180 io_ctl->cur += sizeof(u64);
181 io_ctl->size -= sizeof(u64) * 2;
185 if (le64_to_cpu(*gen) != generation) {
186 printk("btrfs: space cache generation "
187 "(%Lu) does not match inode (%Lu)\n", *gen,
189 io_ctl_unmap_page(io_ctl);
192 io_ctl->cur += sizeof(u64);
/*
 * Map the next page and, when crc checking is enabled, verify its crc32c.
 *
 * Without crc checking the page is only mapped.  Otherwise the checksum is
 * computed over the page contents starting at `offset` and finalized with
 * btrfs_csum_final(); a mismatch is reported and the page is unmapped again.
 *
 * NOTE(review): `offset` skips the crc array (sizeof(u32) * num_pages),
 * which lives at the front of the first page -- presumably this is applied
 * for @index 0 only; confirm.
 */
196 static int io_ctl_check_crc(struct io_ctl *io_ctl, int index)
202 if (!io_ctl->check_crcs) {
203 io_ctl_map_page(io_ctl, 0);
208 offset = sizeof(u32) * io_ctl->num_pages;
210 tmp = io_ctl->buffer;
214 io_ctl_map_page(io_ctl, 0);
215 crc = crc32c(crc, io_ctl->orig + offset,
216 io_ctl->root->fs_info->sectorsize - offset);
217 btrfs_csum_final(crc, (u8 *)&crc);
219 printk("btrfs: csum mismatch on free space cache\n");
220 io_ctl_unmap_page(io_ctl);
/*
 * Read the next on-disk free space entry into @entry.
 *
 * A page is crc-checked and mapped through io_ctl_check_crc() as needed,
 * then offset and bytes are converted from little-endian into @entry and the
 * cursor is advanced by one struct btrfs_free_space_entry.  When the rest of
 * the page can no longer hold a full entry the page is unmapped, so the next
 * read starts on a fresh page.
 *
 * @type is an output parameter; the caller compares the value it receives
 * against BTRFS_FREE_SPACE_EXTENT.
 */
227 static int io_ctl_read_entry(struct io_ctl *io_ctl,
228 struct btrfs_free_space *entry, u8 *type)
230 struct btrfs_free_space_entry *e;
234 ret = io_ctl_check_crc(io_ctl, io_ctl->index);
240 entry->offset = le64_to_cpu(e->offset);
241 entry->bytes = le64_to_cpu(e->bytes);
243 io_ctl->cur += sizeof(struct btrfs_free_space_entry);
244 io_ctl->size -= sizeof(struct btrfs_free_space_entry);
/* Another entry still fits on this page: keep it mapped. */
246 if (io_ctl->size >= sizeof(struct btrfs_free_space_entry))
249 io_ctl_unmap_page(io_ctl);
/*
 * Read one bitmap page into @entry->bitmap.
 *
 * The next page is crc-checked and mapped, a full sector is copied into the
 * bitmap buffer owned by @entry, and the page is unmapped again.
 */
254 static int io_ctl_read_bitmap(struct io_ctl *io_ctl,
255 struct btrfs_free_space *entry)
259 ret = io_ctl_check_crc(io_ctl, io_ctl->index);
263 memcpy(entry->bitmap, io_ctl->cur, io_ctl->root->fs_info->sectorsize);
264 io_ctl_unmap_page(io_ctl);
/*
 * Load the on-disk free space cache of one block group into @ctl.
 *
 * Steps, in order:
 *  1. Look up the free space header item (objectid BTRFS_FREE_SPACE_OBJECTID)
 *     in @root and read the entry count, bitmap count, generation and the
 *     key of the cache inode from it.
 *  2. Look up that inode; give up if its size or generation is zero, or if
 *     its generation differs from the one in the header.
 *  3. Read the whole cache file into memory (io_ctl_init() and
 *     io_ctl_prepare_pages()), then check the crc of page 0 and the stored
 *     generation.
 *  4. Read num_entries entries.  Extent entries are linked into @ctl right
 *     away; bitmap entries get a sectorsize bitmap buffer, are linked and
 *     queued on a local list.
 *  5. Read the bitmap pages for the queued entries in list order.
 *  6. Convert and merge everything with merge_space_tree(), then free the
 *     io_ctl buffer.
 * On a failure after entries were linked, the pages are dropped and the
 * partially built cache is removed from @ctl.
 *
 * NOTE(review): @offset is presumably the key offset of the header item
 * (the block group start) -- confirm.  The caller treats a return value of
 * 1 as "cache loaded".
 */
270 static int __load_free_space_cache(struct btrfs_root *root,
271 struct btrfs_free_space_ctl *ctl,
272 struct btrfs_path *path, u64 offset)
274 struct btrfs_free_space_header *header;
275 struct btrfs_inode_item *inode_item;
276 struct extent_buffer *leaf;
277 struct io_ctl io_ctl;
278 struct btrfs_key key;
279 struct btrfs_key inode_location;
280 struct btrfs_disk_key disk_key;
281 struct btrfs_free_space *e, *n;
282 struct list_head bitmaps;
290 INIT_LIST_HEAD(&bitmaps);
292 key.objectid = BTRFS_FREE_SPACE_OBJECTID;
296 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
299 } else if (ret > 0) {
300 btrfs_release_path(path);
304 leaf = path->nodes[0];
305 header = btrfs_item_ptr(leaf, path->slots[0],
306 struct btrfs_free_space_header);
307 num_entries = btrfs_free_space_entries(leaf, header);
308 num_bitmaps = btrfs_free_space_bitmaps(leaf, header);
309 generation = btrfs_free_space_generation(leaf, header);
310 btrfs_free_space_key(leaf, header, &disk_key);
311 btrfs_disk_key_to_cpu(&inode_location, &disk_key);
312 btrfs_release_path(path);
/* The header names the inode that holds the cache data. */
314 ret = btrfs_search_slot(NULL, root, &inode_location, path, 0, 0);
316 fprintf(stderr, "Couldn't find free space inode %d\n", ret);
320 leaf = path->nodes[0];
321 inode_item = btrfs_item_ptr(leaf, path->slots[0],
322 struct btrfs_inode_item);
324 inode_size = btrfs_inode_size(leaf, inode_item);
325 if (!inode_size || !btrfs_inode_generation(leaf, inode_item)) {
326 btrfs_release_path(path);
330 if (btrfs_inode_generation(leaf, inode_item) != generation) {
332 "free space inode generation (%llu) did not match "
333 "free space cache generation (%llu)\n",
334 (unsigned long long)btrfs_inode_generation(leaf,
336 (unsigned long long)generation);
337 btrfs_release_path(path);
341 btrfs_release_path(path);
/* Pull the whole cache file into memory before parsing it. */
346 ret = io_ctl_init(&io_ctl, inode_size, inode_location.objectid, root);
350 ret = io_ctl_prepare_pages(&io_ctl, root, path,
351 inode_location.objectid);
355 ret = io_ctl_check_crc(&io_ctl, 0);
359 ret = io_ctl_check_generation(&io_ctl, generation);
/* Entries come first in the file; bitmap pages follow after all of them. */
363 while (num_entries) {
364 e = calloc(1, sizeof(*e));
368 ret = io_ctl_read_entry(&io_ctl, e, &type);
379 if (type == BTRFS_FREE_SPACE_EXTENT) {
380 ret = link_free_space(ctl, e);
383 "Duplicate entries in free space cache\n");
388 BUG_ON(!num_bitmaps);
390 e->bitmap = kzalloc(ctl->sectorsize, GFP_NOFS);
395 ret = link_free_space(ctl, e);
396 ctl->total_bitmaps++;
399 "Duplicate entries in free space cache\n");
404 list_add_tail(&e->list, &bitmaps);
410 io_ctl_unmap_page(&io_ctl);
413 * We add the bitmaps at the end of the entries in order that
414 * the bitmap entries are added to the cache.
416 list_for_each_entry_safe(e, n, &bitmaps, list) {
417 list_del_init(&e->list);
418 ret = io_ctl_read_bitmap(&io_ctl, e);
423 io_ctl_drop_pages(&io_ctl);
424 merge_space_tree(ctl);
427 io_ctl_free(&io_ctl);
430 io_ctl_drop_pages(&io_ctl);
431 __btrfs_remove_free_space_cache(ctl);
/*
 * Load the free space cache of @block_group from the tree root and sanity
 * check it.
 *
 * After loading, the total free space recorded in the ctl must equal the
 * block group size (key.offset) minus the used bytes and minus bytes_super.
 * If the cache loaded (ret == 1) but the totals differ, the cache is
 * discarded and a message naming the block group is printed.  A second
 * message reports a failed load.
 */
435 int load_free_space_cache(struct btrfs_fs_info *fs_info,
436 struct btrfs_block_group_cache *block_group)
438 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
439 struct btrfs_path *path;
440 u64 used = btrfs_block_group_used(&block_group->item);
444 path = btrfs_alloc_path();
448 ret = __load_free_space_cache(fs_info->tree_root, ctl, path,
449 block_group->key.objectid);
450 btrfs_free_path(path);
/* Free space must account for everything not used and not superblock. */
452 matched = (ctl->free_space == (block_group->key.offset - used -
453 block_group->bytes_super));
454 if (ret == 1 && !matched) {
455 __btrfs_remove_free_space_cache(ctl);
457 "block group %llu has wrong amount of free space\n",
458 block_group->key.objectid);
466 "failed to load free space cache for block group %llu\n",
467 block_group->key.objectid);
/*
 * Convert a byte offset into a bit index relative to @bitmap_start, where
 * each bit stands for @unit bytes.  An offset below @bitmap_start is a bug.
 */
473 static inline unsigned long offset_to_bit(u64 bitmap_start, u32 unit,
476 BUG_ON(offset < bitmap_start);
477 offset -= bitmap_start;
478 return (unsigned long)(offset / unit);
/* Convert a byte count into a number of bits of @unit bytes each (rounds down). */
481 static inline unsigned long bytes_to_bits(u64 bytes, u32 unit)
483 return (unsigned long)(bytes / unit);
/*
 * Insert @node into the offset-sorted rb-tree @root under key @offset.
 *
 * Descends left for smaller and right for larger offsets.  When an entry
 * with the same offset exists, @bitmap (non-zero if the new node is a bitmap
 * entry) decides the side, following the ordering rule spelled out in the
 * comment inside the function.  The node is then linked and the tree
 * rebalanced.
 *
 * NOTE(review): callers report a non-zero return as a duplicate entry and
 * btrfs_add_free_space() checks for -EEXIST -- confirm the exact return
 * values.
 */
486 static int tree_insert_offset(struct rb_root *root, u64 offset,
487 struct rb_node *node, int bitmap)
489 struct rb_node **p = &root->rb_node;
490 struct rb_node *parent = NULL;
491 struct btrfs_free_space *info;
495 info = rb_entry(parent, struct btrfs_free_space, offset_index);
497 if (offset < info->offset) {
499 } else if (offset > info->offset) {
503 * we could have a bitmap entry and an extent entry
504 * share the same offset. If this is the case, we want
505 * the extent entry to always be found first if we do a
506 * linear search through the tree, since we want to have
507 * the quickest allocation time, and allocating from an
508 * extent is faster than allocating from a bitmap. So
509 * if we're inserting a bitmap and we find an entry at
510 * this offset, we want to go right, or after this entry
511 * logically. If we are inserting an extent and we've
512 * found a bitmap, we want to go left, or before
527 rb_link_node(node, parent, p);
528 rb_insert_color(node, root);
534 * searches the tree for the given offset.
536 * fuzzy - If this is set, then we are trying to make an allocation, and we just
537 * want a section that has at least bytes size and comes at or after the given
540 static struct btrfs_free_space *
541 tree_search_offset(struct btrfs_free_space_ctl *ctl,
542 u64 offset, int bitmap_only, int fuzzy)
/*
 * Overview: a binary search over ctl->free_space_offset finds the entry
 * closest to @offset, remembering the last entry visited.  Later stages
 * step to neighbouring entries with rb_prev()/rb_next() and test whether an
 * extent entry (offset + bytes) or a bitmap entry (offset + bits per bitmap
 * times ctl->unit) covers @offset; a preceding extent entry that covers the
 * offset is preferred over a bitmap entry.
 *
 * NOTE(review): @bitmap_only presumably restricts the result to a bitmap
 * entry at exactly @offset -- confirm.
 * NOTE(review): BITS_PER_BITMAP(sectorsize) * ctl->unit is evaluated before
 * being added to the u64 offset; if ctl->unit is a 32-bit type the product
 * can wrap for 64K sectors (64K * 8 * 64K) -- confirm and widen if needed.
 */
544 struct rb_node *n = ctl->free_space_offset.rb_node;
545 struct btrfs_free_space *entry, *prev = NULL;
546 u32 sectorsize = ctl->sectorsize;
548 /* find entry that is closest to the 'offset' */
555 entry = rb_entry(n, struct btrfs_free_space, offset_index);
558 if (offset < entry->offset)
560 else if (offset > entry->offset)
573 * bitmap entry and extent entry may share same offset,
574 * in that case, bitmap entry comes after extent entry.
579 entry = rb_entry(n, struct btrfs_free_space, offset_index);
580 if (entry->offset != offset)
583 WARN_ON(!entry->bitmap);
588 * if previous extent entry covers the offset,
589 * we should return it instead of the bitmap entry
591 n = rb_prev(&entry->offset_index);
593 prev = rb_entry(n, struct btrfs_free_space,
596 prev->offset + prev->bytes > offset)
606 /* find last entry before the 'offset' */
608 if (entry->offset > offset) {
609 n = rb_prev(&entry->offset_index);
611 entry = rb_entry(n, struct btrfs_free_space,
613 BUG_ON(entry->offset > offset);
623 n = rb_prev(&entry->offset_index);
625 prev = rb_entry(n, struct btrfs_free_space,
628 prev->offset + prev->bytes > offset)
631 if (entry->offset + BITS_PER_BITMAP(sectorsize) * ctl->unit > offset)
633 } else if (entry->offset + entry->bytes > offset)
641 if (entry->offset + BITS_PER_BITMAP(sectorsize) *
645 if (entry->offset + entry->bytes > offset)
649 n = rb_next(&entry->offset_index);
652 entry = rb_entry(n, struct btrfs_free_space, offset_index);
/*
 * Remove @info from the offset tree of @ctl and subtract its bytes from the
 * running free space total.  The entry itself stays valid: callers in this
 * file keep using (and sometimes re-link) it afterwards.
 */
657 void unlink_free_space(struct btrfs_free_space_ctl *ctl,
658 struct btrfs_free_space *info)
660 rb_erase(&info->offset_index, &ctl->free_space_offset);
662 ctl->free_space -= info->bytes;
/*
 * Insert @info into the offset tree of @ctl and add its bytes to the running
 * free space total.
 *
 * An entry that is neither a bitmap nor has a non-zero length is a bug.  The
 * entry is flagged as a bitmap for ordering purposes when info->bitmap is
 * set.  Callers treat a non-zero return as a duplicate entry.
 */
665 static int link_free_space(struct btrfs_free_space_ctl *ctl,
666 struct btrfs_free_space *info)
670 BUG_ON(!info->bitmap && !info->bytes);
671 ret = tree_insert_offset(&ctl->free_space_offset, info->offset,
672 &info->offset_index, (info->bitmap != NULL));
676 ctl->free_space += info->bytes;
/*
 * Find a run of set bits in @bitmap_info that is large enough for a request.
 *
 * The scan starts at the bit for *offset (or at the start of the bitmap if
 * *offset lies before it) and looks at each run of set bits; a run
 * qualifies when it is at least bytes_to_bits(*bytes) long.  For a
 * qualifying run, *offset and *bytes are rewritten to the byte position and
 * byte length of the whole run.  merge_space_tree() loops while this
 * returns 0, i.e. 0 means "found".
 *
 * NOTE(review): (u64)(i * ctl->unit) multiplies in unsigned long before
 * widening; on hosts with 32-bit long the product could wrap -- confirm.
 */
681 static int search_bitmap(struct btrfs_free_space_ctl *ctl,
682 struct btrfs_free_space *bitmap_info, u64 *offset,
685 unsigned long found_bits = 0;
686 unsigned long bits, i;
687 unsigned long next_zero;
688 u32 sectorsize = ctl->sectorsize;
690 i = offset_to_bit(bitmap_info->offset, ctl->unit,
691 max_t(u64, *offset, bitmap_info->offset));
692 bits = bytes_to_bits(*bytes, ctl->unit);
694 for_each_set_bit_from(i, bitmap_info->bitmap, BITS_PER_BITMAP(sectorsize)) {
695 next_zero = find_next_zero_bit(bitmap_info->bitmap,
696 BITS_PER_BITMAP(sectorsize), i);
697 if ((next_zero - i) >= bits) {
698 found_bits = next_zero - i;
705 *offset = (u64)(i * ctl->unit) + bitmap_info->offset;
706 *bytes = (u64)(found_bits) * ctl->unit;
/*
 * Look up the free space entry for @offset in @ctl.
 *
 * Thin wrapper around tree_search_offset() with bitmap_only and fuzzy both
 * 0.  @bytes is accepted but not used by the lookup.
 */
713 struct btrfs_free_space *
714 btrfs_find_free_space(struct btrfs_free_space_ctl *ctl, u64 offset, u64 bytes)
716 return tree_search_offset(ctl, offset, 0, 0);
/*
 * Grow @info (not yet linked) by absorbing adjacent extent entries.
 *
 * The right neighbour is searched at the end of @info's range
 * (offset + bytes); the left neighbour is either the tree predecessor of the
 * right one or, failing that, the entry found at offset - 1.  Bitmap
 * entries are never merged.  A merged right neighbour only adds its bytes;
 * a merged left neighbour must end exactly at @info's start and also moves
 * @info->offset back to its own start.  Merged neighbours are unlinked from
 * the tree.
 *
 * NOTE(review): what happens to the unlinked neighbour structures
 * afterwards is not evident from the statements documented here -- confirm
 * they are released.
 */
719 static void try_merge_free_space(struct btrfs_free_space_ctl *ctl,
720 struct btrfs_free_space *info)
722 struct btrfs_free_space *left_info;
723 struct btrfs_free_space *right_info;
724 u64 offset = info->offset;
725 u64 bytes = info->bytes;
728 * first we want to see if there is free space adjacent to the range we
729 * are adding, if there is remove that struct and add a new one to
730 * cover the entire range
732 right_info = tree_search_offset(ctl, offset + bytes, 0, 0);
733 if (right_info && rb_prev(&right_info->offset_index))
734 left_info = rb_entry(rb_prev(&right_info->offset_index),
735 struct btrfs_free_space, offset_index);
737 left_info = tree_search_offset(ctl, offset - 1, 0, 0);
739 if (right_info && !right_info->bitmap) {
740 unlink_free_space(ctl, right_info);
741 info->bytes += right_info->bytes;
/* Only merge to the left when the neighbour ends exactly where we start. */
745 if (left_info && !left_info->bitmap &&
746 left_info->offset + left_info->bytes == offset) {
747 unlink_free_space(ctl, left_info);
748 info->offset = left_info->offset;
749 info->bytes += left_info->bytes;
/*
 * Print every free space entry of @block_group (offset, length and whether
 * it is a bitmap), followed by a summary line with a count.
 *
 * The walk goes through the offset tree in ascending order.  Entries of at
 * least `bytes` bytes in a block group that is not read-only are singled
 * out by the condition inside the loop; presumably those are what the final
 * count reports -- confirm.
 */
754 void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group,
757 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
758 struct btrfs_free_space *info;
762 for (n = rb_first(&ctl->free_space_offset); n; n = rb_next(n)) {
763 info = rb_entry(n, struct btrfs_free_space, offset_index);
764 if (info->bytes >= bytes && !block_group->ro)
766 printk("entry offset %llu, bytes %llu, bitmap %s\n",
767 (unsigned long long)info->offset,
768 (unsigned long long)info->bytes,
769 (info->bitmap) ? "yes" : "no");
771 printk("%d blocks of free space at or bigger than bytes is \n", count);
/*
 * Allocate and attach a zero-initialised free space ctl to @block_group.
 *
 * The ctl records the sector size, uses the sector size as the unit one
 * bitmap bit stands for, starts at the block group's start offset
 * (key.objectid) and keeps a back pointer to the block group in `private`.
 */
774 int btrfs_init_free_space_ctl(struct btrfs_block_group_cache *block_group,
777 struct btrfs_free_space_ctl *ctl;
779 ctl = calloc(1, sizeof(*ctl));
783 ctl->sectorsize = sectorsize;
784 ctl->unit = sectorsize;
785 ctl->start = block_group->key.objectid;
786 ctl->private = block_group;
787 block_group->free_space_ctl = ctl;
/*
 * Empty the offset tree of @ctl: repeatedly take the last (highest offset)
 * entry and unlink it until the tree has no nodes left.
 *
 * NOTE(review): confirm that each unlinked entry and its bitmap buffer are
 * released inside the loop.
 */
792 void __btrfs_remove_free_space_cache(struct btrfs_free_space_ctl *ctl)
794 struct btrfs_free_space *info;
795 struct rb_node *node;
797 while ((node = rb_last(&ctl->free_space_offset)) != NULL) {
798 info = rb_entry(node, struct btrfs_free_space, offset_index);
799 unlink_free_space(ctl, info);
/* Drop all cached free space entries that belong to @block_group. */
805 void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group)
807 __btrfs_remove_free_space_cache(block_group->free_space_ctl);
/*
 * Record a new free extent starting at @offset in @ctl.
 *
 * A zeroed entry is allocated and filled in, merged with adjacent extent
 * entries via try_merge_free_space(), and then linked into the tree.  A link
 * failure is logged; -EEXIST in particular is treated as a bug.
 */
810 int btrfs_add_free_space(struct btrfs_free_space_ctl *ctl, u64 offset,
813 struct btrfs_free_space *info;
816 info = calloc(1, sizeof(*info));
820 info->offset = offset;
823 try_merge_free_space(ctl, info);
825 ret = link_free_space(ctl, info);
827 printk(KERN_CRIT "btrfs: unable to add free space :%d\n", ret);
828 BUG_ON(ret == -EEXIST);
835 * Merges all the free space cache and kills the bitmap entries since we just
836 * want to use the free space cache to verify it's correct, no reason to keep
837 * the bitmaps around to confuse things.
839 static void merge_space_tree(struct btrfs_free_space_ctl *ctl)
/*
 * Two kinds of work happen while walking the offset tree in ascending order:
 *  - a bitmap entry is unlinked and every run of set bits that
 *    search_bitmap() finds in it is re-added as a plain extent through
 *    btrfs_add_free_space();
 *  - an extent entry that starts exactly where the previous one ends is
 *    folded into the previous entry: both are unlinked, the lengths are
 *    summed and the previous entry is linked again.
 *
 * NOTE(review): in the computation of `end` the (u64) cast is applied after
 * BITS_PER_BITMAP(sectorsize) * ctl->unit has been evaluated; sectorsize is
 * u32, so if ctl->unit is also 32 bits wide the product wraps for 64K
 * sectors (64K * 8 * 64K) -- confirm and cast one operand first if so.
 */
841 struct btrfs_free_space *e, *prev = NULL;
844 u32 sectorsize = ctl->sectorsize;
848 for (n = rb_first(&ctl->free_space_offset); n; n = rb_next(n)) {
849 e = rb_entry(n, struct btrfs_free_space, offset_index);
851 u64 offset = e->offset, bytes = ctl->unit;
854 end = e->offset + (u64)(BITS_PER_BITMAP(sectorsize) * ctl->unit);
856 unlink_free_space(ctl, e);
857 while (!(search_bitmap(ctl, e, &offset, &bytes))) {
858 ret = btrfs_add_free_space(ctl, offset,
872 if (prev->offset + prev->bytes == e->offset) {
873 unlink_free_space(ctl, prev);
874 unlink_free_space(ctl, e);
875 prev->bytes += e->bytes;
877 link_free_space(ctl, prev);
/*
 * Delete the on-disk free space cache of block group @bg.
 *
 * Runs inside one transaction on the tree root:
 *  1. Find the free space header item for the block group (objectid
 *     BTRFS_FREE_SPACE_OBJECTID, offset = block group start), remember the
 *     inode number stored in it and delete the header item.
 *  2. Starting from the highest possible EXTENT_DATA offset of that inode,
 *     step backwards through its file extent items; for each one free the
 *     data extent (disk_bytenr / disk_num_bytes) and delete the item.
 *  3. Look up the inode item and delete it; a missing inode only produces a
 *     warning.
 *  4. Commit the transaction.
 * Failures along the way are reported with error().
 *
 * NOTE(review): the result of btrfs_commit_transaction() is not assigned --
 * confirm that a commit failure is reported to the caller.
 */
885 int btrfs_clear_free_space_cache(struct btrfs_fs_info *fs_info,
886 struct btrfs_block_group_cache *bg)
888 struct btrfs_trans_handle *trans;
889 struct btrfs_root *tree_root = fs_info->tree_root;
890 struct btrfs_path path;
891 struct btrfs_key key;
892 struct btrfs_disk_key location;
893 struct btrfs_free_space_header *sc_header;
894 struct extent_buffer *node;
899 trans = btrfs_start_transaction(tree_root, 1);
901 return PTR_ERR(trans);
903 btrfs_init_path(&path);
905 key.objectid = BTRFS_FREE_SPACE_OBJECTID;
907 key.offset = bg->key.objectid;
909 ret = btrfs_search_slot(trans, tree_root, &key, &path, -1, 1);
917 node = path.nodes[0];
918 slot = path.slots[0];
919 sc_header = btrfs_item_ptr(node, slot, struct btrfs_free_space_header);
920 btrfs_free_space_key(node, sc_header, &location);
921 ino = btrfs_disk_key_objectid(&location);
923 /* Delete the free space header, as we have the ino to continue */
924 ret = btrfs_del_item(trans, tree_root, &path);
926 error("failed to remove free space header for block group %llu: %d",
927 bg->key.objectid, ret);
930 btrfs_release_path(&path);
932 /* Iterate from the end of the free space cache inode */
934 key.type = BTRFS_EXTENT_DATA_KEY;
935 key.offset = (u64)-1;
936 ret = btrfs_search_slot(trans, tree_root, &key, &path, -1, 1);
938 error("failed to locate free space cache extent for block group %llu: %d",
939 bg->key.objectid, ret);
943 struct btrfs_file_extent_item *fi;
947 ret = btrfs_previous_item(tree_root, &path, ino,
948 BTRFS_EXTENT_DATA_KEY);
955 "failed to locate free space cache extent for block group %llu: %d",
956 bg->key.objectid, ret);
959 node = path.nodes[0];
960 slot = path.slots[0];
961 btrfs_item_key_to_cpu(node, &key, slot);
962 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
963 disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
964 disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
/* Release the data extent first, then remove the item that referenced it. */
966 ret = btrfs_free_extent(trans, tree_root, disk_bytenr,
967 disk_num_bytes, 0, tree_root->objectid,
970 error("failed to remove backref for disk bytenr %llu: %d",
974 ret = btrfs_del_item(trans, tree_root, &path);
977 "failed to remove free space extent data for ino %llu offset %llu: %d",
978 ino, key.offset, ret);
982 btrfs_release_path(&path);
984 /* Now delete free space cache inode item */
986 key.type = BTRFS_INODE_ITEM_KEY;
989 ret = btrfs_search_slot(trans, tree_root, &key, &path, -1, 1);
991 warning("free space inode %llu not found, ignore", ino);
994 "failed to locate free space cache inode %llu for block group %llu: %d",
995 ino, bg->key.objectid, ret);
998 ret = btrfs_del_item(trans, tree_root, &path);
1001 "failed to delete free space cache inode %llu for block group %llu: %d",
1002 ino, bg->key.objectid, ret);
1005 btrfs_release_path(&path);
1007 btrfs_commit_transaction(trans, tree_root);