2 * Copyright (C) 2008 Red Hat. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 02111-1307, USA.
19 #include "kerncompat.h"
21 #include "free-space-cache.h"
22 #include "transaction.h"
24 #include "extent_io.h"
29 * Kernel always uses PAGE_CACHE_SIZE for sectorsize, but we don't have
30 * anything like that in userspace and have to get the value from the
/* Number of bits held by one bitmap of sectorsize bytes (8 bits per byte). */
33 #define BITS_PER_BITMAP(sectorsize) ((sectorsize) * 8)
/*
 * 32 KiB. NOTE(review): presumably a cap on cache size per GiB of block
 * group space, going by the name; no use is visible in this excerpt.
 */
34 #define MAX_CACHE_BYTES_PER_GIG (32 * 1024)
/*
 * Forward declarations: both functions are defined further down in this
 * file but are first called from __load_free_space_cache.
 */
36 static int link_free_space(struct btrfs_free_space_ctl *ctl,
37 struct btrfs_free_space *info);
38 static void merge_space_tree(struct btrfs_free_space_ctl *ctl);
/* Root whose sectorsize is used as the page size when indexing the buffer. */
43 struct btrfs_root *root;
/*
 * Set by io_ctl_init for every inode except BTRFS_FREE_INO_OBJECTID;
 * selects crc verification and the crc-area layout of the first page.
 */
48 unsigned check_crcs:1;
/*
 * Reset *io_ctl and allocate its backing buffer.
 *
 * @io_ctl: control structure to initialise (fully zeroed first)
 * @size:   number of bytes to allocate; also stored as total_size
 * @ino:    inode number of the cache file; crc checking is switched on
 *          unless it equals BTRFS_FREE_INO_OBJECTID
 * @root:   supplies the sectorsize used to count pages
 *
 * NOTE(review): num_pages rounds size up to whole sectors, but the buffer
 * is allocated with exactly size bytes. If size is not a multiple of
 * sectorsize, the last page addressed by io_ctl_map_page reaches past the
 * allocation; confirm that callers always pass an aligned size.
 */
51 static int io_ctl_init(struct io_ctl *io_ctl, u64 size, u64 ino,
52 struct btrfs_root *root)
54 memset(io_ctl, 0, sizeof(struct io_ctl));
/* Round up so that a partial trailing sector still counts as a page. */
55 io_ctl->num_pages = (size + root->sectorsize - 1) / root->sectorsize;
56 io_ctl->buffer = kzalloc(size, GFP_NOFS);
59 io_ctl->total_size = size;
61 if (ino != BTRFS_FREE_INO_OBJECTID)
62 io_ctl->check_crcs = 1;
/* Release the buffer that io_ctl_init allocated with kzalloc. */
66 static void io_ctl_free(struct io_ctl *io_ctl)
68 kfree(io_ctl->buffer);
/*
 * Called once a page has been consumed or rejected (see the callers in
 * io_ctl_read_entry, io_ctl_read_bitmap and io_ctl_check_crc).
 * NOTE(review): the body is not visible in this excerpt; presumably it
 * clears the cursor state set up by io_ctl_map_page. Confirm before relying
 * on this.
 */
71 static void io_ctl_unmap_page(struct io_ctl *io_ctl)
/*
 * Make the next page of the buffer current.
 *
 * cur and orig are pointed at page number io_ctl->index inside the buffer,
 * index is then advanced, and size is set to one sectorsize. Mapping past
 * num_pages is treated as a fatal bug.
 *
 * @clear: NOTE(review): presumably guards the memset below (zero the page
 *         when non-zero); the guarding line is not visible in this excerpt.
 */
79 static void io_ctl_map_page(struct io_ctl *io_ctl, int clear)
81 BUG_ON(io_ctl->index >= io_ctl->num_pages);
/* Post-increment: the page selected is the one at the old index. */
82 io_ctl->cur = io_ctl->buffer + (io_ctl->index++ * io_ctl->root->sectorsize);
83 io_ctl->orig = io_ctl->cur;
84 io_ctl->size = io_ctl->root->sectorsize;
86 memset(io_ctl->cur, 0, io_ctl->root->sectorsize);
/* The visible body only unmaps the current page via io_ctl_unmap_page. */
89 static void io_ctl_drop_pages(struct io_ctl *io_ctl)
91 io_ctl_unmap_page(io_ctl);
/*
 * Read the on-disk contents of cache inode @ino into io_ctl->buffer.
 *
 * Starts from an EXTENT_DATA key search in @root and walks forward through
 * the items until total_size bytes have been accounted for. Each regular
 * file extent is read from disk into the buffer at the file offset taken
 * from its key. @path is released on the exit paths visible here.
 *
 * NOTE(review): the return statements and the handling after each failed
 * check are not visible in this excerpt.
 */
94 static int io_ctl_prepare_pages(struct io_ctl *io_ctl, struct btrfs_root *root,
95 struct btrfs_path *path, u64 ino)
97 struct extent_buffer *leaf;
98 struct btrfs_file_extent_item *fi;
105 key.type = BTRFS_EXTENT_DATA_KEY;
108 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
110 printf("Couldn't find file extent item for free space inode"
112 btrfs_release_path(path);
116 while (total_read < io_ctl->total_size) {
/* Slot is past the last item of this leaf: advance to the next leaf. */
117 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
118 ret = btrfs_next_leaf(root, path);
124 leaf = path->nodes[0];
126 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
/* Only EXTENT_DATA items that belong to @ino are of interest. */
127 if (key.objectid != ino) {
132 if (key.type != BTRFS_EXTENT_DATA_KEY) {
137 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
138 struct btrfs_file_extent_item);
/* Anything other than a regular extent is reported as unexpected. */
139 if (btrfs_file_extent_type(path->nodes[0], fi) !=
140 BTRFS_FILE_EXTENT_REG) {
141 printf("Not the file extent type we wanted\n");
/* Disk address = start of the extent on disk + offset into that extent. */
146 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi) +
147 btrfs_file_extent_offset(leaf, fi);
148 len = btrfs_file_extent_num_bytes(leaf, fi);
/* key.offset is the file offset, so it doubles as the buffer offset. */
149 ret = read_data_from_disk(root->fs_info,
150 io_ctl->buffer + key.offset, bytenr,
158 btrfs_release_path(path);
/*
 * Check that the generation stored in the mapped page equals @generation.
 *
 * The cursor first skips the crc area: one u32 per page when check_crcs is
 * set, otherwise a single u64. The 64-bit value found there is converted
 * with le64_to_cpu and compared. On a mismatch a message is printed and the
 * page is unmapped; otherwise the cursor advances past the generation.
 *
 * NOTE(review): the mismatch message passes *gen without le64_to_cpu, so on
 * a big-endian host the printed cache generation would be byte-swapped.
 * NOTE(review): return values are not visible in this excerpt.
 */
162 static int io_ctl_check_generation(struct io_ctl *io_ctl, u64 generation)
167 * Skip the crc area. If we don't check crcs then we just have a 64bit
168 * chunk at the front of the first page.
170 if (io_ctl->check_crcs) {
171 io_ctl->cur += sizeof(u32) * io_ctl->num_pages;
172 io_ctl->size -= sizeof(u64) +
173 (sizeof(u32) * io_ctl->num_pages);
175 io_ctl->cur += sizeof(u64);
176 io_ctl->size -= sizeof(u64) * 2;
180 if (le64_to_cpu(*gen) != generation) {
181 printk("btrfs: space cache generation "
182 "(%Lu) does not match inode (%Lu)\n", *gen,
184 io_ctl_unmap_page(io_ctl);
187 io_ctl->cur += sizeof(u64);
/*
 * Map the next page and, when crc checking is enabled, verify its crc32c.
 *
 * Without check_crcs the page is only mapped. With check_crcs the crc is
 * computed over the page starting at offset bytes from its beginning, and a
 * mismatch prints a message and unmaps the page.
 *
 * @index: page number whose crc is being checked.
 *
 * NOTE(review): the offset assignment below is presumably guarded so that
 * only the first page skips the crc array (sizeof(u32) * num_pages bytes);
 * the guard, the stored-crc lookup and the returns are not visible here.
 */
191 static int io_ctl_check_crc(struct io_ctl *io_ctl, int index)
197 if (!io_ctl->check_crcs) {
198 io_ctl_map_page(io_ctl, 0);
203 offset = sizeof(u32) * io_ctl->num_pages;
/* The crc array lives at the very start of the buffer. */
205 tmp = io_ctl->buffer;
209 io_ctl_map_page(io_ctl, 0);
210 crc = crc32c(crc, io_ctl->orig + offset, io_ctl->root->sectorsize - offset);
211 btrfs_csum_final(crc, (char *)&crc);
213 printk("btrfs: csum mismatch on free space cache\n");
214 io_ctl_unmap_page(io_ctl);
/*
 * Decode one on-disk btrfs_free_space_entry at the cursor into @entry.
 *
 * offset and bytes are converted from little-endian, then the cursor and
 * the remaining size are advanced by one entry. If the page can no longer
 * hold another full entry it is unmapped.
 *
 * @type: out parameter. NOTE(review): the store to *type is not visible in
 *        this excerpt; callers compare it with BTRFS_FREE_SPACE_EXTENT.
 *
 * NOTE(review): the crc check below is presumably only done when no page is
 * currently mapped; the guard is not visible here.
 */
221 static int io_ctl_read_entry(struct io_ctl *io_ctl,
222 struct btrfs_free_space *entry, u8 *type)
224 struct btrfs_free_space_entry *e;
228 ret = io_ctl_check_crc(io_ctl, io_ctl->index);
234 entry->offset = le64_to_cpu(e->offset);
235 entry->bytes = le64_to_cpu(e->bytes);
237 io_ctl->cur += sizeof(struct btrfs_free_space_entry);
238 io_ctl->size -= sizeof(struct btrfs_free_space_entry);
/* Room for at least one more entry means the page stays mapped. */
240 if (io_ctl->size >= sizeof(struct btrfs_free_space_entry))
243 io_ctl_unmap_page(io_ctl);
/*
 * Read one bitmap page: map and crc-check the next page, copy sectorsize
 * bytes from it into entry->bitmap, then unmap the page.
 * The caller must already have allocated entry->bitmap.
 */
248 static int io_ctl_read_bitmap(struct io_ctl *io_ctl,
249 struct btrfs_free_space *entry)
253 ret = io_ctl_check_crc(io_ctl, io_ctl->index);
257 memcpy(entry->bitmap, io_ctl->cur, io_ctl->root->sectorsize);
258 io_ctl_unmap_page(io_ctl);
/*
 * Load the free space cache described by a free-space header item in @root
 * into @ctl.
 *
 * Visible steps:
 *  1. Find the header item (objectid BTRFS_FREE_SPACE_OBJECTID) and read the
 *     entry count, bitmap count, generation and the key of the cache inode.
 *  2. Find that inode item, read its size and compare generations.
 *  3. Read the whole cache file into an io_ctl buffer, then check the first
 *     page crc and the stored generation.
 *  4. Decode num_entries entries; extent entries are linked straight away,
 *     bitmap entries are linked and queued so that their payload pages can
 *     be read afterwards.
 *  5. Read the bitmap pages, then fold everything together with
 *     merge_space_tree.
 *
 * @offset: NOTE(review): presumably used as the key offset of the header
 *          item (the caller passes the block group start); the assignment
 *          is not visible in this excerpt.
 *
 * NOTE(review): return values and goto targets are not visible here.
 */
264 static int __load_free_space_cache(struct btrfs_root *root,
265 struct btrfs_free_space_ctl *ctl,
266 struct btrfs_path *path, u64 offset)
268 struct btrfs_free_space_header *header;
269 struct btrfs_inode_item *inode_item;
270 struct extent_buffer *leaf;
271 struct io_ctl io_ctl;
272 struct btrfs_key key;
273 struct btrfs_key inode_location;
274 struct btrfs_disk_key disk_key;
275 struct btrfs_free_space *e, *n;
276 struct list_head bitmaps;
284 INIT_LIST_HEAD(&bitmaps);
286 key.objectid = BTRFS_FREE_SPACE_OBJECTID;
290 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
/* A positive result means no exact match for the header key. */
293 } else if (ret > 0) {
294 btrfs_release_path(path);
298 leaf = path->nodes[0];
299 header = btrfs_item_ptr(leaf, path->slots[0],
300 struct btrfs_free_space_header);
301 num_entries = btrfs_free_space_entries(leaf, header);
302 num_bitmaps = btrfs_free_space_bitmaps(leaf, header);
303 generation = btrfs_free_space_generation(leaf, header);
/* The header embeds the key that locates the cache inode. */
304 btrfs_free_space_key(leaf, header, &disk_key);
305 btrfs_disk_key_to_cpu(&inode_location, &disk_key);
306 btrfs_release_path(path);
308 ret = btrfs_search_slot(NULL, root, &inode_location, path, 0, 0);
310 printf("Couldn't find free space inode %d\n", ret);
314 leaf = path->nodes[0];
315 inode_item = btrfs_item_ptr(leaf, path->slots[0],
316 struct btrfs_inode_item);
318 inode_size = btrfs_inode_size(leaf, inode_item);
/* A zero size or zero generation is handled by releasing the path. */
319 if (!inode_size || !btrfs_inode_generation(leaf, inode_item)) {
320 btrfs_release_path(path);
/* Inode and header must agree on the generation. */
324 if (btrfs_inode_generation(leaf, inode_item) != generation) {
325 printf("free space inode generation (%llu) did not match "
326 "free space cache generation (%llu)\n",
327 (unsigned long long)btrfs_inode_generation(leaf,
329 (unsigned long long)generation);
330 btrfs_release_path(path);
334 btrfs_release_path(path);
/* Buffer is sized to the inode size; crc use depends on the inode number. */
339 ret = io_ctl_init(&io_ctl, inode_size, inode_location.objectid, root);
343 ret = io_ctl_prepare_pages(&io_ctl, root, path,
344 inode_location.objectid);
/* Maps page 0 and validates it before the generation is parsed. */
348 ret = io_ctl_check_crc(&io_ctl, 0);
352 ret = io_ctl_check_generation(&io_ctl, generation);
356 while (num_entries) {
357 e = calloc(1, sizeof(*e));
361 ret = io_ctl_read_entry(&io_ctl, e, &type);
372 if (type == BTRFS_FREE_SPACE_EXTENT) {
373 ret = link_free_space(ctl, e);
375 printf("Duplicate entries in free space cache, dumping");
/* A bitmap entry is only valid if the header announced bitmaps. */
380 BUG_ON(!num_bitmaps);
/* One sectorsize of bitmap payload, filled in by io_ctl_read_bitmap. */
382 e->bitmap = kzalloc(ctl->sectorsize, GFP_NOFS);
387 ret = link_free_space(ctl, e);
388 ctl->total_bitmaps++;
390 printf("Duplicate entries in free space cache, dumping");
/* Queue in file order; the payload pages follow all entries. */
395 list_add_tail(&e->list, &bitmaps);
401 io_ctl_unmap_page(&io_ctl);
404 * We add the bitmaps at the end of the entries in order that
405 * the bitmap entries are added to the cache.
407 list_for_each_entry_safe(e, n, &bitmaps, list) {
408 list_del_init(&e->list);
409 ret = io_ctl_read_bitmap(&io_ctl, e);
414 io_ctl_drop_pages(&io_ctl);
415 merge_space_tree(ctl);
418 io_ctl_free(&io_ctl);
/* Failure path: unmap the page and remove the entries linked so far. */
421 io_ctl_drop_pages(&io_ctl);
422 __btrfs_remove_free_space_cache(ctl);
/*
 * Load the free space cache of @block_group into its free_space_ctl.
 *
 * Allocates a temporary path, runs __load_free_space_cache against
 * fs_info->tree_root with the block group start (key.objectid) as the
 * offset argument, and frees the path again. A message naming the block
 * group is printed on the failure branch.
 *
 * NOTE(review): the conditions and return values are not visible in this
 * excerpt.
 */
426 int load_free_space_cache(struct btrfs_fs_info *fs_info,
427 struct btrfs_block_group_cache *block_group)
429 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
430 struct btrfs_path *path;
433 path = btrfs_alloc_path();
437 ret = __load_free_space_cache(fs_info->tree_root, ctl, path,
438 block_group->key.objectid);
439 btrfs_free_path(path);
444 printf("failed to load free space cache for block group %llu\n",
445 block_group->key.objectid);
/*
 * Convert a byte offset into a bit index inside the bitmap that starts at
 * @bitmap_start, where each bit stands for @unit bytes. An offset below
 * the bitmap start is treated as a fatal bug.
 */
451 static inline unsigned long offset_to_bit(u64 bitmap_start, u32 unit,
454 BUG_ON(offset < bitmap_start);
455 offset -= bitmap_start;
456 return (unsigned long)(offset / unit);
/* Number of whole @unit sized bits covered by @bytes (rounds down). */
459 static inline unsigned long bytes_to_bits(u64 bytes, u32 unit)
461 return (unsigned long)(bytes / unit);
/*
 * Compute the start offset of the bitmap window that contains a byte
 * offset. Windows are BITS_PER_BITMAP(sectorsize) * unit bytes wide and are
 * aligned relative to ctl->start, so the offset is rebased to ctl->start,
 * rounded down to a multiple of the window size, and rebased back.
 */
464 static inline u64 offset_to_bitmap(struct btrfs_free_space_ctl *ctl,
468 u64 bytes_per_bitmap;
469 u32 sectorsize = ctl->sectorsize;
471 bytes_per_bitmap = BITS_PER_BITMAP(sectorsize) * ctl->unit;
472 bitmap_start = offset - ctl->start;
/* Integer divide then multiply: rounds down to a window boundary. */
473 bitmap_start = bitmap_start / bytes_per_bitmap;
474 bitmap_start *= bytes_per_bitmap;
475 bitmap_start += ctl->start;
/*
 * Insert @node into the rb-tree @root, ordered by @offset.
 *
 * The tree is descended by comparing @offset with the offset of each
 * existing entry. When an extent entry and a bitmap entry share an offset,
 * the tie is broken as described by the comment inside the body (extent
 * first). The node is then linked at the slot found and the tree is
 * rebalanced.
 *
 * @bitmap: non-zero when the entry being inserted is a bitmap entry
 *          (link_free_space passes info->bitmap != NULL).
 *
 * NOTE(review): return values are not visible in this excerpt; callers
 * treat non-zero as a duplicate, and btrfs_add_free_space asserts that the
 * result is never -EEXIST.
 */
480 static int tree_insert_offset(struct rb_root *root, u64 offset,
481 struct rb_node *node, int bitmap)
483 struct rb_node **p = &root->rb_node;
484 struct rb_node *parent = NULL;
485 struct btrfs_free_space *info;
489 info = rb_entry(parent, struct btrfs_free_space, offset_index);
491 if (offset < info->offset) {
493 } else if (offset > info->offset) {
497 * we could have a bitmap entry and an extent entry
498 * share the same offset. If this is the case, we want
499 * the extent entry to always be found first if we do a
500 * linear search through the tree, since we want to have
501 * the quickest allocation time, and allocating from an
502 * extent is faster than allocating from a bitmap. So
503 * if we're inserting a bitmap and we find an entry at
504 * this offset, we want to go right, or after this entry
505 * logically. If we are inserting an extent and we've
506 * found a bitmap, we want to go left, or before
521 rb_link_node(node, parent, p);
522 rb_insert_color(node, root);
528 * searches the tree for the given offset.
530 * fuzzy - If this is set, then we are trying to make an allocation, and we just
531 * want a section that has at least bytes size and comes at or after the given
/*
 * Summary of the visible logic:
 *  - Descend from the root comparing @offset with each entry offset,
 *    remembering the closest entry seen.
 *  - With @bitmap_only set, only an entry at exactly @offset is considered
 *    and it is expected to carry a bitmap (WARN_ON otherwise).
 *  - Otherwise an extent entry that covers @offset is preferred over a
 *    bitmap entry; a bitmap entry matches when its window of
 *    BITS_PER_BITMAP(sectorsize) * unit bytes covers @offset.
 *  - The final section walks forward with rb_next until an entry reaching
 *    past @offset is found.
 *
 * NOTE(review): many branch and return lines are missing from this excerpt,
 * so the exact return value of each path (including when NULL is returned)
 * must be confirmed against the full source.
 */
534 static struct btrfs_free_space *
535 tree_search_offset(struct btrfs_free_space_ctl *ctl,
536 u64 offset, int bitmap_only, int fuzzy)
538 struct rb_node *n = ctl->free_space_offset.rb_node;
539 struct btrfs_free_space *entry, *prev = NULL;
540 u32 sectorsize = ctl->sectorsize;
542 /* find entry that is closest to the 'offset' */
549 entry = rb_entry(n, struct btrfs_free_space, offset_index);
552 if (offset < entry->offset)
554 else if (offset > entry->offset)
567 * bitmap entry and extent entry may share same offset,
568 * in that case, bitmap entry comes after extent entry.
573 entry = rb_entry(n, struct btrfs_free_space, offset_index);
574 if (entry->offset != offset)
577 WARN_ON(!entry->bitmap);
582 * if previous extent entry covers the offset,
583 * we should return it instead of the bitmap entry
585 n = rb_prev(&entry->offset_index);
587 prev = rb_entry(n, struct btrfs_free_space,
590 prev->offset + prev->bytes > offset)
600 /* find last entry before the 'offset' */
602 if (entry->offset > offset) {
603 n = rb_prev(&entry->offset_index);
605 entry = rb_entry(n, struct btrfs_free_space,
607 BUG_ON(entry->offset > offset);
617 n = rb_prev(&entry->offset_index);
619 prev = rb_entry(n, struct btrfs_free_space,
622 prev->offset + prev->bytes > offset)
625 if (entry->offset + BITS_PER_BITMAP(sectorsize) * ctl->unit > offset)
627 } else if (entry->offset + entry->bytes > offset)
635 if (entry->offset + BITS_PER_BITMAP(sectorsize) *
639 if (entry->offset + entry->bytes > offset)
643 n = rb_next(&entry->offset_index);
646 entry = rb_entry(n, struct btrfs_free_space, offset_index);
/*
 * Remove @info from the offset tree of @ctl and subtract its bytes from the
 * running free_space total. The entry itself is not freed by the lines
 * visible here.
 */
651 void unlink_free_space(struct btrfs_free_space_ctl *ctl,
652 struct btrfs_free_space *info)
654 rb_erase(&info->offset_index, &ctl->free_space_offset);
656 ctl->free_space -= info->bytes;
/*
 * Insert @info into the offset tree of @ctl and add its bytes to the
 * running free_space total.
 *
 * An extent entry (no bitmap) with zero bytes is treated as a fatal bug.
 * NOTE(review): the check of ret and the return are not visible in this
 * excerpt; presumably the total is only updated when the insert succeeded.
 */
659 static int link_free_space(struct btrfs_free_space_ctl *ctl,
660 struct btrfs_free_space *info)
664 BUG_ON(!info->bitmap && !info->bytes);
/* Last argument tells the insert whether this is a bitmap entry. */
665 ret = tree_insert_offset(&ctl->free_space_offset, info->offset,
666 &info->offset_index, (info->bitmap != NULL));
670 ctl->free_space += info->bytes;
/*
 * Look in @bitmap_info for a run of set bits that is at least *bytes long.
 *
 * The scan starts at the bit for max(*offset, bitmap start). For each set
 * bit the next zero bit is located; the distance between them is the run
 * length. When a long enough run is found, *offset and *bytes are rewritten
 * to the byte offset and byte length of that run.
 *
 * NOTE(review): the return statements are not visible in this excerpt;
 * merge_space_tree loops while the result is zero, so zero means found.
 */
675 static int search_bitmap(struct btrfs_free_space_ctl *ctl,
676 struct btrfs_free_space *bitmap_info, u64 *offset,
679 unsigned long found_bits = 0;
680 unsigned long bits, i;
681 unsigned long next_zero;
682 u32 sectorsize = ctl->sectorsize;
/* Clamp the start so offset_to_bit never sees an offset below the bitmap. */
684 i = offset_to_bit(bitmap_info->offset, ctl->unit,
685 max_t(u64, *offset, bitmap_info->offset));
686 bits = bytes_to_bits(*bytes, ctl->unit);
688 for_each_set_bit_from(i, bitmap_info->bitmap, BITS_PER_BITMAP(sectorsize)) {
689 next_zero = find_next_zero_bit(bitmap_info->bitmap,
690 BITS_PER_BITMAP(sectorsize), i);
/* Run of set bits spans [i, next_zero). */
691 if ((next_zero - i) >= bits) {
692 found_bits = next_zero - i;
/* Translate the bit position and run length back into bytes. */
699 *offset = (u64)(i * ctl->unit) + bitmap_info->offset;
700 *bytes = (u64)(found_bits) * ctl->unit;
/*
 * Look up the free space entry for @offset in @ctl.
 *
 * Thin wrapper around tree_search_offset with bitmap_only and fuzzy both
 * off. The @bytes argument is not used by the visible body.
 */
707 struct btrfs_free_space *
708 btrfs_find_free_space(struct btrfs_free_space_ctl *ctl, u64 offset, u64 bytes)
710 return tree_search_offset(ctl, offset, 0, 0);
/*
 * Grow @info by absorbing extent entries that touch it on either side.
 *
 * @info is not in the tree at this point (btrfs_add_free_space links it
 * afterwards). The right neighbour is looked up at the end of the range;
 * the left neighbour is either the tree predecessor of the right neighbour
 * or the entry found at offset - 1. Bitmap entries are never merged.
 *
 * NOTE(review): freeing of the absorbed entries is not visible in this
 * excerpt.
 */
713 static void try_merge_free_space(struct btrfs_free_space_ctl *ctl,
714 struct btrfs_free_space *info)
716 struct btrfs_free_space *left_info;
717 struct btrfs_free_space *right_info;
718 u64 offset = info->offset;
719 u64 bytes = info->bytes;
722 * first we want to see if there is free space adjacent to the range we
723 * are adding, if there is remove that struct and add a new one to
724 * cover the entire range
726 right_info = tree_search_offset(ctl, offset + bytes, 0, 0);
727 if (right_info && rb_prev(&right_info->offset_index))
728 left_info = rb_entry(rb_prev(&right_info->offset_index),
729 struct btrfs_free_space, offset_index);
731 left_info = tree_search_offset(ctl, offset - 1, 0, 0);
/* Right side: take over the bytes of an adjoining extent entry. */
733 if (right_info && !right_info->bitmap) {
734 unlink_free_space(ctl, right_info);
735 info->bytes += right_info->bytes;
/* Left side: only merge when the left extent ends exactly at our start. */
739 if (left_info && !left_info->bitmap &&
740 left_info->offset + left_info->bytes == offset) {
741 unlink_free_space(ctl, left_info);
742 info->offset = left_info->offset;
743 info->bytes += left_info->bytes;
/*
 * Print every free space entry of @block_group in offset order, followed by
 * a summary line with a count.
 *
 * NOTE(review): the statement controlled by the size test below is not
 * visible in this excerpt; presumably it increments the count for entries
 * of at least the requested size in a block group that is not read-only.
 */
748 void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group,
751 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
752 struct btrfs_free_space *info;
756 for (n = rb_first(&ctl->free_space_offset); n; n = rb_next(n)) {
757 info = rb_entry(n, struct btrfs_free_space, offset_index);
758 if (info->bytes >= bytes && !block_group->ro)
760 printk("entry offset %llu, bytes %llu, bitmap %s\n",
761 (unsigned long long)info->offset,
762 (unsigned long long)info->bytes,
763 (info->bitmap) ? "yes" : "no");
765 printk("%d blocks of free space at or bigger than bytes is \n", count);
/*
 * Allocate a zeroed free_space_ctl for @block_group and attach it.
 *
 * Both sectorsize and unit (bytes per bitmap bit) are set to the given
 * sectorsize, start is the block group start offset (key.objectid), and
 * the ctl and the block group are pointed at each other.
 *
 * NOTE(review): the allocation failure check and the return values are not
 * visible in this excerpt.
 */
768 int btrfs_init_free_space_ctl(struct btrfs_block_group_cache *block_group,
771 struct btrfs_free_space_ctl *ctl;
773 ctl = calloc(1, sizeof(*ctl));
777 ctl->sectorsize = sectorsize;
778 ctl->unit = sectorsize;
779 ctl->start = block_group->key.objectid;
780 ctl->private = block_group;
781 block_group->free_space_ctl = ctl;
/*
 * Empty the offset tree of @ctl by repeatedly unlinking its last entry
 * until no node is left.
 *
 * NOTE(review): freeing of each entry and of its bitmap is not visible in
 * this excerpt.
 */
786 void __btrfs_remove_free_space_cache(struct btrfs_free_space_ctl *ctl)
788 struct btrfs_free_space *info;
789 struct rb_node *node;
791 while ((node = rb_last(&ctl->free_space_offset)) != NULL) {
792 info = rb_entry(node, struct btrfs_free_space, offset_index);
793 unlink_free_space(ctl, info);
/*
 * Block group flavoured wrapper: empties the free space tree that belongs
 * to @block_group via __btrfs_remove_free_space_cache.
 */
799 void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group)
801 __btrfs_remove_free_space_cache(block_group->free_space_ctl);
/*
 * Record a new range of free space in @ctl.
 *
 * A fresh zeroed entry is allocated and given @offset, merged with any
 * adjoining extent entries, and then linked into the tree. A failed link is
 * reported, and an -EEXIST result is treated as a fatal bug.
 *
 * NOTE(review): the allocation failure check, the assignment of the length
 * and the return are not visible in this excerpt.
 */
804 static int btrfs_add_free_space(struct btrfs_free_space_ctl *ctl, u64 offset,
807 struct btrfs_free_space *info;
810 info = calloc(1, sizeof(*info));
814 info->offset = offset;
/* Merge first so that the entry is linked once with its final extent. */
817 try_merge_free_space(ctl, info);
819 ret = link_free_space(ctl, info);
821 printk(KERN_CRIT "btrfs: unable to add free space :%d\n", ret);
822 BUG_ON(ret == -EEXIST);
829 * Merges all the free space cache and kills the bitmap entries since we just
830 * want to use the free space cache to verify it's correct, no reason to keep
831 * the bitmaps around to confuse things.
833 static void merge_space_tree(struct btrfs_free_space_ctl *ctl)
835 struct btrfs_free_space *e, *prev = NULL;
838 u32 sectorsize = ctl->sectorsize;
842 for (n = rb_first(&ctl->free_space_offset); n; n = rb_next(n)) {
843 e = rb_entry(n, struct btrfs_free_space, offset_index);
845 u64 offset = e->offset, bytes = ctl->unit;
848 end = e->offset + (u64)(BITS_PER_BITMAP(sectorsize) * ctl->unit);
850 unlink_free_space(ctl, e);
851 while (!(search_bitmap(ctl, e, &offset, &bytes))) {
852 ret = btrfs_add_free_space(ctl, offset,
866 if (prev->offset + prev->bytes == e->offset) {
867 unlink_free_space(ctl, prev);
868 unlink_free_space(ctl, e);
869 prev->bytes += e->bytes;
871 link_free_space(ctl, prev);