2 * Copyright (C) 2008 Red Hat. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
19 #include <linux/pagemap.h>
20 #include <linux/sched.h>
21 #include <linux/slab.h>
22 #include <linux/math64.h>
24 #include "free-space-cache.h"
25 #include "transaction.h"
/* number of sectors one bitmap page can track (one bit per sector) */
#define BITS_PER_BITMAP		(PAGE_CACHE_SIZE * 8)
/* memory budget for free space tracking: at most 32k per 1GB of group space */
#define MAX_CACHE_BYTES_PER_GIG	(32 * 1024)
30 static inline unsigned long offset_to_bit(u64 bitmap_start, u64 sectorsize,
33 BUG_ON(offset < bitmap_start);
34 offset -= bitmap_start;
35 return (unsigned long)(div64_u64(offset, sectorsize));
38 static inline unsigned long bytes_to_bits(u64 bytes, u64 sectorsize)
40 return (unsigned long)(div64_u64(bytes, sectorsize));
43 static inline u64 offset_to_bitmap(struct btrfs_block_group_cache *block_group,
49 bytes_per_bitmap = BITS_PER_BITMAP * block_group->sectorsize;
50 bitmap_start = offset - block_group->key.objectid;
51 bitmap_start = div64_u64(bitmap_start, bytes_per_bitmap);
52 bitmap_start *= bytes_per_bitmap;
53 bitmap_start += block_group->key.objectid;
58 static int tree_insert_offset(struct rb_root *root, u64 offset,
59 struct rb_node *node, int bitmap)
61 struct rb_node **p = &root->rb_node;
62 struct rb_node *parent = NULL;
63 struct btrfs_free_space *info;
67 info = rb_entry(parent, struct btrfs_free_space, offset_index);
69 if (offset < info->offset) {
71 } else if (offset > info->offset) {
75 * we could have a bitmap entry and an extent entry
76 * share the same offset. If this is the case, we want
77 * the extent entry to always be found first if we do a
78 * linear search through the tree, since we want to have
79 * the quickest allocation time, and allocating from an
80 * extent is faster than allocating from a bitmap. So
81 * if we're inserting a bitmap and we find an entry at
82 * this offset, we want to go right, or after this entry
83 * logically. If we are inserting an extent and we've
84 * found a bitmap, we want to go left, or before
88 WARN_ON(info->bitmap);
91 WARN_ON(!info->bitmap);
97 rb_link_node(node, parent, p);
98 rb_insert_color(node, root);
104 * searches the tree for the given offset.
106 * fuzzy - If this is set, then we are trying to make an allocation, and we just
107 * want a section that has at least bytes size and comes at or after the given
110 static struct btrfs_free_space *
111 tree_search_offset(struct btrfs_block_group_cache *block_group,
112 u64 offset, int bitmap_only, int fuzzy)
114 struct rb_node *n = block_group->free_space_offset.rb_node;
115 struct btrfs_free_space *entry, *prev = NULL;
117 /* find entry that is closest to the 'offset' */
124 entry = rb_entry(n, struct btrfs_free_space, offset_index);
127 if (offset < entry->offset)
129 else if (offset > entry->offset)
142 * bitmap entry and extent entry may share same offset,
143 * in that case, bitmap entry comes after extent entry.
148 entry = rb_entry(n, struct btrfs_free_space, offset_index);
149 if (entry->offset != offset)
152 WARN_ON(!entry->bitmap);
157 * if previous extent entry covers the offset,
158 * we should return it instead of the bitmap entry
160 n = &entry->offset_index;
165 prev = rb_entry(n, struct btrfs_free_space,
168 if (prev->offset + prev->bytes > offset)
180 /* find last entry before the 'offset' */
182 if (entry->offset > offset) {
183 n = rb_prev(&entry->offset_index);
185 entry = rb_entry(n, struct btrfs_free_space,
187 BUG_ON(entry->offset > offset);
197 n = &entry->offset_index;
202 prev = rb_entry(n, struct btrfs_free_space,
205 if (prev->offset + prev->bytes > offset)
210 if (entry->offset + BITS_PER_BITMAP *
211 block_group->sectorsize > offset)
213 } else if (entry->offset + entry->bytes > offset)
221 if (entry->offset + BITS_PER_BITMAP *
222 block_group->sectorsize > offset)
225 if (entry->offset + entry->bytes > offset)
229 n = rb_next(&entry->offset_index);
232 entry = rb_entry(n, struct btrfs_free_space, offset_index);
237 static void unlink_free_space(struct btrfs_block_group_cache *block_group,
238 struct btrfs_free_space *info)
240 rb_erase(&info->offset_index, &block_group->free_space_offset);
241 block_group->free_extents--;
242 block_group->free_space -= info->bytes;
245 static int link_free_space(struct btrfs_block_group_cache *block_group,
246 struct btrfs_free_space *info)
250 BUG_ON(!info->bitmap && !info->bytes);
251 ret = tree_insert_offset(&block_group->free_space_offset, info->offset,
252 &info->offset_index, (info->bitmap != NULL));
256 block_group->free_space += info->bytes;
257 block_group->free_extents++;
261 static void recalculate_thresholds(struct btrfs_block_group_cache *block_group)
268 * The goal is to keep the total amount of memory used per 1gb of space
269 * at or below 32k, so we need to adjust how much memory we allow to be
270 * used by extent based free space tracking
272 max_bytes = MAX_CACHE_BYTES_PER_GIG *
273 (div64_u64(block_group->key.offset, 1024 * 1024 * 1024));
276 * we want to account for 1 more bitmap than what we have so we can make
277 * sure we don't go over our overall goal of MAX_CACHE_BYTES_PER_GIG as
278 * we add more bitmaps.
280 bitmap_bytes = (block_group->total_bitmaps + 1) * PAGE_CACHE_SIZE;
282 if (bitmap_bytes >= max_bytes) {
283 block_group->extents_thresh = 0;
288 * we want the extent entry threshold to always be at most 1/2 the maxw
289 * bytes we can have, or whatever is less than that.
291 extent_bytes = max_bytes - bitmap_bytes;
292 extent_bytes = min_t(u64, extent_bytes, div64_u64(max_bytes, 2));
294 block_group->extents_thresh =
295 div64_u64(extent_bytes, (sizeof(struct btrfs_free_space)));
298 static void bitmap_clear_bits(struct btrfs_block_group_cache *block_group,
299 struct btrfs_free_space *info, u64 offset,
302 unsigned long start, end;
305 start = offset_to_bit(info->offset, block_group->sectorsize, offset);
306 end = start + bytes_to_bits(bytes, block_group->sectorsize);
307 BUG_ON(end > BITS_PER_BITMAP);
309 for (i = start; i < end; i++)
310 clear_bit(i, info->bitmap);
312 info->bytes -= bytes;
313 block_group->free_space -= bytes;
316 static void bitmap_set_bits(struct btrfs_block_group_cache *block_group,
317 struct btrfs_free_space *info, u64 offset,
320 unsigned long start, end;
323 start = offset_to_bit(info->offset, block_group->sectorsize, offset);
324 end = start + bytes_to_bits(bytes, block_group->sectorsize);
325 BUG_ON(end > BITS_PER_BITMAP);
327 for (i = start; i < end; i++)
328 set_bit(i, info->bitmap);
330 info->bytes += bytes;
331 block_group->free_space += bytes;
334 static int search_bitmap(struct btrfs_block_group_cache *block_group,
335 struct btrfs_free_space *bitmap_info, u64 *offset,
338 unsigned long found_bits = 0;
339 unsigned long bits, i;
340 unsigned long next_zero;
342 i = offset_to_bit(bitmap_info->offset, block_group->sectorsize,
343 max_t(u64, *offset, bitmap_info->offset));
344 bits = bytes_to_bits(*bytes, block_group->sectorsize);
346 for (i = find_next_bit(bitmap_info->bitmap, BITS_PER_BITMAP, i);
348 i = find_next_bit(bitmap_info->bitmap, BITS_PER_BITMAP, i + 1)) {
349 next_zero = find_next_zero_bit(bitmap_info->bitmap,
351 if ((next_zero - i) >= bits) {
352 found_bits = next_zero - i;
359 *offset = (u64)(i * block_group->sectorsize) +
361 *bytes = (u64)(found_bits) * block_group->sectorsize;
368 static struct btrfs_free_space *find_free_space(struct btrfs_block_group_cache
369 *block_group, u64 *offset,
370 u64 *bytes, int debug)
372 struct btrfs_free_space *entry;
373 struct rb_node *node;
376 if (!block_group->free_space_offset.rb_node)
379 entry = tree_search_offset(block_group,
380 offset_to_bitmap(block_group, *offset),
385 for (node = &entry->offset_index; node; node = rb_next(node)) {
386 entry = rb_entry(node, struct btrfs_free_space, offset_index);
387 if (entry->bytes < *bytes)
391 ret = search_bitmap(block_group, entry, offset, bytes);
397 *offset = entry->offset;
398 *bytes = entry->bytes;
405 static void add_new_bitmap(struct btrfs_block_group_cache *block_group,
406 struct btrfs_free_space *info, u64 offset)
408 u64 bytes_per_bg = BITS_PER_BITMAP * block_group->sectorsize;
409 int max_bitmaps = (int)div64_u64(block_group->key.offset +
410 bytes_per_bg - 1, bytes_per_bg);
411 BUG_ON(block_group->total_bitmaps >= max_bitmaps);
413 info->offset = offset_to_bitmap(block_group, offset);
415 link_free_space(block_group, info);
416 block_group->total_bitmaps++;
418 recalculate_thresholds(block_group);
421 static noinline int remove_from_bitmap(struct btrfs_block_group_cache *block_group,
422 struct btrfs_free_space *bitmap_info,
423 u64 *offset, u64 *bytes)
426 u64 search_start, search_bytes;
430 end = bitmap_info->offset +
431 (u64)(BITS_PER_BITMAP * block_group->sectorsize) - 1;
434 * XXX - this can go away after a few releases.
436 * since the only user of btrfs_remove_free_space is the tree logging
437 * stuff, and the only way to test that is under crash conditions, we
438 * want to have this debug stuff here just in case somethings not
439 * working. Search the bitmap for the space we are trying to use to
440 * make sure its actually there. If its not there then we need to stop
441 * because something has gone wrong.
443 search_start = *offset;
444 search_bytes = *bytes;
445 ret = search_bitmap(block_group, bitmap_info, &search_start,
447 BUG_ON(ret < 0 || search_start != *offset);
449 if (*offset > bitmap_info->offset && *offset + *bytes > end) {
450 bitmap_clear_bits(block_group, bitmap_info, *offset,
452 *bytes -= end - *offset + 1;
454 } else if (*offset >= bitmap_info->offset && *offset + *bytes <= end) {
455 bitmap_clear_bits(block_group, bitmap_info, *offset, *bytes);
460 struct rb_node *next = rb_next(&bitmap_info->offset_index);
461 if (!bitmap_info->bytes) {
462 unlink_free_space(block_group, bitmap_info);
463 kfree(bitmap_info->bitmap);
465 block_group->total_bitmaps--;
466 recalculate_thresholds(block_group);
470 * no entry after this bitmap, but we still have bytes to
471 * remove, so something has gone wrong.
476 bitmap_info = rb_entry(next, struct btrfs_free_space,
480 * if the next entry isn't a bitmap we need to return to let the
481 * extent stuff do its work.
483 if (!bitmap_info->bitmap)
487 * Ok the next item is a bitmap, but it may not actually hold
488 * the information for the rest of this free space stuff, so
489 * look for it, and if we don't find it return so we can try
490 * everything over again.
492 search_start = *offset;
493 search_bytes = *bytes;
494 ret = search_bitmap(block_group, bitmap_info, &search_start,
496 if (ret < 0 || search_start != *offset)
500 } else if (!bitmap_info->bytes) {
501 unlink_free_space(block_group, bitmap_info);
502 kfree(bitmap_info->bitmap);
504 block_group->total_bitmaps--;
505 recalculate_thresholds(block_group);
511 static int insert_into_bitmap(struct btrfs_block_group_cache *block_group,
512 struct btrfs_free_space *info)
514 struct btrfs_free_space *bitmap_info;
516 u64 bytes, offset, end;
520 * If we are below the extents threshold then we can add this as an
521 * extent, and don't have to deal with the bitmap
523 if (block_group->free_extents < block_group->extents_thresh &&
524 info->bytes > block_group->sectorsize * 4)
528 * some block groups are so tiny they can't be enveloped by a bitmap, so
529 * don't even bother to create a bitmap for this
531 if (BITS_PER_BITMAP * block_group->sectorsize >
532 block_group->key.offset)
536 offset = info->offset;
539 bitmap_info = tree_search_offset(block_group,
540 offset_to_bitmap(block_group, offset),
547 end = bitmap_info->offset +
548 (u64)(BITS_PER_BITMAP * block_group->sectorsize);
550 if (offset >= bitmap_info->offset && offset + bytes > end) {
551 bitmap_set_bits(block_group, bitmap_info, offset,
553 bytes -= end - offset;
556 } else if (offset >= bitmap_info->offset && offset + bytes <= end) {
557 bitmap_set_bits(block_group, bitmap_info, offset, bytes);
570 if (info && info->bitmap) {
571 add_new_bitmap(block_group, info, offset);
576 spin_unlock(&block_group->tree_lock);
578 /* no pre-allocated info, allocate a new one */
580 info = kzalloc(sizeof(struct btrfs_free_space),
583 spin_lock(&block_group->tree_lock);
589 /* allocate the bitmap */
590 info->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS);
591 spin_lock(&block_group->tree_lock);
609 int btrfs_add_free_space(struct btrfs_block_group_cache *block_group,
610 u64 offset, u64 bytes)
612 struct btrfs_free_space *right_info = NULL;
613 struct btrfs_free_space *left_info = NULL;
614 struct btrfs_free_space *info = NULL;
617 info = kzalloc(sizeof(struct btrfs_free_space), GFP_NOFS);
621 info->offset = offset;
624 spin_lock(&block_group->tree_lock);
627 * first we want to see if there is free space adjacent to the range we
628 * are adding, if there is remove that struct and add a new one to
629 * cover the entire range
631 right_info = tree_search_offset(block_group, offset + bytes, 0, 0);
632 if (right_info && rb_prev(&right_info->offset_index))
633 left_info = rb_entry(rb_prev(&right_info->offset_index),
634 struct btrfs_free_space, offset_index);
636 left_info = tree_search_offset(block_group, offset - 1, 0, 0);
639 * If there was no extent directly to the left or right of this new
640 * extent then we know we're going to have to allocate a new extent, so
641 * before we do that see if we need to drop this into a bitmap
643 if ((!left_info || left_info->bitmap) &&
644 (!right_info || right_info->bitmap)) {
645 ret = insert_into_bitmap(block_group, info);
655 if (right_info && !right_info->bitmap) {
656 unlink_free_space(block_group, right_info);
657 info->bytes += right_info->bytes;
661 if (left_info && !left_info->bitmap &&
662 left_info->offset + left_info->bytes == offset) {
663 unlink_free_space(block_group, left_info);
664 info->offset = left_info->offset;
665 info->bytes += left_info->bytes;
669 ret = link_free_space(block_group, info);
673 spin_unlock(&block_group->tree_lock);
676 printk(KERN_CRIT "btrfs: unable to add free space :%d\n", ret);
677 BUG_ON(ret == -EEXIST);
683 int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
684 u64 offset, u64 bytes)
686 struct btrfs_free_space *info;
687 struct btrfs_free_space *next_info = NULL;
690 spin_lock(&block_group->tree_lock);
693 info = tree_search_offset(block_group, offset, 0, 0);
696 * oops didn't find an extent that matched the space we wanted
697 * to remove, look for a bitmap instead
699 info = tree_search_offset(block_group,
700 offset_to_bitmap(block_group, offset),
708 if (info->bytes < bytes && rb_next(&info->offset_index)) {
710 next_info = rb_entry(rb_next(&info->offset_index),
711 struct btrfs_free_space,
714 if (next_info->bitmap)
715 end = next_info->offset + BITS_PER_BITMAP *
716 block_group->sectorsize - 1;
718 end = next_info->offset + next_info->bytes;
720 if (next_info->bytes < bytes ||
721 next_info->offset > offset || offset > end) {
722 printk(KERN_CRIT "Found free space at %llu, size %llu,"
723 " trying to use %llu\n",
724 (unsigned long long)info->offset,
725 (unsigned long long)info->bytes,
726 (unsigned long long)bytes);
735 if (info->bytes == bytes) {
736 unlink_free_space(block_group, info);
739 block_group->total_bitmaps--;
745 if (!info->bitmap && info->offset == offset) {
746 unlink_free_space(block_group, info);
747 info->offset += bytes;
748 info->bytes -= bytes;
749 link_free_space(block_group, info);
753 if (!info->bitmap && info->offset <= offset &&
754 info->offset + info->bytes >= offset + bytes) {
755 u64 old_start = info->offset;
757 * we're freeing space in the middle of the info,
758 * this can happen during tree log replay
760 * first unlink the old info and then
761 * insert it again after the hole we're creating
763 unlink_free_space(block_group, info);
764 if (offset + bytes < info->offset + info->bytes) {
765 u64 old_end = info->offset + info->bytes;
767 info->offset = offset + bytes;
768 info->bytes = old_end - info->offset;
769 ret = link_free_space(block_group, info);
774 /* the hole we're creating ends at the end
775 * of the info struct, just free the info
779 spin_unlock(&block_group->tree_lock);
781 /* step two, insert a new info struct to cover
782 * anything before the hole
784 ret = btrfs_add_free_space(block_group, old_start,
790 ret = remove_from_bitmap(block_group, info, &offset, &bytes);
795 spin_unlock(&block_group->tree_lock);
800 void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group,
803 struct btrfs_free_space *info;
807 for (n = rb_first(&block_group->free_space_offset); n; n = rb_next(n)) {
808 info = rb_entry(n, struct btrfs_free_space, offset_index);
809 if (info->bytes >= bytes)
811 printk(KERN_CRIT "entry offset %llu, bytes %llu, bitmap %s\n",
812 (unsigned long long)info->offset,
813 (unsigned long long)info->bytes,
814 (info->bitmap) ? "yes" : "no");
816 printk(KERN_INFO "block group has cluster?: %s\n",
817 list_empty(&block_group->cluster_list) ? "no" : "yes");
818 printk(KERN_INFO "%d blocks of free space at or bigger than bytes is"
822 u64 btrfs_block_group_free_space(struct btrfs_block_group_cache *block_group)
824 struct btrfs_free_space *info;
828 for (n = rb_first(&block_group->free_space_offset); n;
830 info = rb_entry(n, struct btrfs_free_space, offset_index);
838 * for a given cluster, put all of its extents back into the free
839 * space cache. If the block group passed doesn't match the block group
840 * pointed to by the cluster, someone else raced in and freed the
841 * cluster already. In that case, we just return without changing anything
844 __btrfs_return_cluster_to_free_space(
845 struct btrfs_block_group_cache *block_group,
846 struct btrfs_free_cluster *cluster)
848 struct btrfs_free_space *entry;
849 struct rb_node *node;
852 spin_lock(&cluster->lock);
853 if (cluster->block_group != block_group)
856 bitmap = cluster->points_to_bitmap;
857 cluster->block_group = NULL;
858 cluster->window_start = 0;
859 list_del_init(&cluster->block_group_list);
860 cluster->points_to_bitmap = false;
865 node = rb_first(&cluster->root);
867 entry = rb_entry(node, struct btrfs_free_space, offset_index);
868 node = rb_next(&entry->offset_index);
869 rb_erase(&entry->offset_index, &cluster->root);
870 BUG_ON(entry->bitmap);
871 tree_insert_offset(&block_group->free_space_offset,
872 entry->offset, &entry->offset_index, 0);
874 cluster->root = RB_ROOT;
877 spin_unlock(&cluster->lock);
878 btrfs_put_block_group(block_group);
882 void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group)
884 struct btrfs_free_space *info;
885 struct rb_node *node;
886 struct btrfs_free_cluster *cluster;
887 struct list_head *head;
889 spin_lock(&block_group->tree_lock);
890 while ((head = block_group->cluster_list.next) !=
891 &block_group->cluster_list) {
892 cluster = list_entry(head, struct btrfs_free_cluster,
895 WARN_ON(cluster->block_group != block_group);
896 __btrfs_return_cluster_to_free_space(block_group, cluster);
897 if (need_resched()) {
898 spin_unlock(&block_group->tree_lock);
900 spin_lock(&block_group->tree_lock);
904 while ((node = rb_last(&block_group->free_space_offset)) != NULL) {
905 info = rb_entry(node, struct btrfs_free_space, offset_index);
906 unlink_free_space(block_group, info);
910 if (need_resched()) {
911 spin_unlock(&block_group->tree_lock);
913 spin_lock(&block_group->tree_lock);
917 spin_unlock(&block_group->tree_lock);
920 u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group,
921 u64 offset, u64 bytes, u64 empty_size)
923 struct btrfs_free_space *entry = NULL;
924 u64 bytes_search = bytes + empty_size;
927 spin_lock(&block_group->tree_lock);
928 entry = find_free_space(block_group, &offset, &bytes_search, 0);
934 bitmap_clear_bits(block_group, entry, offset, bytes);
936 unlink_free_space(block_group, entry);
937 kfree(entry->bitmap);
939 block_group->total_bitmaps--;
940 recalculate_thresholds(block_group);
943 unlink_free_space(block_group, entry);
944 entry->offset += bytes;
945 entry->bytes -= bytes;
949 link_free_space(block_group, entry);
953 spin_unlock(&block_group->tree_lock);
959 * given a cluster, put all of its extents back into the free space
960 * cache. If a block group is passed, this function will only free
961 * a cluster that belongs to the passed block group.
963 * Otherwise, it'll get a reference on the block group pointed to by the
964 * cluster and remove the cluster from it.
966 int btrfs_return_cluster_to_free_space(
967 struct btrfs_block_group_cache *block_group,
968 struct btrfs_free_cluster *cluster)
972 /* first, get a safe pointer to the block group */
973 spin_lock(&cluster->lock);
975 block_group = cluster->block_group;
977 spin_unlock(&cluster->lock);
980 } else if (cluster->block_group != block_group) {
981 /* someone else has already freed it don't redo their work */
982 spin_unlock(&cluster->lock);
985 atomic_inc(&block_group->count);
986 spin_unlock(&cluster->lock);
988 /* now return any extents the cluster had on it */
989 spin_lock(&block_group->tree_lock);
990 ret = __btrfs_return_cluster_to_free_space(block_group, cluster);
991 spin_unlock(&block_group->tree_lock);
993 /* finally drop our ref */
994 btrfs_put_block_group(block_group);
998 static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group,
999 struct btrfs_free_cluster *cluster,
1000 u64 bytes, u64 min_start)
1002 struct btrfs_free_space *entry;
1004 u64 search_start = cluster->window_start;
1005 u64 search_bytes = bytes;
1008 spin_lock(&block_group->tree_lock);
1009 spin_lock(&cluster->lock);
1011 if (!cluster->points_to_bitmap)
1014 if (cluster->block_group != block_group)
1018 * search_start is the beginning of the bitmap, but at some point it may
1019 * be a good idea to point to the actual start of the free area in the
1020 * bitmap, so do the offset_to_bitmap trick anyway, and set bitmap_only
1021 * to 1 to make sure we get the bitmap entry
1023 entry = tree_search_offset(block_group,
1024 offset_to_bitmap(block_group, search_start),
1026 if (!entry || !entry->bitmap)
1029 search_start = min_start;
1030 search_bytes = bytes;
1032 err = search_bitmap(block_group, entry, &search_start,
1038 bitmap_clear_bits(block_group, entry, ret, bytes);
1040 spin_unlock(&cluster->lock);
1041 spin_unlock(&block_group->tree_lock);
1047 * given a cluster, try to allocate 'bytes' from it, returns 0
1048 * if it couldn't find anything suitably large, or a logical disk offset
1049 * if things worked out
1051 u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group,
1052 struct btrfs_free_cluster *cluster, u64 bytes,
1055 struct btrfs_free_space *entry = NULL;
1056 struct rb_node *node;
1059 if (cluster->points_to_bitmap)
1060 return btrfs_alloc_from_bitmap(block_group, cluster, bytes,
1063 spin_lock(&cluster->lock);
1064 if (bytes > cluster->max_size)
1067 if (cluster->block_group != block_group)
1070 node = rb_first(&cluster->root);
1074 entry = rb_entry(node, struct btrfs_free_space, offset_index);
1077 if (entry->bytes < bytes || entry->offset < min_start) {
1078 struct rb_node *node;
1080 node = rb_next(&entry->offset_index);
1083 entry = rb_entry(node, struct btrfs_free_space,
1087 ret = entry->offset;
1089 entry->offset += bytes;
1090 entry->bytes -= bytes;
1092 if (entry->bytes == 0) {
1093 rb_erase(&entry->offset_index, &cluster->root);
1099 spin_unlock(&cluster->lock);
1104 static int btrfs_bitmap_cluster(struct btrfs_block_group_cache *block_group,
1105 struct btrfs_free_space *entry,
1106 struct btrfs_free_cluster *cluster,
1107 u64 offset, u64 bytes, u64 min_bytes)
1109 unsigned long next_zero;
1111 unsigned long search_bits;
1112 unsigned long total_bits;
1113 unsigned long found_bits;
1114 unsigned long start = 0;
1115 unsigned long total_found = 0;
1118 i = offset_to_bit(entry->offset, block_group->sectorsize,
1119 max_t(u64, offset, entry->offset));
1120 search_bits = bytes_to_bits(min_bytes, block_group->sectorsize);
1121 total_bits = bytes_to_bits(bytes, block_group->sectorsize);
1125 for (i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, i);
1126 i < BITS_PER_BITMAP;
1127 i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, i + 1)) {
1128 next_zero = find_next_zero_bit(entry->bitmap,
1129 BITS_PER_BITMAP, i);
1130 if (next_zero - i >= search_bits) {
1131 found_bits = next_zero - i;
1145 total_found += found_bits;
1147 if (cluster->max_size < found_bits * block_group->sectorsize)
1148 cluster->max_size = found_bits * block_group->sectorsize;
1150 if (total_found < total_bits) {
1151 i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, next_zero);
1152 if (i - start > total_bits * 2) {
1154 cluster->max_size = 0;
1160 cluster->window_start = start * block_group->sectorsize +
1162 cluster->points_to_bitmap = true;
1168 * here we try to find a cluster of blocks in a block group. The goal
1169 * is to find at least bytes free and up to empty_size + bytes free.
1170 * We might not find them all in one contiguous area.
1172 * returns zero and sets up cluster if things worked out, otherwise
1173 * it returns -enospc
1175 int btrfs_find_space_cluster(struct btrfs_trans_handle *trans,
1176 struct btrfs_root *root,
1177 struct btrfs_block_group_cache *block_group,
1178 struct btrfs_free_cluster *cluster,
1179 u64 offset, u64 bytes, u64 empty_size)
1181 struct btrfs_free_space *entry = NULL;
1182 struct rb_node *node;
1183 struct btrfs_free_space *next;
1184 struct btrfs_free_space *last = NULL;
1189 bool found_bitmap = false;
1192 /* for metadata, allow allocates with more holes */
1193 if (btrfs_test_opt(root, SSD_SPREAD)) {
1194 min_bytes = bytes + empty_size;
1195 } else if (block_group->flags & BTRFS_BLOCK_GROUP_METADATA) {
1197 * we want to do larger allocations when we are
1198 * flushing out the delayed refs, it helps prevent
1199 * making more work as we go along.
1201 if (trans->transaction->delayed_refs.flushing)
1202 min_bytes = max(bytes, (bytes + empty_size) >> 1);
1204 min_bytes = max(bytes, (bytes + empty_size) >> 4);
1206 min_bytes = max(bytes, (bytes + empty_size) >> 2);
1208 spin_lock(&block_group->tree_lock);
1209 spin_lock(&cluster->lock);
1211 /* someone already found a cluster, hooray */
1212 if (cluster->block_group) {
1217 entry = tree_search_offset(block_group, offset, found_bitmap, 1);
1224 * If found_bitmap is true, we exhausted our search for extent entries,
1225 * and we just want to search all of the bitmaps that we can find, and
1226 * ignore any extent entries we find.
1228 while (entry->bitmap || found_bitmap ||
1229 (!entry->bitmap && entry->bytes < min_bytes)) {
1230 struct rb_node *node = rb_next(&entry->offset_index);
1232 if (entry->bitmap && entry->bytes > bytes + empty_size) {
1233 ret = btrfs_bitmap_cluster(block_group, entry, cluster,
1234 offset, bytes + empty_size,
1244 entry = rb_entry(node, struct btrfs_free_space, offset_index);
1248 * We already searched all the extent entries from the passed in offset
1249 * to the end and didn't find enough space for the cluster, and we also
1250 * didn't find any bitmaps that met our criteria, just go ahead and exit
1257 cluster->points_to_bitmap = false;
1258 window_start = entry->offset;
1259 window_free = entry->bytes;
1261 max_extent = entry->bytes;
1264 /* out window is just right, lets fill it */
1265 if (window_free >= bytes + empty_size)
1268 node = rb_next(&last->offset_index);
1275 next = rb_entry(node, struct btrfs_free_space, offset_index);
1278 * we found a bitmap, so if this search doesn't result in a
1279 * cluster, we know to go and search again for the bitmaps and
1280 * start looking for space there
1284 offset = next->offset;
1285 found_bitmap = true;
1291 * we haven't filled the empty size and the window is
1292 * very large. reset and try again
1294 if (next->offset - (last->offset + last->bytes) > 128 * 1024 ||
1295 next->offset - window_start > (bytes + empty_size) * 2) {
1297 window_start = entry->offset;
1298 window_free = entry->bytes;
1300 max_extent = entry->bytes;
1303 window_free += next->bytes;
1304 if (entry->bytes > max_extent)
1305 max_extent = entry->bytes;
1309 cluster->window_start = entry->offset;
1312 * now we've found our entries, pull them out of the free space
1313 * cache and put them into the cluster rbtree
1315 * The cluster includes an rbtree, but only uses the offset index
1316 * of each free space cache entry.
1319 node = rb_next(&entry->offset_index);
1320 if (entry->bitmap && node) {
1321 entry = rb_entry(node, struct btrfs_free_space,
1324 } else if (entry->bitmap && !node) {
1328 rb_erase(&entry->offset_index, &block_group->free_space_offset);
1329 ret = tree_insert_offset(&cluster->root, entry->offset,
1330 &entry->offset_index, 0);
1333 if (!node || entry == last)
1336 entry = rb_entry(node, struct btrfs_free_space, offset_index);
1339 cluster->max_size = max_extent;
1342 atomic_inc(&block_group->count);
1343 list_add_tail(&cluster->block_group_list, &block_group->cluster_list);
1344 cluster->block_group = block_group;
1346 spin_unlock(&cluster->lock);
1347 spin_unlock(&block_group->tree_lock);
1353 * simple code to zero out a cluster
1355 void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster)
1357 spin_lock_init(&cluster->lock);
1358 spin_lock_init(&cluster->refill_lock);
1359 cluster->root = RB_ROOT;
1360 cluster->max_size = 0;
1361 cluster->points_to_bitmap = false;
1362 INIT_LIST_HEAD(&cluster->block_group_list);
1363 cluster->block_group = NULL;