/*
 * Copyright (C) 2007 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */
#define _XOPEN_SOURCE 600
#define __USE_XOPEN2K
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include "kerncompat.h"
#include "extent_io.h"
#include "list.h"
#include "ctree.h"
#include "volumes.h"
/* Soft and hard limits on how much extent buffer cache to keep around */
static u64 cache_soft_max = 1024 * 1024 * 256;
static u64 cache_hard_max = 1 * 1024 * 1024 * 1024;
void extent_io_tree_init(struct extent_io_tree *tree)
{
	cache_tree_init(&tree->state);
	cache_tree_init(&tree->cache);
	INIT_LIST_HEAD(&tree->lru);
	tree->cache_size = 0;
}
static struct extent_state *alloc_extent_state(void)
{
	struct extent_state *state;

	state = malloc(sizeof(*state));
	if (!state)
		return NULL;
	state->cache_node.objectid = 0;
	state->refs = 1;
	state->state = 0;
	state->xprivate = 0;
	return state;
}
static void btrfs_free_extent_state(struct extent_state *state)
{
	state->refs--;
	BUG_ON(state->refs < 0);
	if (state->refs == 0)
		free(state);
}
static void free_extent_state_func(struct cache_extent *cache)
{
	struct extent_state *es;

	es = container_of(cache, struct extent_state, cache_node);
	btrfs_free_extent_state(es);
}
void extent_io_tree_cleanup(struct extent_io_tree *tree)
{
	struct extent_buffer *eb;

	while (!list_empty(&tree->lru)) {
		eb = list_entry(tree->lru.next, struct extent_buffer, lru);
		if (eb->refs != 1) {
			fprintf(stderr, "extent buffer leak: "
				"start %llu len %u\n",
				(unsigned long long)eb->start, eb->len);
			eb->refs = 1;
		}
		free_extent_buffer(eb);
	}

	cache_tree_free_extents(&tree->state, free_extent_state_func);
}
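/*
 * Illustrative usage (not part of the original file): a tree is
 * initialized once, used to track state ranges and cached buffers,
 * and torn down with extent_io_tree_cleanup(), which reports any
 * extent buffers still holding extra references as leaks:
 *
 *	struct extent_io_tree tree;
 *
 *	extent_io_tree_init(&tree);
 *	...
 *	extent_io_tree_cleanup(&tree);
 */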
static inline void update_extent_state(struct extent_state *state)
{
	state->cache_node.start = state->start;
	state->cache_node.size = state->end + 1 - state->start;
}
/*
 * Utility function to look for merge candidates inside a given range.
 * Any extents with matching state are merged together into a single
 * extent in the tree. Extents with EXTENT_IOBITS in their state field
 * are not merged.
 */
static int merge_state(struct extent_io_tree *tree,
		       struct extent_state *state)
{
	struct extent_state *other;
	struct cache_extent *other_node;

	if (state->state & EXTENT_IOBITS)
		return 0;

	other_node = prev_cache_extent(&state->cache_node);
	if (other_node) {
		other = container_of(other_node, struct extent_state,
				     cache_node);
		if (other->end == state->start - 1 &&
		    other->state == state->state) {
			state->start = other->start;
			update_extent_state(state);
			remove_cache_extent(&tree->state, &other->cache_node);
			btrfs_free_extent_state(other);
		}
	}
	other_node = next_cache_extent(&state->cache_node);
	if (other_node) {
		other = container_of(other_node, struct extent_state,
				     cache_node);
		if (other->start == state->end + 1 &&
		    other->state == state->state) {
			other->start = state->start;
			update_extent_state(other);
			remove_cache_extent(&tree->state, &state->cache_node);
			btrfs_free_extent_state(state);
		}
	}
	return 0;
}
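/*
 * Example (illustrative, not from the original file): two adjacent
 * ranges with identical state bits collapse into one record. After
 *
 *	set_extent_dirty(tree, 0, 4095, 0);
 *	set_extent_dirty(tree, 4096, 8191, 0);
 *
 * the state tree holds a single extent_state with start == 0 and
 * end == 8191, because merge_state() absorbs both neighbours.
 */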
/*
 * insert an extent_state struct into the tree. 'bits' are set on the
 * struct before it is inserted.
 */
static int insert_state(struct extent_io_tree *tree,
			struct extent_state *state, u64 start, u64 end,
			int bits)
{
	int ret;

	BUG_ON(end < start);
	state->state |= bits;
	state->start = start;
	state->end = end;
	update_extent_state(state);
	ret = insert_cache_extent(&tree->state, &state->cache_node);
	BUG_ON(ret);
	merge_state(tree, state);
	return 0;
}
/*
 * split a given extent state struct in two, inserting the preallocated
 * struct 'prealloc' as the newly created first half. 'split' indicates
 * an offset inside 'orig' where it should be split.
 */
static int split_state(struct extent_io_tree *tree, struct extent_state *orig,
		       struct extent_state *prealloc, u64 split)
{
	int ret;

	prealloc->start = orig->start;
	prealloc->end = split - 1;
	prealloc->state = orig->state;
	update_extent_state(prealloc);
	orig->start = split;
	update_extent_state(orig);
	ret = insert_cache_extent(&tree->state, &prealloc->cache_node);
	BUG_ON(ret);
	return 0;
}
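/*
 * Example (illustrative): splitting [0, 8191] at 4096 leaves 'prealloc'
 * covering [0, 4095] and 'orig' covering [4096, 8191]. Both halves keep
 * the same state bits; the caller then flips bits on whichever half
 * falls inside the range it is working on.
 */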
/*
 * Utility function to clear some bits in an extent state struct.
 */
static int clear_state_bit(struct extent_io_tree *tree,
			   struct extent_state *state, int bits)
{
	int ret = state->state & bits;

	state->state &= ~bits;
	if (state->state == 0) {
		remove_cache_extent(&tree->state, &state->cache_node);
		btrfs_free_extent_state(state);
	} else {
		merge_state(tree, state);
	}
	return ret;
}
/*
 * clear some bits on a range in the tree.
 */
int clear_extent_bits(struct extent_io_tree *tree, u64 start,
		      u64 end, int bits, gfp_t mask)
{
	struct extent_state *state;
	struct extent_state *prealloc = NULL;
	struct cache_extent *node;
	u64 last_end;
	int err;
	int set = 0;

again:
	if (!prealloc) {
		prealloc = alloc_extent_state();
		if (!prealloc)
			return -ENOMEM;
	}

	/*
	 * this search will find the extents that end after
	 * our range starts
	 */
	node = search_cache_extent(&tree->state, start);
	if (!node)
		goto out;
	state = container_of(node, struct extent_state, cache_node);
	if (state->start > end)
		goto out;
	last_end = state->end;

	/*
	 *     | ---- desired range ---- |
	 *  | state | or
	 *  | ------------- state -------------- |
	 *
	 * We need to split the extent we found, and may flip
	 * bits on second half.
	 *
	 * If the extent we found extends past our range, we
	 * just split and search again.  It'll get split again
	 * the next time though.
	 *
	 * If the extent we found is inside our range, we clear
	 * the desired bit on it.
	 */
	if (state->start < start) {
		err = split_state(tree, state, prealloc, start);
		BUG_ON(err == -EEXIST);
		prealloc = NULL;
		if (err)
			goto out;
		if (state->end <= end) {
			set |= clear_state_bit(tree, state, bits);
			if (last_end == (u64)-1)
				goto out;
			start = last_end + 1;
		} else {
			start = state->start;
		}
		goto search_again;
	}
	/*
	 * | ---- desired range ---- |
	 *                        | state |
	 *
	 * We need to split the extent, and clear the bit
	 * on the first half.
	 */
	if (state->start <= end && state->end > end) {
		err = split_state(tree, state, prealloc, end + 1);
		BUG_ON(err == -EEXIST);

		set |= clear_state_bit(tree, prealloc, bits);
		prealloc = NULL;
		goto out;
	}

	start = state->end + 1;
	set |= clear_state_bit(tree, state, bits);
	if (last_end == (u64)-1)
		goto out;
	start = last_end + 1;
	goto search_again;
out:
	if (prealloc)
		btrfs_free_extent_state(prealloc);
	return set;

search_again:
	if (start > end)
		goto out;
	goto again;
}
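/*
 * Example (illustrative): clearing the middle of a range splits it.
 * With EXTENT_DIRTY set on [0, 8191],
 *
 *	clear_extent_bits(tree, 2048, 4095, EXTENT_DIRTY, 0);
 *
 * leaves two dirty records, [0, 2047] and [4096, 8191]; the two
 * split_state() calls above carve out the cleared window.
 */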
/*
 * set some bits on a range in the tree.
 */
int set_extent_bits(struct extent_io_tree *tree, u64 start,
		    u64 end, int bits, gfp_t mask)
{
	struct extent_state *state;
	struct extent_state *prealloc = NULL;
	struct cache_extent *node;
	int err = 0;
	u64 last_start;
	u64 last_end;

again:
	if (!prealloc) {
		prealloc = alloc_extent_state();
		if (!prealloc)
			return -ENOMEM;
	}

	/*
	 * this search will find the extents that end after
	 * our range starts
	 */
	node = search_cache_extent(&tree->state, start);
	if (!node) {
		err = insert_state(tree, prealloc, start, end, bits);
		BUG_ON(err == -EEXIST);
		prealloc = NULL;
		goto out;
	}

	state = container_of(node, struct extent_state, cache_node);
	last_start = state->start;
	last_end = state->end;

	/*
	 * | ---- desired range ---- |
	 * | state |
	 *
	 * Just lock what we found and keep going
	 */
	if (state->start == start && state->end <= end) {
		state->state |= bits;
		merge_state(tree, state);
		if (last_end == (u64)-1)
			goto out;
		start = last_end + 1;
		goto search_again;
	}
	/*
	 *     | ---- desired range ---- |
	 * | state |
	 *   or
	 * | ------------- state -------------- |
	 *
	 * We need to split the extent we found, and may flip bits on
	 * second half.
	 *
	 * If the extent we found extends past our
	 * range, we just split and search again.  It'll get split
	 * again the next time though.
	 *
	 * If the extent we found is inside our range, we set the
	 * desired bit on it.
	 */
	if (state->start < start) {
		err = split_state(tree, state, prealloc, start);
		BUG_ON(err == -EEXIST);
		prealloc = NULL;
		if (err)
			goto out;
		if (state->end <= end) {
			state->state |= bits;
			start = state->end + 1;
			merge_state(tree, state);
			if (last_end == (u64)-1)
				goto out;
			start = last_end + 1;
		} else {
			start = state->start;
		}
		goto search_again;
	}
	/*
	 * | ---- desired range ---- |
	 *     | state | or               | state |
	 *
	 * There's a hole, we need to insert something in it and
	 * ignore the extent we found.
	 */
	if (state->start > start) {
		u64 this_end;

		if (end < last_start)
			this_end = end;
		else
			this_end = last_start - 1;
		err = insert_state(tree, prealloc, start, this_end,
				   bits);
		BUG_ON(err == -EEXIST);
		prealloc = NULL;
		if (err)
			goto out;
		start = this_end + 1;
		goto search_again;
	}
	/*
	 * | ---- desired range ---- |
	 * | ---------- state ---------- |
	 *
	 * We need to split the extent, and set the bit
	 * on the first half.
	 */
	err = split_state(tree, state, prealloc, end + 1);
	BUG_ON(err == -EEXIST);

	/* after the split, 'prealloc' covers [start, end] */
	prealloc->state |= bits;
	merge_state(tree, prealloc);
	prealloc = NULL;
out:
	if (prealloc)
		btrfs_free_extent_state(prealloc);
	return err;

search_again:
	if (start > end)
		goto out;
	goto again;
}
int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
		     gfp_t mask)
{
	return set_extent_bits(tree, start, end, EXTENT_DIRTY, mask);
}

int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
		       gfp_t mask)
{
	return clear_extent_bits(tree, start, end, EXTENT_DIRTY, mask);
}
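/*
 * Typical round trip (illustrative): mark a block range dirty while it
 * is modified in memory, then clear the range once it has been written
 * back:
 *
 *	set_extent_dirty(tree, start, start + blocksize - 1, 0);
 *	...
 *	clear_extent_dirty(tree, start, start + blocksize - 1, 0);
 */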
int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
			  u64 *start_ret, u64 *end_ret, int bits)
{
	struct cache_extent *node;
	struct extent_state *state;
	int ret = 1;

	/*
	 * this search will find all the extents that end after
	 * our range starts.
	 */
	node = search_cache_extent(&tree->state, start);
	if (!node)
		goto out;

	while (1) {
		state = container_of(node, struct extent_state, cache_node);
		if (state->end >= start && (state->state & bits)) {
			*start_ret = state->start;
			*end_ret = state->end;
			ret = 0;
			break;
		}
		node = next_cache_extent(node);
		if (!node)
			break;
	}
out:
	return ret;
}
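/*
 * Example (illustrative): walking every dirty range in the tree by
 * repeatedly asking for the first matching range at or after 'start'
 * (the function returns 0 when a range is found):
 *
 *	u64 start = 0, found_start, found_end;
 *
 *	while (!find_first_extent_bit(tree, start, &found_start,
 *				      &found_end, EXTENT_DIRTY)) {
 *		// process [found_start, found_end]
 *		start = found_end + 1;
 *	}
 */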
/*
 * searches a range in the state tree for a given mask.  If 'filled' == 1,
 * this returns 1 only if the whole range is covered by extents with the
 * bits set.  Otherwise, 1 is returned if any bit in the range is found set.
 */
int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
		   int bits, int filled)
{
	struct extent_state *state = NULL;
	struct cache_extent *node;
	int bitset = 0;

	node = search_cache_extent(&tree->state, start);
	while (node && start <= end) {
		state = container_of(node, struct extent_state, cache_node);

		if (filled && state->start > start) {
			bitset = 0;
			break;
		}
		if (state->start > end)
			break;

		if (state->state & bits) {
			bitset = 1;
			if (!filled)
				break;
		} else if (filled) {
			bitset = 0;
			break;
		}
		start = state->end + 1;
		if (start > end)
			break;
		node = next_cache_extent(node);
		if (!node) {
			if (filled)
				bitset = 0;
			break;
		}
	}
	return bitset;
}
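/*
 * Example (illustrative): with EXTENT_DIRTY set only on [0, 4095],
 *
 *	test_range_bit(tree, 0, 8191, EXTENT_DIRTY, 0)	returns 1
 *	test_range_bit(tree, 0, 8191, EXTENT_DIRTY, 1)	returns 0
 *
 * because 'filled' == 1 demands that the bits cover the whole range,
 * while 'filled' == 0 is satisfied by any overlap.
 */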
int set_state_private(struct extent_io_tree *tree, u64 start, u64 private)
{
	struct cache_extent *node;
	struct extent_state *state;
	int ret = 0;

	node = search_cache_extent(&tree->state, start);
	if (!node) {
		ret = -ENOENT;
		goto out;
	}
	state = container_of(node, struct extent_state, cache_node);
	if (state->start != start) {
		ret = -ENOENT;
		goto out;
	}
	state->xprivate = private;
out:
	return ret;
}

int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private)
{
	struct cache_extent *node;
	struct extent_state *state;
	int ret = 0;

	node = search_cache_extent(&tree->state, start);
	if (!node) {
		ret = -ENOENT;
		goto out;
	}
	state = container_of(node, struct extent_state, cache_node);
	if (state->start != start) {
		ret = -ENOENT;
		goto out;
	}
	*private = state->xprivate;
out:
	return ret;
}
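/*
 * Example (illustrative): the private value is keyed by the exact start
 * offset of an existing state record, so a set/get pair must use the
 * same 'start':
 *
 *	u64 val;
 *
 *	set_extent_bits(tree, 4096, 8191, EXTENT_DIRTY, 0);
 *	set_state_private(tree, 4096, 0xdeadbeef);
 *	get_state_private(tree, 4096, &val);	// val == 0xdeadbeef
 *
 * Both calls return -ENOENT if no record starts at 'start'.
 */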
/*
 * Trim the LRU cache once it has grown past the soft limit, freeing
 * buffers that nothing but the cache itself still references.
 */
static int free_some_buffers(struct extent_io_tree *tree)
{
	u32 nrscan = 0;
	struct extent_buffer *eb;
	struct list_head *node, *next;

	if (tree->cache_size < cache_soft_max)
		return 0;

	list_for_each_safe(node, next, &tree->lru) {
		eb = list_entry(node, struct extent_buffer, lru);
		if (eb->refs == 1) {
			free_extent_buffer(eb);
			if (tree->cache_size < cache_hard_max)
				break;
		} else {
			list_move_tail(&eb->lru, &tree->lru);
		}
		if (nrscan++ > 64 && tree->cache_size < cache_hard_max)
			break;
	}
	return 0;
}
static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree,
						   u64 bytenr, u32 blocksize)
{
	struct extent_buffer *eb;
	int ret;

	eb = malloc(sizeof(struct extent_buffer) + blocksize);
	if (!eb)
		return NULL;

	memset(eb, 0, sizeof(struct extent_buffer) + blocksize);

	eb->start = bytenr;
	eb->len = blocksize;
	eb->refs = 2;
	eb->flags = 0;
	eb->tree = tree;
	eb->fd = -1;
	eb->dev_bytenr = (u64)-1;
	eb->cache_node.start = bytenr;
	eb->cache_node.size = blocksize;
	INIT_LIST_HEAD(&eb->recow);

	free_some_buffers(tree);
	ret = insert_cache_extent(&tree->cache, &eb->cache_node);
	if (ret) {
		free(eb);
		return NULL;
	}
	list_add_tail(&eb->lru, &tree->lru);
	tree->cache_size += blocksize;
	return eb;
}
void free_extent_buffer(struct extent_buffer *eb)
{
	if (!eb)
		return;

	eb->refs--;
	BUG_ON(eb->refs < 0);
	if (eb->refs == 0) {
		struct extent_io_tree *tree = eb->tree;
		BUG_ON(eb->flags & EXTENT_DIRTY);
		list_del_init(&eb->lru);
		list_del_init(&eb->recow);
		remove_cache_extent(&tree->cache, &eb->cache_node);
		BUG_ON(tree->cache_size < eb->len);
		tree->cache_size -= eb->len;
		free(eb);
	}
}
struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree,
					 u64 bytenr, u32 blocksize)
{
	struct extent_buffer *eb = NULL;
	struct cache_extent *cache;

	cache = lookup_cache_extent(&tree->cache, bytenr, blocksize);
	if (cache && cache->start == bytenr &&
	    cache->size == blocksize) {
		eb = container_of(cache, struct extent_buffer, cache_node);
		list_move_tail(&eb->lru, &tree->lru);
		eb->refs++;
	}
	return eb;
}
struct extent_buffer *find_first_extent_buffer(struct extent_io_tree *tree,
					       u64 start)
{
	struct extent_buffer *eb = NULL;
	struct cache_extent *cache;

	cache = search_cache_extent(&tree->cache, start);
	if (cache) {
		eb = container_of(cache, struct extent_buffer, cache_node);
		list_move_tail(&eb->lru, &tree->lru);
		eb->refs++;
	}
	return eb;
}
struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
					  u64 bytenr, u32 blocksize)
{
	struct extent_buffer *eb;
	struct cache_extent *cache;

	cache = lookup_cache_extent(&tree->cache, bytenr, blocksize);
	if (cache && cache->start == bytenr &&
	    cache->size == blocksize) {
		eb = container_of(cache, struct extent_buffer, cache_node);
		list_move_tail(&eb->lru, &tree->lru);
		eb->refs++;
	} else {
		if (cache) {
			/* a stale buffer overlaps the requested range */
			eb = container_of(cache, struct extent_buffer,
					  cache_node);
			free_extent_buffer(eb);
		}
		eb = __alloc_extent_buffer(tree, bytenr, blocksize);
	}
	return eb;
}
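/*
 * Reference counting sketch (illustrative): a freshly allocated buffer
 * starts with two references, one owned by the caller and one by the
 * tree's LRU cache. Callers drop only their own reference; the cache
 * reference is reclaimed by free_some_buffers() under memory pressure
 * or by extent_io_tree_cleanup():
 *
 *	eb = alloc_extent_buffer(tree, bytenr, blocksize);
 *	...
 *	free_extent_buffer(eb);	// may stay cached with refs == 1
 */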
int read_extent_from_disk(struct extent_buffer *eb,
			  unsigned long offset, unsigned long len)
{
	int ret;

	ret = pread(eb->fd, eb->data + offset, len, eb->dev_bytenr + offset);
	if (ret < 0)
		goto out;
	if (ret != len) {
		ret = -EIO;
		goto out;
	}
	ret = 0;
out:
	return ret;
}

int write_extent_to_disk(struct extent_buffer *eb)
{
	int ret;

	ret = pwrite(eb->fd, eb->data, eb->len, eb->dev_bytenr);
	if (ret < 0)
		goto out;
	if (ret != eb->len) {
		ret = -EIO;
		goto out;
	}
	ret = 0;
out:
	return ret;
}
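/*
 * Example (illustrative): eb->fd and eb->dev_bytenr must point at the
 * backing device and physical offset before either helper is called:
 *
 *	eb->fd = device_fd;
 *	eb->dev_bytenr = physical;
 *	read_extent_from_disk(eb, 0, eb->len);	// fill the whole buffer
 */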
int read_data_from_disk(struct btrfs_fs_info *info, void *buf, u64 offset,
			u64 bytes, int mirror)
{
	struct btrfs_multi_bio *multi = NULL;
	struct btrfs_device *device;
	u64 bytes_left = bytes;
	u64 read_len;
	u64 total_read = 0;
	int ret;

	while (bytes_left) {
		read_len = bytes_left;
		ret = btrfs_map_block(&info->mapping_tree, READ, offset,
				      &read_len, &multi, mirror, NULL);
		if (ret) {
			fprintf(stderr, "Couldn't map the block %Lu\n",
				offset);
			return -EIO;
		}
		device = multi->stripes[0].dev;

		read_len = min(bytes_left, read_len);
		if (device->fd == 0) {
			kfree(multi);
			return -EIO;
		}

		ret = pread(device->fd, buf + total_read, read_len,
			    multi->stripes[0].physical);
		kfree(multi);
		if (ret < 0) {
			fprintf(stderr, "Error reading %Lu, %d\n", offset,
				ret);
			return -EIO;
		}
		if (ret != read_len) {
			fprintf(stderr, "Short read for %Lu, read %d, "
				"read_len %Lu\n", offset, ret, read_len);
			return -EIO;
		}

		bytes_left -= read_len;
		offset += read_len;
		total_read += read_len;
	}
	return 0;
}
int write_data_to_disk(struct btrfs_fs_info *info, void *buf, u64 offset,
		       u64 bytes, int mirror)
{
	struct btrfs_multi_bio *multi = NULL;
	struct btrfs_device *device;
	u64 bytes_left = bytes;
	u64 this_len;
	u64 total_write = 0;
	u64 *raid_map = NULL;
	u64 dev_bytenr;
	int dev_nr;
	int ret = 0;

	while (bytes_left > 0) {
		this_len = bytes_left;
		dev_nr = 0;

		ret = btrfs_map_block(&info->mapping_tree, WRITE, offset,
				      &this_len, &multi, mirror, &raid_map);
		if (ret) {
			fprintf(stderr, "Couldn't map the block %Lu\n",
				offset);
			return -EIO;
		}

		if (raid_map) {
			struct extent_buffer *eb;
			u64 stripe_len = this_len;

			this_len = min(this_len, bytes_left);
			this_len = min(this_len, (u64)info->tree_root->leafsize);

			eb = malloc(sizeof(struct extent_buffer) + this_len);
			BUG_ON(!eb);

			memset(eb, 0, sizeof(struct extent_buffer) + this_len);
			eb->start = offset;
			eb->len = this_len;

			memcpy(eb->data, buf + total_write, this_len);
			ret = write_raid56_with_parity(info, eb, multi,
						       stripe_len, raid_map);
			BUG_ON(ret);

			free(eb);
			kfree(raid_map);
			raid_map = NULL;
		} else while (dev_nr < multi->num_stripes) {
			device = multi->stripes[dev_nr].dev;
			if (device->fd == 0) {
				kfree(multi);
				return -EIO;
			}

			dev_bytenr = multi->stripes[dev_nr].physical;
			this_len = min(this_len, bytes_left);
			dev_nr++;

			ret = pwrite(device->fd, buf + total_write, this_len,
				     dev_bytenr);
			if (ret != this_len) {
				if (ret < 0) {
					fprintf(stderr, "Error writing to "
						"device %d\n", errno);
					kfree(multi);
					return -errno;
				} else {
					fprintf(stderr, "Short write\n");
					kfree(multi);
					return -EIO;
				}
			}
		}

		BUG_ON(bytes_left < this_len);

		bytes_left -= this_len;
		offset += this_len;
		total_write += this_len;

		kfree(multi);
		multi = NULL;
	}
	return 0;
}
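/*
 * Note on mirroring (illustrative): for non-RAID56 chunks the inner
 * while loop writes the same bytes to every stripe copy returned by
 * btrfs_map_block(), so a single call keeps all mirrors in sync.
 * RAID5/6 chunks instead go through write_raid56_with_parity() so that
 * the parity stripes are recomputed alongside the data.
 */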
int set_extent_buffer_uptodate(struct extent_buffer *eb)
{
	eb->flags |= EXTENT_UPTODATE;
	return 0;
}

int clear_extent_buffer_uptodate(struct extent_io_tree *tree,
				 struct extent_buffer *eb)
{
	eb->flags &= ~EXTENT_UPTODATE;
	return 0;
}

int extent_buffer_uptodate(struct extent_buffer *eb)
{
	if (!eb)
		return 0;

	if (eb->flags & EXTENT_UPTODATE)
		return 1;
	return 0;
}
int set_extent_buffer_dirty(struct extent_buffer *eb)
{
	struct extent_io_tree *tree = eb->tree;

	if (!(eb->flags & EXTENT_DIRTY)) {
		eb->flags |= EXTENT_DIRTY;
		set_extent_dirty(tree, eb->start, eb->start + eb->len - 1, 0);
		extent_buffer_get(eb);
	}
	return 0;
}

int clear_extent_buffer_dirty(struct extent_buffer *eb)
{
	struct extent_io_tree *tree = eb->tree;

	if (eb->flags & EXTENT_DIRTY) {
		eb->flags &= ~EXTENT_DIRTY;
		clear_extent_dirty(tree, eb->start, eb->start + eb->len - 1, 0);
		free_extent_buffer(eb);
	}
	return 0;
}
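/*
 * Lifecycle sketch (illustrative): dirtying a buffer takes an extra
 * reference and records its byte range in the io tree; clearing drops
 * both, so the pair must stay balanced:
 *
 *	set_extent_buffer_dirty(eb);	// flag, range and ref taken
 *	...				// write the buffer back
 *	clear_extent_buffer_dirty(eb);	// flag, range and ref dropped
 */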
int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv,
			 unsigned long start, unsigned long len)
{
	return memcmp(eb->data + start, ptrv, len);
}

void read_extent_buffer(struct extent_buffer *eb, void *dst,
			unsigned long start, unsigned long len)
{
	memcpy(dst, eb->data + start, len);
}

void write_extent_buffer(struct extent_buffer *eb, const void *src,
			 unsigned long start, unsigned long len)
{
	memcpy(eb->data + start, src, len);
}

void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src,
			unsigned long dst_offset, unsigned long src_offset,
			unsigned long len)
{
	memcpy(dst->data + dst_offset, src->data + src_offset, len);
}

void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
			   unsigned long src_offset, unsigned long len)
{
	memmove(dst->data + dst_offset, dst->data + src_offset, len);
}

void memset_extent_buffer(struct extent_buffer *eb, char c,
			  unsigned long start, unsigned long len)
{
	memset(eb->data + start, c, len);
}
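/*
 * Example (illustrative): memmove_extent_buffer() is the only helper
 * here that is safe for overlapping ranges within one buffer, e.g.
 * shifting items by one slot inside a leaf:
 *
 *	memmove_extent_buffer(eb, dst_off, src_off, len);
 *
 * copy_extent_buffer() uses memcpy() and therefore assumes the source
 * and destination ranges do not overlap.
 */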