3 * Copyright (C) 2007 Oracle. All rights reserved.
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public
7 * License v2 as published by the Free Software Foundation.
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * General Public License for more details.
14 * You should have received a copy of the GNU General Public
15 * License along with this program; if not, write to the
16 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
17 * Boston, MA 021110-1307, USA.
19 #define _XOPEN_SOURCE 600
23 #include <sys/types.h>
27 #include "kerncompat.h"
28 #include "extent_io.h"
/*
 * Limits on bytes of extent buffers kept cached per tree.  Reclaim
 * (free_some_buffers()) kicks in once cache_size passes the soft limit
 * and works harder while still above the hard limit.
 */
static u64 cache_soft_max = 1024 * 1024 * 256;
static u64 cache_hard_max = 1 * 1024 * 1024 * 1024;
/*
 * Initialize an extent_io_tree: empty extent-state tree, empty buffer
 * cache, empty LRU list.
 * NOTE(review): the rest of the body (e.g. a cache_size reset) is not
 * visible in this view.
 */
void extent_io_tree_init(struct extent_io_tree *tree)
	cache_tree_init(&tree->state);
	cache_tree_init(&tree->cache);
	INIT_LIST_HEAD(&tree->lru);
/*
 * Allocate a fresh extent_state; released later through
 * btrfs_free_extent_state().
 * NOTE(review): the malloc() NULL check and the remaining field
 * initialization are not visible in this view — confirm upstream.
 */
static struct extent_state *alloc_extent_state(void)
	struct extent_state *state;

	state = malloc(sizeof(*state));
	state->cache_node.objectid = 0;
/*
 * Drop a reference on @state; BUG if the reference count has gone
 * negative (use-after-free / double-free indicator).
 */
static void btrfs_free_extent_state(struct extent_state *state)
	BUG_ON(state->refs < 0);
/*
 * Callback for cache_tree_free_extents(): recover the extent_state from
 * its embedded cache_node and release it.
 */
static void free_extent_state_func(struct cache_extent *cache)
	struct extent_state *es;

	es = container_of(cache, struct extent_state, cache_node);
	btrfs_free_extent_state(es);
/*
 * Tear down an extent_io_tree.  Any extent buffers still on the LRU are
 * leaks: report each one to stderr, free it, and finally release every
 * extent_state remaining in the state tree.
 */
void extent_io_tree_cleanup(struct extent_io_tree *tree)
	struct extent_buffer *eb;

	while(!list_empty(&tree->lru)) {
		eb = list_entry(tree->lru.next, struct extent_buffer, lru);
		fprintf(stderr, "extent buffer leak: "
			"start %llu len %u\n",
			(unsigned long long)eb->start, eb->len);
		free_extent_buffer(eb);
	cache_tree_free_extents(&tree->state, free_extent_state_func);
92 static inline void update_extent_state(struct extent_state *state)
94 state->cache_node.start = state->start;
95 state->cache_node.size = state->end + 1 - state->start;
 * Utility function to look for merge candidates inside a given range.
 * Any extents with matching state are merged together into a single
 * extent in the tree. Extents with EXTENT_IOBITS in their state field
 * are never merged.
/*
 * Merge @state with byte-adjacent neighbours that carry exactly the
 * same state bits.  Extents with EXTENT_IOBITS set are never merged.
 */
static int merge_state(struct extent_io_tree *tree,
		       struct extent_state *state)
	struct extent_state *other;
	struct cache_extent *other_node;

	/* in-flight I/O extents must stay individually findable */
	if (state->state & EXTENT_IOBITS)
	/* try to absorb the previous extent into @state */
	other_node = prev_cache_extent(&state->cache_node);
	other = container_of(other_node, struct extent_state,
	if (other->end == state->start - 1 &&
	    other->state == state->state) {
		state->start = other->start;
		update_extent_state(state);
		remove_cache_extent(&tree->state, &other->cache_node);
		btrfs_free_extent_state(other);
	/* try to absorb @state into the following extent */
	other_node = next_cache_extent(&state->cache_node);
	other = container_of(other_node, struct extent_state,
	if (other->start == state->end + 1 &&
	    other->state == state->state) {
		other->start = state->start;
		update_extent_state(other);
		remove_cache_extent(&tree->state, &state->cache_node);
		btrfs_free_extent_state(state);
141 * insert an extent_state struct into the tree. 'bits' are set on the
142 * struct before it is inserted.
/*
 * Insert @state covering [start, end] with @bits set into the tree's
 * state cache, then try to merge it with its neighbours.
 */
static int insert_state(struct extent_io_tree *tree,
			struct extent_state *state, u64 start, u64 end,
	state->state |= bits;
	state->start = start;
	update_extent_state(state);
	ret = insert_cache_extent(&tree->state, &state->cache_node);
	merge_state(tree, state);
162 * split a given extent state struct in two, inserting the preallocated
163 * struct 'prealloc' as the newly created second half. 'split' indicates an
164 * offset inside 'orig' where it should be split.
/*
 * Split @orig at @split: @prealloc becomes [orig->start, split - 1]
 * carrying the same state bits, and @orig is trimmed to begin at
 * @split before both are re-synced with the cache tree.
 */
static int split_state(struct extent_io_tree *tree, struct extent_state *orig,
		       struct extent_state *prealloc, u64 split)
	prealloc->start = orig->start;
	prealloc->end = split - 1;
	prealloc->state = orig->state;
	update_extent_state(prealloc);
	update_extent_state(orig);
	ret = insert_cache_extent(&tree->state, &prealloc->cache_node);
182 * clear some bits on a range in the tree.
/*
 * Clear @bits on @state; returns non-zero if any of those bits were
 * actually set.  A state left with no bits is removed from the tree
 * and freed; otherwise it is re-merged with its neighbours.
 */
static int clear_state_bit(struct extent_io_tree *tree,
			    struct extent_state *state, int bits)
	int ret = state->state & bits;

	state->state &= ~bits;
	if (state->state == 0) {
		remove_cache_extent(&tree->state, &state->cache_node);
		btrfs_free_extent_state(state);
	merge_state(tree, state);
200 * clear some bits on a range in the tree.
/*
 * Clear @bits on every extent overlapping [start, end], splitting
 * partially-overlapping extents so only the requested range is
 * affected.  Returns via the usual 0/err convention (tail not visible).
 */
int clear_extent_bits(struct extent_io_tree *tree, u64 start,
		      u64 end, int bits, gfp_t mask)
	struct extent_state *state;
	struct extent_state *prealloc = NULL;
	struct cache_extent *node;

	/* preallocated state, consumed if an extent must be split */
	prealloc = alloc_extent_state();
	/*
	 * this search will find the extents that end after
	 * our range starts
	 */
	node = search_cache_extent(&tree->state, start);
	state = container_of(node, struct extent_state, cache_node);
	if (state->start > end)
	last_end = state->end;
	/*
	 *     | ---- desired range ---- |
	 *  | ------------- state -------------- |
	 *
	 * We need to split the extent we found, and may flip
	 * bits on second half.
	 *
	 * If the extent we found extends past our range, we
	 * just split and search again. It'll get split again
	 * the next time though.
	 *
	 * If the extent we found is inside our range, we clear
	 * the desired bit on it.
	 */
	if (state->start < start) {
		err = split_state(tree, state, prealloc, start);
		BUG_ON(err == -EEXIST);
	if (state->end <= end) {
		set |= clear_state_bit(tree, state, bits);
		if (last_end == (u64)-1)
		start = last_end + 1;
		start = state->start;
	/*
	 * | ---- desired range ---- |
	 *                       | state |
	 * We need to split the extent, and clear the bit
	 * on the first half
	 */
	if (state->start <= end && state->end > end) {
		err = split_state(tree, state, prealloc, end + 1);
		BUG_ON(err == -EEXIST);
		set |= clear_state_bit(tree, prealloc, bits);
	/* extent entirely inside the range: clear and advance */
	start = state->end + 1;
	set |= clear_state_bit(tree, state, bits);
	if (last_end == (u64)-1)
	start = last_end + 1;
	/* unused preallocation is released on the way out */
	btrfs_free_extent_state(prealloc);
295 * set some bits on a range in the tree.
/*
 * Set @bits on [start, end], creating, splitting and merging
 * extent_states as needed so the range is exactly covered.
 */
int set_extent_bits(struct extent_io_tree *tree, u64 start,
		    u64 end, int bits, gfp_t mask)
	struct extent_state *state;
	struct extent_state *prealloc = NULL;
	struct cache_extent *node;

	/* preallocate in case an existing extent must be split */
	prealloc = alloc_extent_state();
	/*
	 * this search will find the extents that end after
	 * our range starts
	 */
	node = search_cache_extent(&tree->state, start);
	/* no overlapping extent: insert a fresh one for the whole range */
	err = insert_state(tree, prealloc, start, end, bits);
	BUG_ON(err == -EEXIST);
	state = container_of(node, struct extent_state, cache_node);
	last_start = state->start;
	last_end = state->end;
	/*
	 * | ---- desired range ---- |
	 * | state |
	 *
	 * Just lock what we found and keep going
	 */
	if (state->start == start && state->end <= end) {
		state->state |= bits;
		merge_state(tree, state);
		if (last_end == (u64)-1)
		start = last_end + 1;
	/*
	 *     | ---- desired range ---- |
	 * | ------------- state -------------- |
	 *
	 * We need to split the extent we found, and may flip bits on
	 * second half.
	 *
	 * If the extent we found extends past our
	 * range, we just split and search again. It'll get split
	 * again the next time though.
	 *
	 * If the extent we found is inside our range, we set the
	 * desired bits on it.
	 */
	if (state->start < start) {
		err = split_state(tree, state, prealloc, start);
		BUG_ON(err == -EEXIST);
	if (state->end <= end) {
		state->state |= bits;
		start = state->end + 1;
		merge_state(tree, state);
		if (last_end == (u64)-1)
		start = last_end + 1;
		start = state->start;
	/*
	 * | ---- desired range ---- |
	 *     | state | or | state |
	 *
	 * There's a hole, we need to insert something in it and
	 * ignore the extent we found.
	 */
	if (state->start > start) {
		if (end < last_start)
		this_end = last_start -1;
		err = insert_state(tree, prealloc, start, this_end,
		BUG_ON(err == -EEXIST);
		start = this_end + 1;
	/*
	 * | ---- desired range ---- |
	 * | ---------- state ---------- |
	 *
	 * We need to split the extent, and set the bit
	 * on the first half
	 */
	err = split_state(tree, state, prealloc, end + 1);
	BUG_ON(err == -EEXIST);
	state->state |= bits;
	merge_state(tree, prealloc);
	/* unused preallocation is released on the way out */
	btrfs_free_extent_state(prealloc);
421 int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
424 return set_extent_bits(tree, start, end, EXTENT_DIRTY, mask);
427 int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
430 return clear_extent_bits(tree, start, end, EXTENT_DIRTY, mask);
/*
 * Find the first extent at or after @start with any of @bits set.
 * On a hit, *start_ret/*end_ret describe that extent.
 */
int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
			  u64 *start_ret, u64 *end_ret, int bits)
	struct cache_extent *node;
	struct extent_state *state;

	/*
	 * this search will find all the extents that end after
	 * our range starts
	 */
	node = search_cache_extent(&tree->state, start);
	state = container_of(node, struct extent_state, cache_node);
	if (state->end >= start && (state->state & bits)) {
		*start_ret = state->start;
		*end_ret = state->end;
	/* no match yet: walk forward to the next extent */
	node = next_cache_extent(node);
/*
 * Test @bits over [start, end].  With @filled, the whole range must be
 * contiguously covered by extents carrying the bits; without it, any
 * single extent in the range carrying one of the bits suffices.
 */
int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
		   int bits, int filled)
	struct extent_state *state = NULL;
	struct cache_extent *node;

	node = search_cache_extent(&tree->state, start);
	while (node && start <= end) {
		state = container_of(node, struct extent_state, cache_node);
		/* a gap before the next extent fails a "filled" check */
		if (filled && state->start > start) {
		if (state->start > end)
		if (state->state & bits) {
		start = state->end + 1;
		node = next_cache_extent(node);
/*
 * Attach the opaque value @private to the extent_state that starts
 * exactly at @start; a state merely containing @start does not match.
 */
int set_state_private(struct extent_io_tree *tree, u64 start, u64 private)
	struct cache_extent *node;
	struct extent_state *state;

	node = search_cache_extent(&tree->state, start);
	state = container_of(node, struct extent_state, cache_node);
	if (state->start != start) {
	state->xprivate = private;
/*
 * Retrieve the opaque value previously stored with set_state_private()
 * for the extent_state starting exactly at @start.
 */
int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private)
	struct cache_extent *node;
	struct extent_state *state;

	node = search_cache_extent(&tree->state, start);
	state = container_of(node, struct extent_state, cache_node);
	if (state->start != start) {
	*private = state->xprivate;
/*
 * LRU reclaim for cached extent buffers.  Does nothing while
 * cache_size is under cache_soft_max; above it, walks the LRU
 * releasing buffers, stopping early once back under cache_hard_max
 * or after scanning a bounded number of entries.
 */
static int free_some_buffers(struct extent_io_tree *tree)
	struct extent_buffer *eb;
	struct list_head *node, *next;

	if (tree->cache_size < cache_soft_max)

	list_for_each_safe(node, next, &tree->lru) {
		eb = list_entry(node, struct extent_buffer, lru);
		free_extent_buffer(eb);
		if (tree->cache_size < cache_hard_max)
		/* could not be reclaimed now: rotate to the LRU tail */
		list_move_tail(&eb->lru, &tree->lru);
		if (nrscan++ > 64 && tree->cache_size < cache_hard_max)
/*
 * Allocate a zeroed extent_buffer of @blocksize bytes for @bytenr
 * (data stored inline after the struct), insert it into the tree's
 * buffer cache and LRU, and account its size in cache_size.
 */
static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree,
						   u64 bytenr, u32 blocksize)
	struct extent_buffer *eb;

	eb = malloc(sizeof(struct extent_buffer) + blocksize);
	memset(eb, 0, sizeof(struct extent_buffer) + blocksize);
	/* no device location known yet */
	eb->dev_bytenr = (u64)-1;
	eb->cache_node.start = bytenr;
	eb->cache_node.size = blocksize;
	/* trim the cache before adding the new buffer */
	free_some_buffers(tree);
	ret = insert_cache_extent(&tree->cache, &eb->cache_node);
	list_add_tail(&eb->lru, &tree->lru);
	tree->cache_size += blocksize;
/*
 * Release a reference on @eb.  Once the buffer is no longer in use it
 * is unhooked from its tree's LRU and cache, and its length is
 * subtracted from cache_size.  Freeing a dirty buffer is a bug.
 */
void free_extent_buffer(struct extent_buffer *eb)
	BUG_ON(eb->refs < 0);
	struct extent_io_tree *tree = eb->tree;
	BUG_ON(eb->flags & EXTENT_DIRTY);
	list_del_init(&eb->lru);
	remove_cache_extent(&tree->cache, &eb->cache_node);
	BUG_ON(tree->cache_size < eb->len);
	tree->cache_size -= eb->len;
/*
 * Look up a cached extent_buffer matching [bytenr, blocksize) exactly.
 * A hit is moved to the LRU tail (most recently used); a partial
 * overlap is not a match.
 */
struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree,
					 u64 bytenr, u32 blocksize)
	struct extent_buffer *eb = NULL;
	struct cache_extent *cache;

	cache = lookup_cache_extent(&tree->cache, bytenr, blocksize);
	if (cache && cache->start == bytenr &&
	    cache->size == blocksize) {
		eb = container_of(cache, struct extent_buffer, cache_node);
		list_move_tail(&eb->lru, &tree->lru);
/*
 * Return the first cached extent_buffer at or after the given start
 * offset, refreshing its LRU position.
 * NOTE(review): the remainder of the parameter list (the start offset)
 * is cut off in this view.
 */
struct extent_buffer *find_first_extent_buffer(struct extent_io_tree *tree,
	struct extent_buffer *eb = NULL;
	struct cache_extent *cache;

	cache = search_cache_extent(&tree->cache, start);
	eb = container_of(cache, struct extent_buffer, cache_node);
	list_move_tail(&eb->lru, &tree->lru);
/*
 * Get an extent_buffer for [bytenr, blocksize): return the cached
 * buffer on an exact match; release a stale partially-overlapping
 * buffer; otherwise allocate a fresh one via __alloc_extent_buffer().
 */
struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
					  u64 bytenr, u32 blocksize)
	struct extent_buffer *eb;
	struct cache_extent *cache;

	cache = lookup_cache_extent(&tree->cache, bytenr, blocksize);
	if (cache && cache->start == bytenr &&
	    cache->size == blocksize) {
		eb = container_of(cache, struct extent_buffer, cache_node);
		list_move_tail(&eb->lru, &tree->lru);
	/* overlapping but mismatched buffer: evict it before realloc */
	eb = container_of(cache, struct extent_buffer,
	free_extent_buffer(eb);
	eb = __alloc_extent_buffer(tree, bytenr, blocksize);
/*
 * Read @len bytes into @eb->data at buffer-offset @offset from the
 * device position recorded in eb->dev_bytenr, via eb->fd.
 */
int read_extent_from_disk(struct extent_buffer *eb,
			  unsigned long offset, unsigned long len)
	ret = pread(eb->fd, eb->data + offset, len, eb->dev_bytenr);
/*
 * Write the entire buffer to its recorded device location; a short or
 * failed pwrite() is treated as an error.
 */
int write_extent_to_disk(struct extent_buffer *eb)
	ret = pwrite(eb->fd, eb->data, eb->len, eb->dev_bytenr);
	if (ret != eb->len) {
/*
 * Read @bytes of logical data at @offset into @buf.  Each iteration
 * maps a chunk through btrfs_map_block() and reads it from the first
 * stripe of the chosen @mirror, looping until the range is consumed.
 */
int read_data_from_disk(struct btrfs_fs_info *info, void *buf, u64 offset,
			u64 bytes, int mirror)
	struct btrfs_multi_bio *multi = NULL;
	struct btrfs_device *device;
	u64 bytes_left = bytes;

	read_len = bytes_left;
	ret = btrfs_map_block(&info->mapping_tree, READ, offset,
			      &read_len, &multi, mirror, NULL);
	fprintf(stderr, "Couldn't map the block %Lu\n",
	device = multi->stripes[0].dev;
	/* the mapping may cover less than what is left to read */
	read_len = min(bytes_left, read_len);
	/* device was never opened: cannot read from it */
	if (device->fd == 0) {
	ret = pread(device->fd, buf + total_read, read_len,
		    multi->stripes[0].physical);
	fprintf(stderr, "Error reading %Lu, %d\n", offset,
	if (ret != read_len) {
		fprintf(stderr, "Short read for %Lu, read %d, "
			"read_len %Lu\n", offset, ret, read_len);
	bytes_left -= read_len;
	total_read += read_len;
/*
 * Write @bytes of logical data from @buf at @offset.  Each chunk is
 * mapped with btrfs_map_block(); RAID5/6 mappings (raid_map != NULL)
 * are handed to write_raid56_with_parity(), everything else is written
 * with pwrite() to every stripe of the mapping.
 */
int write_data_to_disk(struct btrfs_fs_info *info, void *buf, u64 offset,
		       u64 bytes, int mirror)
	struct btrfs_multi_bio *multi = NULL;
	struct btrfs_device *device;
	u64 bytes_left = bytes;
	u64 *raid_map = NULL;

	while (bytes_left > 0) {
		this_len = bytes_left;
		ret = btrfs_map_block(&info->mapping_tree, WRITE, offset,
				      &this_len, &multi, mirror, &raid_map);
		fprintf(stderr, "Couldn't map the block %Lu\n",
		/* RAID5/6 path: stage the data in a temporary
		 * extent_buffer so the parity writer can lay out
		 * data + parity stripes */
		struct extent_buffer *eb;
		u64 stripe_len = this_len;

		this_len = min(this_len, bytes_left);
		this_len = min(this_len, (u64)info->tree_root->leafsize);
		eb = malloc(sizeof(struct extent_buffer) + this_len);
		memset(eb, 0, sizeof(struct extent_buffer) + this_len);
		memcpy(eb->data, buf + total_write, this_len);
		ret = write_raid56_with_parity(info, eb, multi,
					       stripe_len, raid_map);
		/* non-parity path: mirror the data to every stripe */
		} else while (dev_nr < multi->num_stripes) {
			device = multi->stripes[dev_nr].dev;
			if (device->fd == 0) {
			dev_bytenr = multi->stripes[dev_nr].physical;
			this_len = min(this_len, bytes_left);
			ret = pwrite(device->fd, buf + total_write, this_len, dev_bytenr);
			if (ret != this_len) {
				fprintf(stderr, "Error writing to "
					"device %d\n", errno);
				fprintf(stderr, "Short write\n");
		BUG_ON(bytes_left < this_len);
		bytes_left -= this_len;
		total_write += this_len;
841 int set_extent_buffer_uptodate(struct extent_buffer *eb)
843 eb->flags |= EXTENT_UPTODATE;
847 int clear_extent_buffer_uptodate(struct extent_io_tree *tree,
848 struct extent_buffer *eb)
850 eb->flags &= ~EXTENT_UPTODATE;
/*
 * Report whether @eb's contents are valid (EXTENT_UPTODATE set).
 * NOTE(review): the early checks and return statements are not visible
 * in this view.
 */
int extent_buffer_uptodate(struct extent_buffer *eb)
	if (eb->flags & EXTENT_UPTODATE)
/*
 * Mark @eb dirty: set EXTENT_DIRTY on the buffer, record its byte
 * range as dirty in the owning tree, and take an extra reference so
 * the buffer cannot be freed while dirty.  Idempotent.
 */
int set_extent_buffer_dirty(struct extent_buffer *eb)
	struct extent_io_tree *tree = eb->tree;
	if (!(eb->flags & EXTENT_DIRTY)) {
		eb->flags |= EXTENT_DIRTY;
		set_extent_dirty(tree, eb->start, eb->start + eb->len - 1, 0);
		extent_buffer_get(eb);
/*
 * Clear @eb's dirty state: drop EXTENT_DIRTY from both the buffer and
 * its tree's dirty range, then release the reference that
 * set_extent_buffer_dirty() took.  Idempotent.
 */
int clear_extent_buffer_dirty(struct extent_buffer *eb)
	struct extent_io_tree *tree = eb->tree;
	if (eb->flags & EXTENT_DIRTY) {
		eb->flags &= ~EXTENT_DIRTY;
		clear_extent_dirty(tree, eb->start, eb->start + eb->len - 1, 0);
		free_extent_buffer(eb);
886 int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv,
887 unsigned long start, unsigned long len)
889 return memcmp(eb->data + start, ptrv, len);
892 void read_extent_buffer(struct extent_buffer *eb, void *dst,
893 unsigned long start, unsigned long len)
895 memcpy(dst, eb->data + start, len);
898 void write_extent_buffer(struct extent_buffer *eb, const void *src,
899 unsigned long start, unsigned long len)
901 memcpy(eb->data + start, src, len);
904 void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src,
905 unsigned long dst_offset, unsigned long src_offset,
908 memcpy(dst->data + dst_offset, src->data + src_offset, len);
911 void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
912 unsigned long src_offset, unsigned long len)
914 memmove(dst->data + dst_offset, dst->data + src_offset, len);
917 void memset_extent_buffer(struct extent_buffer *eb, char c,
918 unsigned long start, unsigned long len)
920 memset(eb->data + start, c, len);