/*
 * Copyright (C) 2007 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 021110-1307, USA.
 */
#define _XOPEN_SOURCE 600
#define __USE_XOPEN2K
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include "kerncompat.h"
#include "extent_io.h"
#include "list.h"
#include "ctree.h"
#include "volumes.h"
u64 cache_soft_max = 1024 * 1024 * 256;
u64 cache_hard_max = 1 * 1024 * 1024 * 1024;
void extent_io_tree_init(struct extent_io_tree *tree)
{
	cache_tree_init(&tree->state);
	cache_tree_init(&tree->cache);
	INIT_LIST_HEAD(&tree->lru);
	tree->cache_size = 0;
}
static struct extent_state *alloc_extent_state(void)
{
	struct extent_state *state;

	state = malloc(sizeof(*state));
	if (!state)
		return NULL;
	state->refs = 1;
	state->state = 0;
	state->xprivate = 0;
	return state;
}
static void free_extent_state(struct extent_state *state)
{
	state->refs--;
	BUG_ON(state->refs < 0);
	if (state->refs == 0)
		free(state);
}
void extent_io_tree_cleanup(struct extent_io_tree *tree)
{
	struct extent_state *es;
	struct extent_buffer *eb;
	struct cache_extent *cache;

	while(!list_empty(&tree->lru)) {
		eb = list_entry(tree->lru.next, struct extent_buffer, lru);
		if (eb->refs != 1) {
			fprintf(stderr, "extent buffer leak: "
				"start %llu len %u\n",
				(unsigned long long)eb->start, eb->len);
			eb->refs = 1;
		}
		free_extent_buffer(eb);
	}
	while (1) {
		cache = find_first_cache_extent(&tree->state, 0);
		if (!cache)
			break;
		es = container_of(cache, struct extent_state, cache_node);
		remove_cache_extent(&tree->state, &es->cache_node);
		free_extent_state(es);
	}
}
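
/*
 * Typical lifecycle of a tree (an illustrative sketch, not part of the
 * original code; the offsets are hypothetical):
 *
 *	struct extent_io_tree tree;
 *
 *	extent_io_tree_init(&tree);
 *	set_extent_bits(&tree, 0, 4095, EXTENT_DIRTY, 0);
 *	...
 *	extent_io_tree_cleanup(&tree);
 */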
static inline void update_extent_state(struct extent_state *state)
{
	state->cache_node.start = state->start;
	state->cache_node.size = state->end + 1 - state->start;
}
/*
 * Utility function to look for merge candidates inside a given range.
 * Any extents with matching state are merged together into a single
 * extent in the tree.  Extents with EXTENT_IO in their state field are
 * not merged.
 */
static int merge_state(struct extent_io_tree *tree,
		       struct extent_state *state)
{
	struct extent_state *other;
	struct cache_extent *other_node;

	if (state->state & EXTENT_IOBITS)
		return 0;

	other_node = prev_cache_extent(&state->cache_node);
	if (other_node) {
		other = container_of(other_node, struct extent_state,
				     cache_node);
		if (other->end == state->start - 1 &&
		    other->state == state->state) {
			state->start = other->start;
			update_extent_state(state);
			remove_cache_extent(&tree->state, &other->cache_node);
			free_extent_state(other);
		}
	}
	other_node = next_cache_extent(&state->cache_node);
	if (other_node) {
		other = container_of(other_node, struct extent_state,
				     cache_node);
		if (other->start == state->end + 1 &&
		    other->state == state->state) {
			other->start = state->start;
			update_extent_state(other);
			remove_cache_extent(&tree->state, &state->cache_node);
			free_extent_state(state);
		}
	}
	return 0;
}
/*
 * insert an extent_state struct into the tree.  'bits' are set on the
 * struct before it is inserted.
 */
static int insert_state(struct extent_io_tree *tree,
			struct extent_state *state, u64 start, u64 end,
			int bits)
{
	int ret;

	BUG_ON(end < start);
	state->state |= bits;
	state->start = start;
	state->end = end;
	update_extent_state(state);
	ret = insert_existing_cache_extent(&tree->state, &state->cache_node);
	BUG_ON(ret);
	merge_state(tree, state);
	return 0;
}
/*
 * split a given extent state struct in two, inserting the preallocated
 * struct 'prealloc' as the newly created second half.  'split' indicates an
 * offset inside 'orig' where it should be split.
 */
static int split_state(struct extent_io_tree *tree, struct extent_state *orig,
		       struct extent_state *prealloc, u64 split)
{
	int ret;

	prealloc->start = orig->start;
	prealloc->end = split - 1;
	prealloc->state = orig->state;
	update_extent_state(prealloc);
	orig->start = split;
	update_extent_state(orig);
	ret = insert_existing_cache_extent(&tree->state,
					   &prealloc->cache_node);
	BUG_ON(ret);
	return 0;
}
/*
 * utility function to clear some bits in a single extent state struct.
 */
static int clear_state_bit(struct extent_io_tree *tree,
			   struct extent_state *state, int bits)
{
	int ret = state->state & bits;

	state->state &= ~bits;
	if (state->state == 0) {
		remove_cache_extent(&tree->state, &state->cache_node);
		free_extent_state(state);
	} else {
		merge_state(tree, state);
	}
	return ret;
}
/*
 * clear some bits on a range in the tree.
 */
int clear_extent_bits(struct extent_io_tree *tree, u64 start,
		      u64 end, int bits, gfp_t mask)
{
	struct extent_state *state;
	struct extent_state *prealloc = NULL;
	struct cache_extent *node;
	u64 last_end;
	int err;
	int set = 0;

again:
	if (!prealloc) {
		prealloc = alloc_extent_state();
		if (!prealloc)
			return -ENOMEM;
	}

	/*
	 * this search will find the extents that end after
	 * our range starts
	 */
	node = find_first_cache_extent(&tree->state, start);
	if (!node)
		goto out;
	state = container_of(node, struct extent_state, cache_node);
	if (state->start > end)
		goto out;
	last_end = state->end;

	/*
	 *     | ---- desired range ---- |
	 *  | state | or
	 *  | ------------- state -------------- |
	 *
	 * We need to split the extent we found, and may flip
	 * bits on second half.
	 *
	 * If the extent we found extends past our range, we
	 * just split and search again.  It'll get split again
	 * the next time though.
	 *
	 * If the extent we found is inside our range, we clear
	 * the desired bit on it.
	 */
	if (state->start < start) {
		err = split_state(tree, state, prealloc, start);
		BUG_ON(err == -EEXIST);
		prealloc = NULL;
		if (err)
			goto out;
		if (state->end <= end) {
			set |= clear_state_bit(tree, state, bits);
			if (last_end == (u64)-1)
				goto out;
			start = last_end + 1;
		} else {
			start = state->start;
		}
		goto search_again;
	}
	/*
	 * | ---- desired range ---- |
	 *                        | state |
	 *
	 * We need to split the extent, and clear the bit
	 * on the first half.
	 */
	if (state->start <= end && state->end > end) {
		err = split_state(tree, state, prealloc, end + 1);
		BUG_ON(err == -EEXIST);

		set |= clear_state_bit(tree, prealloc, bits);
		prealloc = NULL;
		goto out;
	}

	start = state->end + 1;
	set |= clear_state_bit(tree, state, bits);
	if (last_end == (u64)-1)
		goto out;
	start = last_end + 1;
	goto search_again;
out:
	if (prealloc)
		free_extent_state(prealloc);
	return set;

search_again:
	if (start > end)
		goto out;
	goto again;
}
/*
 * set some bits on a range in the tree.
 */
int set_extent_bits(struct extent_io_tree *tree, u64 start,
		    u64 end, int bits, gfp_t mask)
{
	struct extent_state *state;
	struct extent_state *prealloc = NULL;
	struct cache_extent *node;
	int err = 0;
	u64 last_start;
	u64 last_end;
again:
	if (!prealloc) {
		prealloc = alloc_extent_state();
		if (!prealloc)
			return -ENOMEM;
	}

	/*
	 * this search will find the extents that end after
	 * our range starts
	 */
	node = find_first_cache_extent(&tree->state, start);
	if (!node) {
		err = insert_state(tree, prealloc, start, end, bits);
		BUG_ON(err == -EEXIST);
		prealloc = NULL;
		goto out;
	}

	state = container_of(node, struct extent_state, cache_node);
	last_start = state->start;
	last_end = state->end;

	/*
	 * | ---- desired range ---- |
	 * | state |
	 *
	 * Just lock what we found and keep going
	 */
	if (state->start == start && state->end <= end) {
		state->state |= bits;
		merge_state(tree, state);
		if (last_end == (u64)-1)
			goto out;
		start = last_end + 1;
		goto search_again;
	}
	/*
	 *     | ---- desired range ---- |
	 * | state |
	 *   or
	 * | ------------- state -------------- |
	 *
	 * We need to split the extent we found, and may flip bits on
	 * second half.
	 *
	 * If the extent we found extends past our
	 * range, we just split and search again.  It'll get split
	 * again the next time though.
	 *
	 * If the extent we found is inside our range, we set the
	 * desired bit on it.
	 */
	if (state->start < start) {
		err = split_state(tree, state, prealloc, start);
		BUG_ON(err == -EEXIST);
		prealloc = NULL;
		if (err)
			goto out;
		if (state->end <= end) {
			state->state |= bits;
			start = state->end + 1;
			merge_state(tree, state);
			if (last_end == (u64)-1)
				goto out;
			start = last_end + 1;
		} else {
			start = state->start;
		}
		goto search_again;
	}
	/*
	 * | ---- desired range ---- |
	 *     | state | or               | state |
	 *
	 * There's a hole, we need to insert something in it and
	 * ignore the extent we found.
	 */
	if (state->start > start) {
		u64 this_end;
		if (end < last_start)
			this_end = end;
		else
			this_end = last_start - 1;
		err = insert_state(tree, prealloc, start, this_end,
				   bits);
		BUG_ON(err == -EEXIST);
		prealloc = NULL;
		if (err)
			goto out;
		start = this_end + 1;
		goto search_again;
	}
	/*
	 * | ---- desired range ---- |
	 * | ---------- state ---------- |
	 *
	 * We need to split the extent, and set the bit
	 * on the first half.
	 */
	err = split_state(tree, state, prealloc, end + 1);
	BUG_ON(err == -EEXIST);

	state->state |= bits;
	merge_state(tree, prealloc);
	prealloc = NULL;
out:
	if (prealloc)
		free_extent_state(prealloc);
	return err;

search_again:
	if (start > end)
		goto out;
	goto again;
}
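
/*
 * Example of the split/merge behaviour above (an illustrative sketch, not
 * from the original file; the offsets are hypothetical).  Setting
 * EXTENT_DIRTY on two adjacent ranges merges them into one state record;
 * clearing part of that range splits the record again:
 *
 *	set_extent_bits(tree, 0, 4095, EXTENT_DIRTY, 0);
 *	set_extent_bits(tree, 4096, 8191, EXTENT_DIRTY, 0);
 *		(merged: one record covering [0, 8191])
 *	clear_extent_bits(tree, 4096, 8191, EXTENT_DIRTY, 0);
 *		(split: only [0, 4095] stays dirty)
 */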
int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
		     gfp_t mask)
{
	return set_extent_bits(tree, start, end, EXTENT_DIRTY, mask);
}

int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
		       gfp_t mask)
{
	return clear_extent_bits(tree, start, end, EXTENT_DIRTY, mask);
}
int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
			  u64 *start_ret, u64 *end_ret, int bits)
{
	struct cache_extent *node;
	struct extent_state *state;
	int ret = 1;

	/*
	 * this search will find all the extents that end after
	 * our range starts.
	 */
	node = find_first_cache_extent(&tree->state, start);
	if (!node)
		goto out;

	while (1) {
		state = container_of(node, struct extent_state, cache_node);
		if (state->end >= start && (state->state & bits)) {
			*start_ret = state->start;
			*end_ret = state->end;
			ret = 0;
			goto out;
		}
		node = next_cache_extent(node);
		if (!node)
			break;
	}
out:
	return ret;
}
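
/*
 * Walking every dirty range in a tree with find_first_extent_bit()
 * (an illustrative sketch, not from the original file):
 *
 *	u64 found_start;
 *	u64 found_end;
 *	u64 cur = 0;
 *
 *	while (!find_first_extent_bit(tree, cur, &found_start,
 *				      &found_end, EXTENT_DIRTY)) {
 *		printf("dirty: %llu-%llu\n",
 *		       (unsigned long long)found_start,
 *		       (unsigned long long)found_end);
 *		cur = found_end + 1;
 *	}
 */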
int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
		   int bits, int filled)
{
	struct extent_state *state = NULL;
	struct cache_extent *node;
	int bitset = 0;

	node = find_first_cache_extent(&tree->state, start);
	while (node && start <= end) {
		state = container_of(node, struct extent_state, cache_node);

		if (filled && state->start > start) {
			bitset = 0;
			break;
		}
		if (state->start > end)
			break;

		if (state->state & bits) {
			bitset = 1;
			if (!filled)
				break;
		} else if (filled) {
			bitset = 0;
			break;
		}
		start = state->end + 1;
		if (start > end)
			break;
		node = next_cache_extent(node);
	}
	return bitset;
}
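
/*
 * The 'filled' flag picks between "any" and "all" semantics; with
 * hypothetical offsets:
 *
 *	test_range_bit(tree, 0, 8191, EXTENT_DIRTY, 0)
 *		returns 1 if any part of [0, 8191] has EXTENT_DIRTY set
 *	test_range_bit(tree, 0, 8191, EXTENT_DIRTY, 1)
 *		returns 1 only if the whole of [0, 8191] is covered
 */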
int set_state_private(struct extent_io_tree *tree, u64 start, u64 private)
{
	struct cache_extent *node;
	struct extent_state *state;
	int ret = 0;

	node = find_first_cache_extent(&tree->state, start);
	if (!node) {
		ret = -ENOENT;
		goto out;
	}
	state = container_of(node, struct extent_state, cache_node);
	if (state->start != start) {
		ret = -ENOENT;
		goto out;
	}
	state->xprivate = private;
out:
	return ret;
}

int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private)
{
	struct cache_extent *node;
	struct extent_state *state;
	int ret = 0;

	node = find_first_cache_extent(&tree->state, start);
	if (!node) {
		ret = -ENOENT;
		goto out;
	}
	state = container_of(node, struct extent_state, cache_node);
	if (state->start != start) {
		ret = -ENOENT;
		goto out;
	}
	*private = state->xprivate;
out:
	return ret;
}
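
/*
 * The private value is keyed by the exact start offset of an existing
 * state record, so a record must be created first and the same start
 * used for both calls (an illustrative sketch; the range and value are
 * hypothetical):
 *
 *	u64 val;
 *
 *	set_extent_bits(tree, start, end, EXTENT_DIRTY, 0);
 *	set_state_private(tree, start, 12345);
 *	if (get_state_private(tree, start, &val) == 0)
 *		val now holds 12345
 */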
/*
 * Once the cache grows past the soft limit, drop unreferenced buffers
 * from the front of the lru until we are back under the hard limit or
 * have scanned enough entries.
 */
static int free_some_buffers(struct extent_io_tree *tree)
{
	u32 nrscan = 0;
	struct extent_buffer *eb;
	struct list_head *node, *next;

	if (tree->cache_size < cache_soft_max)
		return 0;

	list_for_each_safe(node, next, &tree->lru) {
		eb = list_entry(node, struct extent_buffer, lru);
		if (eb->refs == 1) {
			free_extent_buffer(eb);
			if (tree->cache_size < cache_hard_max)
				break;
		} else {
			list_move_tail(&eb->lru, &tree->lru);
		}
		if (nrscan++ > 64 && tree->cache_size < cache_hard_max)
			break;
	}
	return 0;
}
static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree,
						   u64 bytenr, u32 blocksize)
{
	struct extent_buffer *eb;
	int ret;

	eb = malloc(sizeof(struct extent_buffer) + blocksize);
	if (!eb) {
		BUG();
		return NULL;
	}
	memset(eb, 0, sizeof(struct extent_buffer) + blocksize);

	eb->start = bytenr;
	eb->len = blocksize;
	eb->refs = 2;
	eb->flags = 0;
	eb->tree = tree;
	eb->fd = -1;
	eb->dev_bytenr = (u64)-1;
	eb->cache_node.start = bytenr;
	eb->cache_node.size = blocksize;

	free_some_buffers(tree);
	ret = insert_existing_cache_extent(&tree->cache, &eb->cache_node);
	if (ret) {
		free(eb);
		return NULL;
	}
	list_add_tail(&eb->lru, &tree->lru);
	tree->cache_size += blocksize;
	return eb;
}
void free_extent_buffer(struct extent_buffer *eb)
{
	if (!eb)
		return;

	eb->refs--;
	BUG_ON(eb->refs < 0);
	if (eb->refs == 0) {
		struct extent_io_tree *tree = eb->tree;
		BUG_ON(eb->flags & EXTENT_DIRTY);
		list_del_init(&eb->lru);
		remove_cache_extent(&tree->cache, &eb->cache_node);
		BUG_ON(tree->cache_size < eb->len);
		tree->cache_size -= eb->len;
		free(eb);
	}
}
struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree,
					 u64 bytenr, u32 blocksize)
{
	struct extent_buffer *eb = NULL;
	struct cache_extent *cache;

	cache = find_cache_extent(&tree->cache, bytenr, blocksize);
	if (cache && cache->start == bytenr && cache->size == blocksize) {
		eb = container_of(cache, struct extent_buffer, cache_node);
		list_move_tail(&eb->lru, &tree->lru);
		eb->refs++;
	}
	return eb;
}
struct extent_buffer *find_first_extent_buffer(struct extent_io_tree *tree,
					       u64 start)
{
	struct extent_buffer *eb = NULL;
	struct cache_extent *cache;

	cache = find_first_cache_extent(&tree->cache, start);
	if (cache) {
		eb = container_of(cache, struct extent_buffer, cache_node);
		list_move_tail(&eb->lru, &tree->lru);
		eb->refs++;
	}
	return eb;
}
struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
					  u64 bytenr, u32 blocksize)
{
	struct extent_buffer *eb;
	struct cache_extent *cache;

	cache = find_cache_extent(&tree->cache, bytenr, blocksize);
	if (cache && cache->start == bytenr && cache->size == blocksize) {
		eb = container_of(cache, struct extent_buffer, cache_node);
		list_move_tail(&eb->lru, &tree->lru);
		eb->refs++;
	} else {
		if (cache) {
			eb = container_of(cache, struct extent_buffer,
					  cache_node);
			free_extent_buffer(eb);
		}
		eb = __alloc_extent_buffer(tree, bytenr, blocksize);
	}
	return eb;
}
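
/*
 * Looking up or creating a cached buffer (an illustrative sketch; bytenr
 * and blocksize are hypothetical and would normally come from a block
 * pointer in the tree):
 *
 *	struct extent_buffer *eb;
 *
 *	eb = alloc_extent_buffer(tree, bytenr, blocksize);
 *	if (!eb)
 *		return -ENOMEM;
 *	... read or fill eb->data ...
 *	free_extent_buffer(eb);
 *
 * free_extent_buffer() only drops the caller's reference; the buffer
 * stays in the lru cache until free_some_buffers() or
 * extent_io_tree_cleanup() reclaims it.
 */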
int read_extent_from_disk(struct extent_buffer *eb,
			  unsigned long offset, unsigned long len)
{
	int ret;

	ret = pread(eb->fd, eb->data + offset, len, eb->dev_bytenr);
	if (ret < 0)
		goto out;
	if (ret != len) {
		ret = -EIO;
		goto out;
	}
	ret = 0;
out:
	return ret;
}
int write_extent_to_disk(struct extent_buffer *eb)
{
	int ret;

	ret = pwrite(eb->fd, eb->data, eb->len, eb->dev_bytenr);
	if (ret < 0)
		goto out;
	if (ret != eb->len) {
		ret = -EIO;
		goto out;
	}
	ret = 0;
out:
	return ret;
}
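
/*
 * A typical read-modify-write cycle on a buffer (an illustrative sketch;
 * it assumes eb->fd and eb->dev_bytenr were already set up to point at
 * the right device and physical offset, and 'item' and 'offset' are
 * hypothetical):
 *
 *	ret = read_extent_from_disk(eb, 0, eb->len);
 *	if (ret)
 *		return ret;
 *	write_extent_buffer(eb, &item, offset, sizeof(item));
 *	ret = write_extent_to_disk(eb);
 */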
int read_data_from_disk(struct btrfs_fs_info *info, void *buf, u64 offset,
			u64 bytes, int mirror)
{
	struct btrfs_multi_bio *multi = NULL;
	struct btrfs_device *device;
	u64 bytes_left = bytes;
	u64 read_len;
	u64 total_read = 0;
	int ret;

	while (bytes_left) {
		read_len = bytes_left;
		ret = btrfs_map_block(&info->mapping_tree, READ, offset,
				      &read_len, &multi, mirror, NULL);
		if (ret) {
			fprintf(stderr, "Couldn't map the block %Lu\n",
				offset);
			return -EIO;
		}
		device = multi->stripes[0].dev;

		read_len = min(bytes_left, read_len);
		if (device->fd == 0) {
			kfree(multi);
			return -EIO;
		}

		ret = pread(device->fd, buf + total_read, read_len,
			    multi->stripes[0].physical);
		kfree(multi);
		if (ret < 0) {
			fprintf(stderr, "Error reading %Lu, %d\n", offset,
				ret);
			return -EIO;
		}
		if (ret != read_len) {
			fprintf(stderr, "Short read for %Lu, read %d, "
				"read_len %Lu\n", offset, ret, read_len);
			return -EIO;
		}

		bytes_left -= read_len;
		offset += read_len;
		total_read += read_len;
	}
	return 0;
}
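
/*
 * Retrying a failed read on another copy (an illustrative sketch, not
 * from the original file; num_copies would come from btrfs_num_copies()
 * in volumes.c):
 *
 *	int mirror = 0;
 *
 *	do {
 *		ret = read_data_from_disk(info, buf, offset, bytes, mirror);
 *		mirror++;
 *	} while (ret && mirror < num_copies);
 */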
int write_data_to_disk(struct btrfs_fs_info *info, void *buf, u64 offset,
		       u64 bytes, int mirror)
{
	struct btrfs_multi_bio *multi = NULL;
	struct btrfs_device *device;
	u64 bytes_left = bytes;
	u64 this_len;
	u64 total_write = 0;
	u64 *raid_map = NULL;
	u64 dev_bytenr;
	int dev_nr;
	int ret = 0;

	while (bytes_left > 0) {
		this_len = bytes_left;
		dev_nr = 0;

		ret = btrfs_map_block(&info->mapping_tree, WRITE, offset,
				      &this_len, &multi, mirror, &raid_map);
		if (ret) {
			fprintf(stderr, "Couldn't map the block %Lu\n",
				offset);
			return -EIO;
		}

		if (raid_map) {
			struct extent_buffer *eb;
			u64 stripe_len = this_len;

			this_len = min(this_len, bytes_left);
			this_len = min(this_len, (u64)info->tree_root->leafsize);

			eb = malloc(sizeof(struct extent_buffer) + this_len);
			BUG_ON(!eb);

			memset(eb, 0, sizeof(struct extent_buffer) + this_len);
			eb->start = offset;
			eb->len = this_len;

			memcpy(eb->data, buf + total_write, this_len);
			ret = write_raid56_with_parity(info, eb, multi,
						       stripe_len, raid_map);
			BUG_ON(ret);

			free(eb);
			kfree(raid_map);
			raid_map = NULL;
		} else while (dev_nr < multi->num_stripes) {
			device = multi->stripes[dev_nr].dev;
			if (device->fd == 0) {
				kfree(multi);
				return -EIO;
			}

			dev_bytenr = multi->stripes[dev_nr].physical;
			this_len = min(this_len, bytes_left);
			dev_nr++;

			ret = pwrite(device->fd, buf + total_write, this_len,
				     dev_bytenr);
			if (ret != this_len) {
				if (ret < 0) {
					fprintf(stderr, "Error writing to "
						"device %d\n", errno);
					ret = errno;
					kfree(multi);
					return -ret;
				} else {
					fprintf(stderr, "Short write\n");
					kfree(multi);
					return -EIO;
				}
			}
		}

		BUG_ON(bytes_left < this_len);

		bytes_left -= this_len;
		offset += this_len;
		total_write += this_len;

		kfree(multi);
		multi = NULL;
	}
	return 0;
}
int set_extent_buffer_uptodate(struct extent_buffer *eb)
{
	eb->flags |= EXTENT_UPTODATE;
	return 0;
}

int clear_extent_buffer_uptodate(struct extent_io_tree *tree,
				 struct extent_buffer *eb)
{
	eb->flags &= ~EXTENT_UPTODATE;
	return 0;
}

int extent_buffer_uptodate(struct extent_buffer *eb)
{
	if (!eb)
		return 0;

	if (eb->flags & EXTENT_UPTODATE)
		return 1;
	return 0;
}

int set_extent_buffer_dirty(struct extent_buffer *eb)
{
	struct extent_io_tree *tree = eb->tree;

	if (!(eb->flags & EXTENT_DIRTY)) {
		eb->flags |= EXTENT_DIRTY;
		set_extent_dirty(tree, eb->start, eb->start + eb->len - 1, 0);
		extent_buffer_get(eb);
	}
	return 0;
}

int clear_extent_buffer_dirty(struct extent_buffer *eb)
{
	struct extent_io_tree *tree = eb->tree;

	if (eb->flags & EXTENT_DIRTY) {
		eb->flags &= ~EXTENT_DIRTY;
		clear_extent_dirty(tree, eb->start, eb->start + eb->len - 1, 0);
		free_extent_buffer(eb);
	}
	return 0;
}
int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv,
			 unsigned long start, unsigned long len)
{
	return memcmp(eb->data + start, ptrv, len);
}

void read_extent_buffer(struct extent_buffer *eb, void *dst,
			unsigned long start, unsigned long len)
{
	memcpy(dst, eb->data + start, len);
}

void write_extent_buffer(struct extent_buffer *eb, const void *src,
			 unsigned long start, unsigned long len)
{
	memcpy(eb->data + start, src, len);
}

void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src,
			unsigned long dst_offset, unsigned long src_offset,
			unsigned long len)
{
	memcpy(dst->data + dst_offset, src->data + src_offset, len);
}

void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
			  unsigned long src_offset, unsigned long len)
{
	memcpy(dst->data + dst_offset, dst->data + src_offset, len);
}

void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
			   unsigned long src_offset, unsigned long len)
{
	memmove(dst->data + dst_offset, dst->data + src_offset, len);
}

void memset_extent_buffer(struct extent_buffer *eb, char c,
			  unsigned long start, unsigned long len)
{
	memset(eb->data + start, c, len);
}
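
/*
 * Note that memcpy_extent_buffer() maps to memcpy() and therefore
 * assumes the source and destination ranges do not overlap; callers
 * shifting data around inside a single buffer should use
 * memmove_extent_buffer(), which is safe for overlapping ranges.
 */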